tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,1341 @@
|
|
|
1
|
+
/******************************************************************************
|
|
2
|
+
** Filename: mfTraining.c
|
|
3
|
+
** Purpose: Separates training pages into files for each character.
|
|
4
|
+
** Strips from files only the features and their parameters of
|
|
5
|
+
the feature type mf.
|
|
6
|
+
** Author: Dan Johnson
|
|
7
|
+
** Revision: Christy Russon
|
|
8
|
+
** Environment: HPUX 6.5
|
|
9
|
+
** Library: HPUX 6.5
|
|
10
|
+
** History: Fri Aug 18 08:53:50 1989, DSJ, Created.
|
|
11
|
+
** 5/25/90, DSJ, Adapted to multiple feature types.
|
|
12
|
+
** Tuesday, May 17, 1998 Changes made to make feature specific and
|
|
13
|
+
** simplify structures. First step in simplifying training process.
|
|
14
|
+
**
|
|
15
|
+
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
16
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
17
|
+
** you may not use this file except in compliance with the License.
|
|
18
|
+
** You may obtain a copy of the License at
|
|
19
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
20
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
21
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
22
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
23
|
+
** See the License for the specific language governing permissions and
|
|
24
|
+
** limitations under the License.
|
|
25
|
+
******************************************************************************/
|
|
26
|
+
/**----------------------------------------------------------------------------
|
|
27
|
+
Include Files and Type Defines
|
|
28
|
+
----------------------------------------------------------------------------**/
|
|
29
|
+
#include "oldlist.h"
|
|
30
|
+
#include "efio.h"
|
|
31
|
+
#include "emalloc.h"
|
|
32
|
+
#include "featdefs.h"
|
|
33
|
+
#include "tessopt.h"
|
|
34
|
+
#include "ocrfeatures.h"
|
|
35
|
+
#include "mf.h"
|
|
36
|
+
#include "general.h"
|
|
37
|
+
#include "clusttool.h"
|
|
38
|
+
#include "cluster.h"
|
|
39
|
+
#include "protos.h"
|
|
40
|
+
#include "minmax.h"
|
|
41
|
+
#include "debug.h"
|
|
42
|
+
#include "tprintf.h"
|
|
43
|
+
#include "const.h"
|
|
44
|
+
#include "mergenf.h"
|
|
45
|
+
#include "name2char.h"
|
|
46
|
+
#include "intproto.h"
|
|
47
|
+
#include "variables.h"
|
|
48
|
+
#include "freelist.h"
|
|
49
|
+
#include "efio.h"
|
|
50
|
+
#include "danerror.h"
|
|
51
|
+
#include "globals.h"
|
|
52
|
+
|
|
53
|
+
#include <string.h>
|
|
54
|
+
#include <stdio.h>
|
|
55
|
+
#define _USE_MATH_DEFINES
|
|
56
|
+
#include <math.h>
|
|
57
|
+
#ifdef WIN32
|
|
58
|
+
#ifndef M_PI
|
|
59
|
+
#define M_PI 3.14159265358979323846
|
|
60
|
+
#endif
|
|
61
|
+
#endif
|
|
62
|
+
|
|
63
|
+
#define MAXNAMESIZE 80
|
|
64
|
+
#define MAX_NUM_SAMPLES 10000
|
|
65
|
+
#define PROGRAM_FEATURE_TYPE "mf"
|
|
66
|
+
#define MINSD (1.0f / 128.0f)
|
|
67
|
+
#define MINSD_ANGLE (1.0f / 64.0f)
|
|
68
|
+
|
|
69
|
+
int row_number; /* cjn: fixes link problem */
|
|
70
|
+
|
|
71
|
+
typedef struct
|
|
72
|
+
{
|
|
73
|
+
char *Label;
|
|
74
|
+
int SampleCount;
|
|
75
|
+
LIST List;
|
|
76
|
+
}
|
|
77
|
+
LABELEDLISTNODE, *LABELEDLIST;
|
|
78
|
+
|
|
79
|
+
typedef struct
|
|
80
|
+
{
|
|
81
|
+
char* Label;
|
|
82
|
+
int NumMerged[MAX_NUM_PROTOS];
|
|
83
|
+
CLASS_TYPE Class;
|
|
84
|
+
}MERGE_CLASS_NODE;
|
|
85
|
+
typedef MERGE_CLASS_NODE* MERGE_CLASS;
|
|
86
|
+
|
|
87
|
+
/* Rounds x to the nearest multiple of frag.  Each use of an argument is
   parenthesized so that expression arguments (e.g. a + b) group correctly.
   Note that frag is evaluated twice, so it must not have side effects. */
#define round(x,frag) (floor((x)/(frag)+.5)*(frag))
|
|
88
|
+
|
|
89
|
+
/**----------------------------------------------------------------------------
|
|
90
|
+
Public Function Prototypes
|
|
91
|
+
----------------------------------------------------------------------------**/
|
|
92
|
+
int main (
|
|
93
|
+
int argc,
|
|
94
|
+
char **argv);
|
|
95
|
+
|
|
96
|
+
/**----------------------------------------------------------------------------
|
|
97
|
+
Private Function Prototypes
|
|
98
|
+
----------------------------------------------------------------------------**/
|
|
99
|
+
void ParseArguments(
|
|
100
|
+
int argc,
|
|
101
|
+
char **argv);
|
|
102
|
+
|
|
103
|
+
char *GetNextFilename ();
|
|
104
|
+
|
|
105
|
+
LIST ReadTrainingSamples (
|
|
106
|
+
FILE *File);
|
|
107
|
+
|
|
108
|
+
LABELEDLIST FindList (
|
|
109
|
+
LIST List,
|
|
110
|
+
char *Label);
|
|
111
|
+
|
|
112
|
+
MERGE_CLASS FindClass (
|
|
113
|
+
LIST List,
|
|
114
|
+
char *Label);
|
|
115
|
+
|
|
116
|
+
LABELEDLIST NewLabeledList (
|
|
117
|
+
char *Label);
|
|
118
|
+
|
|
119
|
+
MERGE_CLASS NewLabeledClass (
|
|
120
|
+
char *Label);
|
|
121
|
+
|
|
122
|
+
void WriteTrainingSamples (
|
|
123
|
+
char *Directory,
|
|
124
|
+
LIST CharList);
|
|
125
|
+
|
|
126
|
+
void WriteClusteredTrainingSamples (
|
|
127
|
+
char *Directory,
|
|
128
|
+
LIST ProtoList,
|
|
129
|
+
CLUSTERER *Clusterer,
|
|
130
|
+
LABELEDLIST CharSample);
|
|
131
|
+
/**/
|
|
132
|
+
void WriteMergedTrainingSamples(
|
|
133
|
+
char *Directory,
|
|
134
|
+
LIST ClassList);
|
|
135
|
+
|
|
136
|
+
void WriteMicrofeat(
|
|
137
|
+
char *Directory,
|
|
138
|
+
LIST ClassList);
|
|
139
|
+
|
|
140
|
+
void WriteProtos(
|
|
141
|
+
FILE* File,
|
|
142
|
+
MERGE_CLASS MergeClass);
|
|
143
|
+
|
|
144
|
+
void WriteConfigs(
|
|
145
|
+
FILE* File,
|
|
146
|
+
CLASS_TYPE Class);
|
|
147
|
+
|
|
148
|
+
void FreeTrainingSamples (
|
|
149
|
+
LIST CharList);
|
|
150
|
+
|
|
151
|
+
void FreeLabeledClassList (
|
|
152
|
+
LIST ClassList);
|
|
153
|
+
|
|
154
|
+
void FreeLabeledList (
|
|
155
|
+
LABELEDLIST LabeledList);
|
|
156
|
+
|
|
157
|
+
CLUSTERER *SetUpForClustering(
|
|
158
|
+
LABELEDLIST CharSample);
|
|
159
|
+
/*
|
|
160
|
+
PARAMDESC *ConvertToPARAMDESC(
|
|
161
|
+
PARAM_DESC* Param_Desc,
|
|
162
|
+
int N);
|
|
163
|
+
*/
|
|
164
|
+
void MergeInsignificantProtos(LIST ProtoList, const char* label,
|
|
165
|
+
CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
|
|
166
|
+
|
|
167
|
+
LIST RemoveInsignificantProtos(
|
|
168
|
+
LIST ProtoList,
|
|
169
|
+
BOOL8 KeepSigProtos,
|
|
170
|
+
BOOL8 KeepInsigProtos,
|
|
171
|
+
int N);
|
|
172
|
+
|
|
173
|
+
void CleanUpUnusedData(
|
|
174
|
+
LIST ProtoList);
|
|
175
|
+
|
|
176
|
+
void Normalize (
|
|
177
|
+
float *Values);
|
|
178
|
+
|
|
179
|
+
void SetUpForFloat2Int(
|
|
180
|
+
LIST LabeledClassList);
|
|
181
|
+
|
|
182
|
+
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename);
|
|
183
|
+
|
|
184
|
+
//--------------Global Data Definitions and Declarations--------------
|
|
185
|
+
static char FontName[MAXNAMESIZE];
|
|
186
|
+
// globals used for parsing command line arguments
|
|
187
|
+
static char *Directory = NULL;
|
|
188
|
+
static int MaxNumSamples = MAX_NUM_SAMPLES;
|
|
189
|
+
static int Argc;
|
|
190
|
+
static char **Argv;
|
|
191
|
+
|
|
192
|
+
// globals used to control what information is saved in the output file
|
|
193
|
+
static BOOL8 ShowAllSamples = FALSE;
|
|
194
|
+
static BOOL8 ShowSignificantProtos = TRUE;
|
|
195
|
+
static BOOL8 ShowInsignificantProtos = FALSE;
|
|
196
|
+
|
|
197
|
+
// global variable to hold configuration parameters to control clustering
|
|
198
|
+
// -M 0.40 -B 0.05 -I 1.0 -C 1e-6.
|
|
199
|
+
static CLUSTERCONFIG Config =
|
|
200
|
+
{ elliptical, 0.625, 0.05, 1.0, 1e-6, 0 };
|
|
201
|
+
|
|
202
|
+
static FLOAT32 RoundingAccuracy = 0.0f;
|
|
203
|
+
|
|
204
|
+
// The unicharset used during mftraining
|
|
205
|
+
static UNICHARSET unicharset_mftraining;
|
|
206
|
+
|
|
207
|
+
const char* test_ch = "";
|
|
208
|
+
|
|
209
|
+
/*----------------------------------------------------------------------------
|
|
210
|
+
Public Code
|
|
211
|
+
-----------------------------------------------------------------------------*/
|
|
212
|
+
void DisplayProtoList(const char* ch, LIST protolist) {
|
|
213
|
+
void* window = c_create_window("Char samples", 50, 200,
|
|
214
|
+
520, 520, -130.0, 130.0, -130.0, 130.0);
|
|
215
|
+
LIST proto = protolist;
|
|
216
|
+
iterate(proto) {
|
|
217
|
+
PROTOTYPE* prototype = reinterpret_cast<PROTOTYPE *>(first_node(proto));
|
|
218
|
+
if (prototype->Significant)
|
|
219
|
+
c_line_color_index(window, Green);
|
|
220
|
+
else if (prototype->NumSamples == 0)
|
|
221
|
+
c_line_color_index(window, Blue);
|
|
222
|
+
else if (prototype->Merged)
|
|
223
|
+
c_line_color_index(window, Magenta);
|
|
224
|
+
else
|
|
225
|
+
c_line_color_index(window, Red);
|
|
226
|
+
float x = CenterX(prototype->Mean);
|
|
227
|
+
float y = CenterY(prototype->Mean);
|
|
228
|
+
double angle = OrientationOf(prototype->Mean) * 2 * M_PI;
|
|
229
|
+
float dx = static_cast<float>(LengthOf(prototype->Mean) * cos(angle) / 2);
|
|
230
|
+
float dy = static_cast<float>(LengthOf(prototype->Mean) * sin(angle) / 2);
|
|
231
|
+
c_move(window, (x - dx) * 256, (y - dy) * 256);
|
|
232
|
+
c_draw(window, (x + dx) * 256, (y + dy) * 256);
|
|
233
|
+
if (prototype->Significant)
|
|
234
|
+
tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n",
|
|
235
|
+
x, y, dx, dy, prototype->NumSamples);
|
|
236
|
+
else if (prototype->NumSamples > 0 && !prototype->Merged)
|
|
237
|
+
tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n",
|
|
238
|
+
x, y, dx, dy, prototype->NumSamples);
|
|
239
|
+
}
|
|
240
|
+
c_make_current(window);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/*---------------------------------------------------------------------------*/
|
|
244
|
+
int main (int argc, char **argv) {
|
|
245
|
+
/*
|
|
246
|
+
** Parameters:
|
|
247
|
+
** argc number of command line arguments
|
|
248
|
+
** argv array of command line arguments
|
|
249
|
+
** Globals: none
|
|
250
|
+
** Operation:
|
|
251
|
+
** This program reads in a text file consisting of feature
|
|
252
|
+
** samples from a training page in the following format:
|
|
253
|
+
**
|
|
254
|
+
** FontName CharName NumberOfFeatureTypes(N)
|
|
255
|
+
** FeatureTypeName1 NumberOfFeatures(M)
|
|
256
|
+
** Feature1
|
|
257
|
+
** ...
|
|
258
|
+
** FeatureM
|
|
259
|
+
** FeatureTypeName2 NumberOfFeatures(M)
|
|
260
|
+
** Feature1
|
|
261
|
+
** ...
|
|
262
|
+
** FeatureM
|
|
263
|
+
** ...
|
|
264
|
+
** FeatureTypeNameN NumberOfFeatures(M)
|
|
265
|
+
** Feature1
|
|
266
|
+
** ...
|
|
267
|
+
** FeatureM
|
|
268
|
+
** FontName CharName ...
|
|
269
|
+
**
|
|
270
|
+
** The result of this program is a binary inttemp file used by
|
|
271
|
+
** the OCR engine.
|
|
272
|
+
** Return: none
|
|
273
|
+
** Exceptions: none
|
|
274
|
+
** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
|
|
275
|
+
** Mon May 18 1998, Christy Russson, Revistion started.
|
|
276
|
+
*/
|
|
277
|
+
char *PageName;
|
|
278
|
+
FILE *TrainingPage;
|
|
279
|
+
FILE *OutFile;
|
|
280
|
+
LIST CharList;
|
|
281
|
+
CLUSTERER *Clusterer = NULL;
|
|
282
|
+
LIST ProtoList = NIL;
|
|
283
|
+
LABELEDLIST CharSample;
|
|
284
|
+
PROTOTYPE *Prototype;
|
|
285
|
+
LIST ClassList = NIL;
|
|
286
|
+
int Cid, Pid;
|
|
287
|
+
PROTO Proto;
|
|
288
|
+
PROTO_STRUCT DummyProto;
|
|
289
|
+
BIT_VECTOR Config2;
|
|
290
|
+
MERGE_CLASS MergeClass;
|
|
291
|
+
INT_TEMPLATES IntTemplates;
|
|
292
|
+
LIST pCharList, pProtoList;
|
|
293
|
+
char Filename[MAXNAMESIZE];
|
|
294
|
+
|
|
295
|
+
// Clean the unichar set
|
|
296
|
+
unicharset_mftraining.clear();
|
|
297
|
+
// Space character needed to represent NIL classification
|
|
298
|
+
unicharset_mftraining.unichar_insert(" ");
|
|
299
|
+
|
|
300
|
+
ParseArguments (argc, argv);
|
|
301
|
+
InitFastTrainerVars ();
|
|
302
|
+
InitSubfeatureVars ();
|
|
303
|
+
while ((PageName = GetNextFilename()) != NULL) {
|
|
304
|
+
printf ("Reading %s ...\n", PageName);
|
|
305
|
+
TrainingPage = Efopen (PageName, "r");
|
|
306
|
+
CharList = ReadTrainingSamples (TrainingPage);
|
|
307
|
+
fclose (TrainingPage);
|
|
308
|
+
//WriteTrainingSamples (Directory, CharList);
|
|
309
|
+
pCharList = CharList;
|
|
310
|
+
iterate(pCharList) {
|
|
311
|
+
//Cluster
|
|
312
|
+
CharSample = (LABELEDLIST) first_node (pCharList);
|
|
313
|
+
// printf ("\nClustering %s ...", CharSample->Label);
|
|
314
|
+
Clusterer = SetUpForClustering(CharSample);
|
|
315
|
+
Config.MagicSamples = CharSample->SampleCount;
|
|
316
|
+
ProtoList = ClusterSamples(Clusterer, &Config);
|
|
317
|
+
CleanUpUnusedData(ProtoList);
|
|
318
|
+
|
|
319
|
+
//Merge
|
|
320
|
+
MergeInsignificantProtos(ProtoList, CharSample->Label,
|
|
321
|
+
Clusterer, &Config);
|
|
322
|
+
if (strcmp(test_ch, CharSample->Label) == 0)
|
|
323
|
+
DisplayProtoList(test_ch, ProtoList);
|
|
324
|
+
ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
|
|
325
|
+
ShowInsignificantProtos,
|
|
326
|
+
Clusterer->SampleSize);
|
|
327
|
+
FreeClusterer(Clusterer);
|
|
328
|
+
MergeClass = FindClass (ClassList, CharSample->Label);
|
|
329
|
+
if (MergeClass == NULL) {
|
|
330
|
+
MergeClass = NewLabeledClass (CharSample->Label);
|
|
331
|
+
ClassList = push (ClassList, MergeClass);
|
|
332
|
+
}
|
|
333
|
+
Cid = AddConfigToClass(MergeClass->Class);
|
|
334
|
+
pProtoList = ProtoList;
|
|
335
|
+
iterate (pProtoList) {
|
|
336
|
+
Prototype = (PROTOTYPE *) first_node (pProtoList);
|
|
337
|
+
|
|
338
|
+
// see if proto can be approximated by existing proto
|
|
339
|
+
Pid = FindClosestExistingProto(MergeClass->Class,
|
|
340
|
+
MergeClass->NumMerged, Prototype);
|
|
341
|
+
if (Pid == NO_PROTO) {
|
|
342
|
+
Pid = AddProtoToClass (MergeClass->Class);
|
|
343
|
+
Proto = ProtoIn (MergeClass->Class, Pid);
|
|
344
|
+
MakeNewFromOld (Proto, Prototype);
|
|
345
|
+
MergeClass->NumMerged[Pid] = 1;
|
|
346
|
+
}
|
|
347
|
+
else {
|
|
348
|
+
MakeNewFromOld (&DummyProto, Prototype);
|
|
349
|
+
ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
|
|
350
|
+
(FLOAT32) MergeClass->NumMerged[Pid], 1.0,
|
|
351
|
+
ProtoIn (MergeClass->Class, Pid));
|
|
352
|
+
MergeClass->NumMerged[Pid] ++;
|
|
353
|
+
}
|
|
354
|
+
Config2 = MergeClass->Class->Configurations[Cid];
|
|
355
|
+
AddProtoToConfig (Pid, Config2);
|
|
356
|
+
}
|
|
357
|
+
FreeProtoList (&ProtoList);
|
|
358
|
+
}
|
|
359
|
+
FreeTrainingSamples (CharList);
|
|
360
|
+
}
|
|
361
|
+
//WriteMergedTrainingSamples(Directory,ClassList);
|
|
362
|
+
WriteMicrofeat(Directory, ClassList);
|
|
363
|
+
InitIntProtoVars ();
|
|
364
|
+
InitPrototypes ();
|
|
365
|
+
SetUpForFloat2Int(ClassList);
|
|
366
|
+
IntTemplates = CreateIntTemplates(TrainingData, unicharset_mftraining);
|
|
367
|
+
strcpy (Filename, "");
|
|
368
|
+
if (Directory != NULL) {
|
|
369
|
+
strcat (Filename, Directory);
|
|
370
|
+
strcat (Filename, "/");
|
|
371
|
+
}
|
|
372
|
+
strcat (Filename, "inttemp");
|
|
373
|
+
#ifdef __UNIX__
|
|
374
|
+
OutFile = Efopen (Filename, "w");
|
|
375
|
+
#else
|
|
376
|
+
OutFile = Efopen (Filename, "wb");
|
|
377
|
+
#endif
|
|
378
|
+
WriteIntTemplates(OutFile, IntTemplates, unicharset_mftraining);
|
|
379
|
+
fclose (OutFile);
|
|
380
|
+
strcpy (Filename, "");
|
|
381
|
+
if (Directory != NULL) {
|
|
382
|
+
strcat (Filename, Directory);
|
|
383
|
+
strcat (Filename, "/");
|
|
384
|
+
}
|
|
385
|
+
strcat (Filename, "pffmtable");
|
|
386
|
+
// Now create pffmtable.
|
|
387
|
+
WritePFFMTable(IntTemplates, Filename);
|
|
388
|
+
printf ("Done!\n"); /**/
|
|
389
|
+
FreeLabeledClassList (ClassList);
|
|
390
|
+
return 0;
|
|
391
|
+
} /* main */
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
/**----------------------------------------------------------------------------
|
|
395
|
+
Private Code
|
|
396
|
+
----------------------------------------------------------------------------**/
|
|
397
|
+
/*---------------------------------------------------------------------------*/
|
|
398
|
+
void ParseArguments(
|
|
399
|
+
int argc,
|
|
400
|
+
char **argv)
|
|
401
|
+
|
|
402
|
+
/*
|
|
403
|
+
** Parameters:
|
|
404
|
+
** argc number of command line arguments to parse
|
|
405
|
+
** argv command line arguments
|
|
406
|
+
** Globals:
|
|
407
|
+
** ShowAllSamples flag controlling samples display
|
|
408
|
+
** ShowSignificantProtos flag controlling proto display
|
|
409
|
+
** ShowInsignificantProtos flag controlling proto display
|
|
410
|
+
** Config current clustering parameters
|
|
411
|
+
** tessoptarg, tessoptind defined by tessopt sys call
|
|
412
|
+
** Argc, Argv global copies of argc and argv
|
|
413
|
+
** Operation:
|
|
414
|
+
** This routine parses the command line arguments that were
|
|
415
|
+
** passed to the program. The legal arguments are:
|
|
416
|
+
** -d "turn off display of samples"
|
|
417
|
+
** -p "turn off significant protos"
|
|
418
|
+
** -n "turn off insignificant proto"
|
|
419
|
+
** -S [ spherical | elliptical | mixed | automatic ]
|
|
420
|
+
** -M MinSamples "min samples per prototype (%)"
|
|
421
|
+
** -B MaxIllegal "max illegal chars per cluster (%)"
|
|
422
|
+
** -I Independence "0 to 1"
|
|
423
|
+
** -C Confidence "1e-200 to 1.0"
|
|
424
|
+
** -D Directory
|
|
425
|
+
** -N MaxNumSamples
|
|
426
|
+
** -R RoundingAccuracy
|
|
427
|
+
** Return: none
|
|
428
|
+
** Exceptions: Illegal options terminate the program.
|
|
429
|
+
** History: 7/24/89, DSJ, Created.
|
|
430
|
+
*/
|
|
431
|
+
|
|
432
|
+
{
|
|
433
|
+
int Option;
|
|
434
|
+
int ParametersRead;
|
|
435
|
+
BOOL8 Error;
|
|
436
|
+
|
|
437
|
+
Error = FALSE;
|
|
438
|
+
Argc = argc;
|
|
439
|
+
Argv = argv;
|
|
440
|
+
while (( Option = tessopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
|
|
441
|
+
{
|
|
442
|
+
switch ( Option )
|
|
443
|
+
{
|
|
444
|
+
case 'n':
|
|
445
|
+
ShowInsignificantProtos = FALSE;
|
|
446
|
+
break;
|
|
447
|
+
case 'p':
|
|
448
|
+
ShowSignificantProtos = FALSE;
|
|
449
|
+
break;
|
|
450
|
+
case 'd':
|
|
451
|
+
ShowAllSamples = FALSE;
|
|
452
|
+
break;
|
|
453
|
+
case 'C':
|
|
454
|
+
ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
|
|
455
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
456
|
+
else if ( Config.Confidence > 1 ) Config.Confidence = 1;
|
|
457
|
+
else if ( Config.Confidence < 0 ) Config.Confidence = 0;
|
|
458
|
+
break;
|
|
459
|
+
case 'I':
|
|
460
|
+
ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
|
|
461
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
462
|
+
else if ( Config.Independence > 1 ) Config.Independence = 1;
|
|
463
|
+
else if ( Config.Independence < 0 ) Config.Independence = 0;
|
|
464
|
+
break;
|
|
465
|
+
case 'M':
|
|
466
|
+
ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
|
|
467
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
468
|
+
else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
|
|
469
|
+
else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
|
|
470
|
+
break;
|
|
471
|
+
case 'B':
|
|
472
|
+
ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
|
|
473
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
474
|
+
else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
|
|
475
|
+
else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
|
|
476
|
+
break;
|
|
477
|
+
case 'R':
|
|
478
|
+
ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
|
|
479
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
480
|
+
else if ( RoundingAccuracy > 0.01f ) RoundingAccuracy = 0.01f;
|
|
481
|
+
else if ( RoundingAccuracy < 0.0f ) RoundingAccuracy = 0.0f;
|
|
482
|
+
break;
|
|
483
|
+
case 'S':
|
|
484
|
+
switch ( tessoptarg[0] )
|
|
485
|
+
{
|
|
486
|
+
case 's': Config.ProtoStyle = spherical; break;
|
|
487
|
+
case 'e': Config.ProtoStyle = elliptical; break;
|
|
488
|
+
case 'm': Config.ProtoStyle = mixed; break;
|
|
489
|
+
case 'a': Config.ProtoStyle = automatic; break;
|
|
490
|
+
default: Error = TRUE;
|
|
491
|
+
}
|
|
492
|
+
break;
|
|
493
|
+
case 'D':
|
|
494
|
+
Directory = tessoptarg;
|
|
495
|
+
break;
|
|
496
|
+
case 'N':
|
|
497
|
+
if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
|
|
498
|
+
MaxNumSamples <= 0)
|
|
499
|
+
Error = TRUE;
|
|
500
|
+
break;
|
|
501
|
+
case '?':
|
|
502
|
+
Error = TRUE;
|
|
503
|
+
break;
|
|
504
|
+
}
|
|
505
|
+
if ( Error )
|
|
506
|
+
{
|
|
507
|
+
fprintf (stderr, "usage: %s [-D] [-P] [-N]\n", argv[0] );
|
|
508
|
+
fprintf (stderr, "\t[-S ProtoStyle]\n");
|
|
509
|
+
fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
|
|
510
|
+
fprintf (stderr, "\t[-d directory] [-n MaxNumSamples] [ TrainingPage ... ]\n");
|
|
511
|
+
exit (2);
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
} // ParseArguments
|
|
515
|
+
|
|
516
|
+
/*---------------------------------------------------------------------------*/
|
|
517
|
+
char *GetNextFilename ()
|
|
518
|
+
/*
|
|
519
|
+
** Parameters: none
|
|
520
|
+
** Globals:
|
|
521
|
+
** tessoptind defined by tessopt sys call
|
|
522
|
+
** Argc, Argv global copies of argc and argv
|
|
523
|
+
** Operation:
|
|
524
|
+
** This routine returns the next command line argument. If
|
|
525
|
+
** there are no remaining command line arguments, it returns
|
|
526
|
+
** NULL. This routine should only be called after all option
|
|
527
|
+
** arguments have been parsed and removed with ParseArguments.
|
|
528
|
+
** Return: Next command line argument or NULL.
|
|
529
|
+
** Exceptions: none
|
|
530
|
+
** History: Fri Aug 18 09:34:12 1989, DSJ, Created.
|
|
531
|
+
*/
|
|
532
|
+
|
|
533
|
+
{
|
|
534
|
+
if (tessoptind < Argc)
|
|
535
|
+
return (Argv [tessoptind++]);
|
|
536
|
+
else
|
|
537
|
+
return (NULL);
|
|
538
|
+
|
|
539
|
+
} /* GetNextFilename */
|
|
540
|
+
|
|
541
|
+
/*---------------------------------------------------------------------------*/
|
|
542
|
+
LIST ReadTrainingSamples (
     FILE	*File)

/*
 **	Parameters:
 **		File		open text file to read samples from
 **	Globals:
 **		FontName		receives the font name of each sample
 **		unicharset_mftraining	every new character label is added
 **	Operation:
 **		This routine reads training samples from a file and
 **		places them into a list of labeled lists, one per character
 **		label.  For every sample only the feature set of type
 **		PROGRAM_FEATURE_TYPE is kept; a small uniform random offset
 **		is added to each of its parameters and the other feature
 **		sets of the sample are freed.
 **		The font name and character label are read with explicit
 **		field widths so that over-long tokens in the input cannot
 **		overrun FontName or the local label buffer.
 **	Return: List of labeled lists holding the samples read.
 **	Exceptions: Exits with status 1 if the character set grows beyond
 **		MAX_NUM_CLASSES.
 **	History: Fri Aug 18 13:11:39 1989, DSJ, Created.
 **			 Tue May 17 1998 simplifications to structure, eliminated
 **				font, and feature specification levels of structure.
 */

{
  char			unichar[UNICHAR_LEN + 1];
  char			Format[32];
  LABELEDLIST	CharSample;
  FEATURE_SET FeatureSamples;
  LIST		TrainingSamples = NIL;
  CHAR_DESC	CharDesc;
  int		Type, i;

  // Build "%<w1>s %<w2>s" with widths one less than the buffer sizes, so
  // fscanf always leaves room for the terminating NUL.
  sprintf (Format, "%%%ds %%%ds", MAXNAMESIZE - 1, (int) UNICHAR_LEN);

  while (fscanf (File, Format, FontName, unichar) == 2) {
    if (!unicharset_mftraining.contains_unichar(unichar)) {
      unicharset_mftraining.unichar_insert(unichar);
      if (unicharset_mftraining.size() > MAX_NUM_CLASSES) {
        cprintf("Error: Size of unicharset of mftraining is "
                "greater than MAX_NUM_CLASSES\n");
        exit(1);
      }
    }
    CharSample = FindList (TrainingSamples, unichar);
    if (CharSample == NULL) {
      CharSample = NewLabeledList (unichar);
      TrainingSamples = push (TrainingSamples, CharSample);
    }
    CharDesc = ReadCharDescription (File);
    Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
    FeatureSamples = CharDesc->FeatureSets[Type];
    // Perturb every parameter; the direction parameter uses the
    // MINSD_ANGLE range, all other parameters use the MINSD range.
    for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
      FEATURE f = FeatureSamples->Features[feature];
      for (int dim =0; dim < f->Type->NumParams; ++dim)
        f->Params[dim] += dim == MFDirection ?
                          UniformRandomNumber(-MINSD_ANGLE, MINSD_ANGLE) :
                          UniformRandomNumber(-MINSD, MINSD);
    }
    CharSample->List = push (CharSample->List, FeatureSamples);
    CharSample->SampleCount++;
    // The kept feature set now belongs to CharSample->List; release the
    // remaining feature sets and the description record itself.
    for (i = 0; i < CharDesc->NumFeatureSets; i++)
      if (Type != i)
        FreeFeatureSet(CharDesc->FeatureSets[i]);
    free (CharDesc);
  }
  return (TrainingSamples);

}	/* ReadTrainingSamples */
|
|
603
|
+
|
|
604
|
+
/*---------------------------------------------------------------------------*/
|
|
605
|
+
LABELEDLIST FindList (
     LIST	List,
     char	*Label)

/*
 **	Parameters:
 **		List		list of labeled lists to search
 **		Label		label to look for
 **	Globals: none
 **	Operation:
 **		Walks List from its head and compares the label of every
 **		entry with Label using strcmp; the first exact match wins.
 **	Return: The matching labeled list, or NULL if there is none.
 **	Exceptions: none
 **	History: Fri Aug 18 15:57:41 1989, DSJ, Created.
 */

{
  iterate (List)
    {
      LABELEDLIST Candidate = (LABELEDLIST) first_node (List);
      if (strcmp (Candidate->Label, Label) != 0)
        continue;
      return (Candidate);
    }
  return (NULL);

}	/* FindList */
|
|
635
|
+
|
|
636
|
+
/*----------------------------------------------------------------------------*/
|
|
637
|
+
MERGE_CLASS FindClass (
     LIST	List,
     char	*Label)

/*
 **	Parameters:
 **		List		list of merge classes to search
 **		Label		label to look for
 **	Operation:
 **		Walks List from its head and compares the label of every
 **		merge class with Label using strcmp; the first exact match
 **		wins.
 **	Return: The matching merge class, or NULL if there is none.
 */

{
  iterate (List)
    {
      MERGE_CLASS Candidate = (MERGE_CLASS) first_node (List);
      if (strcmp (Candidate->Label, Label) != 0)
        continue;
      return (Candidate);
    }
  return (NULL);

}	/* FindClass */
|
|
652
|
+
|
|
653
|
+
/*---------------------------------------------------------------------------*/
|
|
654
|
+
LABELEDLIST NewLabeledList (
     char	*Label)

/*
 **	Parameters:
 **		Label	label for new list
 **	Globals: none
 **	Operation:
 **		Allocates a labeled list node with Emalloc, stores a private
 **		copy of Label in it, and starts it with an empty sample list
 **		and a sample count of zero.
 **	Return: New, empty labeled list.
 **	Exceptions: none
 **	History: Fri Aug 18 16:08:46 1989, DSJ, Created.
 */

{
  LABELEDLIST	Node = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
  size_t	LabelBytes = strlen (Label) + 1;	// includes the NUL

  Node->Label = (char*)Emalloc (LabelBytes);
  memcpy (Node->Label, Label, LabelBytes);
  Node->SampleCount = 0;
  Node->List = NIL;
  return (Node);

}	/* NewLabeledList */
|
|
680
|
+
|
|
681
|
+
/*---------------------------------------------------------------------------*/
|
|
682
|
+
MERGE_CLASS NewLabeledClass (
     char	*Label)

/*
 **	Parameters:
 **		Label	label for the new merge class
 **	Operation:
 **		Allocates a merge class node with Emalloc, stores a private
 **		copy of Label in it, and attaches a class created by
 **		NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS).  The NumMerged
 **		array is not initialized here.
 **	Return: New labeled merge class.
 */

{
  MERGE_CLASS	Node = (MERGE_CLASS) Emalloc (sizeof (MERGE_CLASS_NODE));
  size_t	LabelBytes = strlen (Label) + 1;	// includes the NUL

  Node->Label = (char*)Emalloc (LabelBytes);
  memcpy (Node->Label, Label, LabelBytes);
  Node->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
  return (Node);

}	/* NewLabeledClass */
|
|
694
|
+
|
|
695
|
+
/*---------------------------------------------------------------------------*/
|
|
696
|
+
void WriteTrainingSamples (
|
|
697
|
+
char *Directory,
|
|
698
|
+
LIST CharList)
|
|
699
|
+
|
|
700
|
+
/*
|
|
701
|
+
** Parameters:
|
|
702
|
+
** Directory directory to place sample files into
|
|
703
|
+
** FontList list of fonts used in the training samples
|
|
704
|
+
** Globals:
|
|
705
|
+
** MaxNumSamples max number of samples per class to write
|
|
706
|
+
** Operation:
|
|
707
|
+
** This routine writes the specified samples into files which
|
|
708
|
+
** are organized according to the font name and character name
|
|
709
|
+
** of the samples.
|
|
710
|
+
** Return: none
|
|
711
|
+
** Exceptions: none
|
|
712
|
+
** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
|
|
713
|
+
*/
|
|
714
|
+
|
|
715
|
+
{
|
|
716
|
+
LABELEDLIST CharSample;
|
|
717
|
+
FEATURE_SET FeatureSet;
|
|
718
|
+
LIST FeatureList;
|
|
719
|
+
FILE *File;
|
|
720
|
+
char Filename[MAXNAMESIZE];
|
|
721
|
+
int NumSamples;
|
|
722
|
+
|
|
723
|
+
iterate (CharList) // iterate thru all of the fonts
|
|
724
|
+
{
|
|
725
|
+
CharSample = (LABELEDLIST) first_node (CharList);
|
|
726
|
+
|
|
727
|
+
// construct the full pathname for the current samples file
|
|
728
|
+
strcpy (Filename, "");
|
|
729
|
+
if (Directory != NULL)
|
|
730
|
+
{
|
|
731
|
+
strcat (Filename, Directory);
|
|
732
|
+
strcat (Filename, "/");
|
|
733
|
+
}
|
|
734
|
+
strcat (Filename, FontName);
|
|
735
|
+
strcat (Filename, "/");
|
|
736
|
+
strcat (Filename, CharSample->Label);
|
|
737
|
+
strcat (Filename, ".");
|
|
738
|
+
strcat (Filename, PROGRAM_FEATURE_TYPE);
|
|
739
|
+
printf ("\nWriting %s ...", Filename);
|
|
740
|
+
|
|
741
|
+
/* if file does not exist, create a new one with an appropriate
|
|
742
|
+
header; otherwise append samples to the existing file */
|
|
743
|
+
File = fopen (Filename, "r");
|
|
744
|
+
if (File == NULL)
|
|
745
|
+
{
|
|
746
|
+
File = Efopen (Filename, "w");
|
|
747
|
+
WriteOldParamDesc
|
|
748
|
+
(File, FeatureDefs.FeatureDesc[ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)]);
|
|
749
|
+
}
|
|
750
|
+
else
|
|
751
|
+
{
|
|
752
|
+
fclose (File);
|
|
753
|
+
File = Efopen (Filename, "a");
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
// append samples onto the file
|
|
757
|
+
FeatureList = CharSample->List;
|
|
758
|
+
NumSamples = 0;
|
|
759
|
+
iterate (FeatureList)
|
|
760
|
+
{
|
|
761
|
+
if (NumSamples >= MaxNumSamples) break;
|
|
762
|
+
|
|
763
|
+
FeatureSet = (FEATURE_SET) first_node (FeatureList);
|
|
764
|
+
WriteFeatureSet (File, FeatureSet);
|
|
765
|
+
NumSamples++;
|
|
766
|
+
}
|
|
767
|
+
fclose (File);
|
|
768
|
+
}
|
|
769
|
+
} /* WriteTrainingSamples */
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
/*----------------------------------------------------------------------------*/
|
|
773
|
+
void WriteClusteredTrainingSamples (
|
|
774
|
+
char *Directory,
|
|
775
|
+
LIST ProtoList,
|
|
776
|
+
CLUSTERER *Clusterer,
|
|
777
|
+
LABELEDLIST CharSample)
|
|
778
|
+
|
|
779
|
+
/*
|
|
780
|
+
** Parameters:
|
|
781
|
+
** Directory directory to place sample files into
|
|
782
|
+
** Globals:
|
|
783
|
+
** MaxNumSamples max number of samples per class to write
|
|
784
|
+
** Operation:
|
|
785
|
+
** This routine writes the specified samples into files which
|
|
786
|
+
** are organized according to the font name and character name
|
|
787
|
+
** of the samples.
|
|
788
|
+
** Return: none
|
|
789
|
+
** Exceptions: none
|
|
790
|
+
** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
|
|
791
|
+
*/
|
|
792
|
+
|
|
793
|
+
{
|
|
794
|
+
FILE *File;
|
|
795
|
+
char Filename[MAXNAMESIZE];
|
|
796
|
+
|
|
797
|
+
strcpy (Filename, "");
|
|
798
|
+
if (Directory != NULL)
|
|
799
|
+
{
|
|
800
|
+
strcat (Filename, Directory);
|
|
801
|
+
strcat (Filename, "/");
|
|
802
|
+
}
|
|
803
|
+
strcat (Filename, FontName);
|
|
804
|
+
strcat (Filename, "/");
|
|
805
|
+
strcat (Filename, CharSample->Label);
|
|
806
|
+
strcat (Filename, ".");
|
|
807
|
+
strcat (Filename, PROGRAM_FEATURE_TYPE);
|
|
808
|
+
strcat (Filename, ".p");
|
|
809
|
+
printf ("\nWriting %s ...", Filename);
|
|
810
|
+
File = Efopen (Filename, "w");
|
|
811
|
+
WriteProtoList(File, Clusterer->SampleSize, Clusterer->ParamDesc,
|
|
812
|
+
ProtoList, ShowSignificantProtos, ShowInsignificantProtos);
|
|
813
|
+
fclose (File);
|
|
814
|
+
|
|
815
|
+
} /* WriteClusteredTrainingSamples */
|
|
816
|
+
|
|
817
|
+
/*---------------------------------------------------------------------------*/
|
|
818
|
+
void WriteMergedTrainingSamples(
|
|
819
|
+
char *Directory,
|
|
820
|
+
LIST ClassList)
|
|
821
|
+
|
|
822
|
+
{
|
|
823
|
+
FILE *File;
|
|
824
|
+
char Filename[MAXNAMESIZE];
|
|
825
|
+
MERGE_CLASS MergeClass;
|
|
826
|
+
|
|
827
|
+
iterate (ClassList)
|
|
828
|
+
{
|
|
829
|
+
MergeClass = (MERGE_CLASS) first_node (ClassList);
|
|
830
|
+
strcpy (Filename, "");
|
|
831
|
+
if (Directory != NULL)
|
|
832
|
+
{
|
|
833
|
+
strcat (Filename, Directory);
|
|
834
|
+
strcat (Filename, "/");
|
|
835
|
+
}
|
|
836
|
+
strcat (Filename, "Merged/");
|
|
837
|
+
strcat (Filename, MergeClass->Label);
|
|
838
|
+
strcat (Filename, PROTO_SUFFIX);
|
|
839
|
+
printf ("\nWriting Merged %s ...", Filename);
|
|
840
|
+
File = Efopen (Filename, "w");
|
|
841
|
+
WriteOldProtoFile (File, MergeClass->Class);
|
|
842
|
+
fclose (File);
|
|
843
|
+
|
|
844
|
+
strcpy (Filename, "");
|
|
845
|
+
if (Directory != NULL)
|
|
846
|
+
{
|
|
847
|
+
strcat (Filename, Directory);
|
|
848
|
+
strcat (Filename, "/");
|
|
849
|
+
}
|
|
850
|
+
strcat (Filename, "Merged/");
|
|
851
|
+
strcat (Filename, MergeClass->Label);
|
|
852
|
+
strcat (Filename, CONFIG_SUFFIX);
|
|
853
|
+
printf ("\nWriting Merged %s ...", Filename);
|
|
854
|
+
File = Efopen (Filename, "w");
|
|
855
|
+
WriteOldConfigFile (File, MergeClass->Class);
|
|
856
|
+
fclose (File);
|
|
857
|
+
}
|
|
858
|
+
|
|
859
|
+
} // WriteMergedTrainingSamples
|
|
860
|
+
|
|
861
|
+
/*--------------------------------------------------------------------------*/
|
|
862
|
+
void WriteMicrofeat(
|
|
863
|
+
char *Directory,
|
|
864
|
+
LIST ClassList)
|
|
865
|
+
|
|
866
|
+
{
|
|
867
|
+
FILE *File;
|
|
868
|
+
char Filename[MAXNAMESIZE];
|
|
869
|
+
MERGE_CLASS MergeClass;
|
|
870
|
+
|
|
871
|
+
strcpy (Filename, "");
|
|
872
|
+
if (Directory != NULL)
|
|
873
|
+
{
|
|
874
|
+
strcat (Filename, Directory);
|
|
875
|
+
strcat (Filename, "/");
|
|
876
|
+
}
|
|
877
|
+
strcat (Filename, "Microfeat");
|
|
878
|
+
File = Efopen (Filename, "w");
|
|
879
|
+
printf ("\nWriting Merged %s ...", Filename);
|
|
880
|
+
iterate(ClassList)
|
|
881
|
+
{
|
|
882
|
+
MergeClass = (MERGE_CLASS) first_node (ClassList);
|
|
883
|
+
WriteProtos(File, MergeClass);
|
|
884
|
+
WriteConfigs(File, MergeClass->Class);
|
|
885
|
+
}
|
|
886
|
+
fclose (File);
|
|
887
|
+
} // WriteMicrofeat
|
|
888
|
+
|
|
889
|
+
/*---------------------------------------------------------------------------*/
|
|
890
|
+
void WriteProtos(
|
|
891
|
+
FILE* File,
|
|
892
|
+
MERGE_CLASS MergeClass)
|
|
893
|
+
{
|
|
894
|
+
float Values[3];
|
|
895
|
+
int i;
|
|
896
|
+
PROTO Proto;
|
|
897
|
+
|
|
898
|
+
fprintf(File, "%s\n", MergeClass->Label);
|
|
899
|
+
fprintf(File, "%d\n", MergeClass->Class->NumProtos);
|
|
900
|
+
for(i=0; i < (MergeClass->Class)->NumProtos; i++)
|
|
901
|
+
{
|
|
902
|
+
Proto = ProtoIn(MergeClass->Class,i);
|
|
903
|
+
fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", Proto->X, Proto->Y,
|
|
904
|
+
Proto->Length, Proto->Angle);
|
|
905
|
+
Values[0] = Proto->X;
|
|
906
|
+
Values[1] = Proto->Y;
|
|
907
|
+
Values[2] = Proto->Angle;
|
|
908
|
+
Normalize(Values);
|
|
909
|
+
fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]);
|
|
910
|
+
}
|
|
911
|
+
} // WriteProtos
|
|
912
|
+
|
|
913
|
+
/*----------------------------------------------------------------------------*/
|
|
914
|
+
void WriteConfigs(
|
|
915
|
+
FILE* File,
|
|
916
|
+
CLASS_TYPE Class)
|
|
917
|
+
{
|
|
918
|
+
BIT_VECTOR Config;
|
|
919
|
+
int i, j, WordsPerConfig;
|
|
920
|
+
|
|
921
|
+
WordsPerConfig = WordsInVectorOfSize(Class->NumProtos);
|
|
922
|
+
fprintf(File, "%d %d\n", Class->NumConfigs, WordsPerConfig);
|
|
923
|
+
for(i=0; i < Class->NumConfigs; i++)
|
|
924
|
+
{
|
|
925
|
+
Config = Class->Configurations[i];
|
|
926
|
+
for(j=0; j < WordsPerConfig; j++)
|
|
927
|
+
fprintf(File, "%08x ", Config[j]);
|
|
928
|
+
fprintf(File, "\n");
|
|
929
|
+
}
|
|
930
|
+
fprintf(File, "\n");
|
|
931
|
+
} // WriteConfigs
|
|
932
|
+
|
|
933
|
+
/*---------------------------------------------------------------------------*/
|
|
934
|
+
void FreeTrainingSamples (
     LIST CharList)

/*
**  Parameters:
**    CharList  list whose nodes are LABELEDLIST records; each
**              record's List holds FEATURE_SET entries
**  Globals: none
**  Operation:
**    This routine deallocates all of the space allocated to
**    the specified list of training samples: every feature set
**    (FreeFeatureSet), every labeled list (FreeLabeledList) and
**    finally the list passed in (destroy).
**  Return: none
**  Exceptions: none
**  History: Fri Aug 18 17:44:27 1989, DSJ, Created.
*/

{
  LABELEDLIST CharSample;
  FEATURE_SET FeatureSet;
  LIST FeatureList;
  /* iterate() advances CharList itself, so remember the head: otherwise
     destroy() below would be handed the exhausted cursor, not the list */
  LIST ListHead = CharList;

  iterate (CharList)            /* each labeled list */
  {
    CharSample = (LABELEDLIST) first_node (CharList);
    FeatureList = CharSample->List;
    iterate (FeatureList)       /* each feature set in it */
    {
      FeatureSet = (FEATURE_SET) first_node (FeatureList);
      FreeFeatureSet (FeatureSet);
    }
    FreeLabeledList (CharSample);
  }
  destroy (ListHead);

} /* FreeTrainingSamples */
|
|
970
|
+
|
|
971
|
+
/*-----------------------------------------------------------------------------*/
|
|
972
|
+
void FreeLabeledClassList (
     LIST ClassList)

/*
**  Parameters:
**    ClassList  list whose nodes are MERGE_CLASS records
**  Globals: none
**  Operation:
**    This routine deallocates every record in the list (its
**    Label, its Class via FreeClass, and the record itself) and
**    then the list passed in (destroy).
**  Return: none
**  Exceptions: none
**  History: Fri Aug 18 17:44:27 1989, DSJ, Created.
*/

{
  MERGE_CLASS MergeClass;
  /* iterate() advances ClassList itself, so remember the head: otherwise
     destroy() below would be handed the exhausted cursor, not the list */
  LIST ListHead = ClassList;

  iterate (ClassList)           /* each merged class */
  {
    MergeClass = (MERGE_CLASS) first_node (ClassList);
    free (MergeClass->Label);
    FreeClass (MergeClass->Class);
    free (MergeClass);
  }
  destroy (ListHead);

} /* FreeLabeledClassList */
|
|
1000
|
+
|
|
1001
|
+
/*---------------------------------------------------------------------------*/
|
|
1002
|
+
void FreeLabeledList (
     LABELEDLIST LabeledList)

/*
**  Parameters:
**    LabeledList  labeled list to be freed
**  Globals: none
**  Operation:
**    Releases, in this order, the list stored in the record
**    (destroy), the record's label string and the record itself.
**    Per the original note, memory owned by the items stored in
**    the list is not released here.
**  Return: none
**  Exceptions: none
**  History: Fri Aug 18 17:52:45 1989, DSJ, Created.
*/

{
  /* the list held by the record */
  destroy (LabeledList->List);

  /* the label string, then the record that owned both */
  free (LabeledList->Label);
  free (LabeledList);

} /* FreeLabeledList */
|
|
1024
|
+
|
|
1025
|
+
/*---------------------------------------------------------------------------*/
|
|
1026
|
+
CLUSTERER *SetUpForClustering(
     LABELEDLIST CharSample)

/*
**  Parameters:
**    CharSample  LABELEDLIST whose List holds the FEATURE_SETs of
**                one character
**  Globals:
**    FeatureDefs       looked up for the PROGRAM_FEATURE_TYPE descriptor
**    MaxNumSamples     upper bound on the number of feature sets used
**    RoundingAccuracy  when non-zero, every parameter is passed
**                      through round() with this accuracy
**  Operation:
**    Creates a clusterer sized for the feature type's parameters
**    and feeds it one sample per feature of every feature set,
**    tagging each sample with the index of the feature set it came
**    from.  Stops after MaxNumSamples feature sets.
**  Return:
**    Pointer to new clusterer data structure.
**  History:
**    8/16/89, DSJ, Created.
*/

{
  FEATURE_DESC Desc =
    FeatureDefs.FeatureDesc[ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)];
  uinT16 ParamCount = Desc->NumParams;
  CLUSTERER *Result = MakeClusterer (ParamCount, Desc->ParamDesc);
  FLOAT32 *Buffer = NULL;       /* scratch sample, allocated on first use */
  inT32 SetIndex = 0;
  LIST Remaining = CharSample->List;
  FEATURE_SET Current = NULL;
  int FeatureIdx;
  int ParamIdx;

  iterate (Remaining)
  {
    if (SetIndex >= MaxNumSamples)
      break;

    Current = (FEATURE_SET) first_node (Remaining);
    for (FeatureIdx = 0; FeatureIdx < Current->MaxNumFeatures; FeatureIdx++)
      {
        if (Buffer == NULL)
          Buffer = (FLOAT32 *) Emalloc (ParamCount * sizeof (FLOAT32));

        for (ParamIdx = 0; ParamIdx < ParamCount; ParamIdx++)
          {
            if (RoundingAccuracy != 0.0f)
              {
                Buffer[ParamIdx] =
                  round (Current->Features[FeatureIdx]->Params[ParamIdx],
                         RoundingAccuracy);
              }
            else
              {
                Buffer[ParamIdx] =
                  Current->Features[FeatureIdx]->Params[ParamIdx];
              }
          }
        MakeSample (Result, Buffer, SetIndex);
      }
    SetIndex++;
  }

  if (Buffer != NULL)
    free (Buffer);
  return Result;

} /* SetUpForClustering */
|
|
1088
|
+
|
|
1089
|
+
/*------------------------------------------------------------------------*/
|
|
1090
|
+
// Folds insignificant prototypes of ProtoList into their nearest neighbour.
//
// Pass 1: for every prototype that is neither Significant nor Merged, find
// the closest other non-Merged prototype whose ComputeDistance is below
// 0.125.  If that neighbour is itself insignificant, the two are combined
// with MergeClusters (the neighbour keeps the result, this prototype's
// NumSamples becomes 0); in either case this prototype is flagged Merged.
// Pass 2: every insignificant, non-Merged prototype that now has at least
// Config->MinSamples * Clusterer->NumChar samples is marked Significant.
//
// Debug output is printed with tprintf when label equals test_ch.
void MergeInsignificantProtos(LIST ProtoList, const char* label,
                              CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
  const bool trace = strcmp(test_ch, label) == 0;

  // ---- pass 1: merge or retire insignificant prototypes ----
  LIST outer = ProtoList;
  iterate(outer) {
    PROTOTYPE *candidate = (PROTOTYPE *) first_node (outer);
    if (candidate->Significant || candidate->Merged)
      continue;

    // Search the whole list for the nearest prototype still alive.
    FLOAT32 nearest_dist = 0.125;
    PROTOTYPE *nearest = NULL;
    LIST inner = ProtoList;
    iterate(inner) {
      PROTOTYPE *other = (PROTOTYPE *) first_node (inner);
      if (other == candidate || other->Merged)
        continue;
      FLOAT32 dist = ComputeDistance(Clusterer->SampleSize,
                                     Clusterer->ParamDesc,
                                     candidate->Mean, other->Mean);
      if (dist < nearest_dist) {
        nearest = other;
        nearest_dist = dist;
      }
    }

    if (nearest == NULL)
      continue;  // nothing close enough; leave the prototype untouched

    if (nearest->Significant) {
      // A significant neighbour absorbs this one without changing.
      if (trace)
        tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
                candidate->Mean[0], candidate->Mean[1],
                nearest->Mean[0], nearest->Mean[1]);
      candidate->Merged = 1;
    } else {
      // Two insignificant prototypes: combine them into the neighbour.
      if (trace)
        tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
                nearest->NumSamples, candidate->NumSamples,
                nearest->Mean[0], nearest->Mean[1],
                candidate->Mean[0], candidate->Mean[1]);
      nearest->NumSamples = MergeClusters(Clusterer->SampleSize,
                                          Clusterer->ParamDesc,
                                          nearest->NumSamples,
                                          candidate->NumSamples,
                                          nearest->Mean,
                                          nearest->Mean, candidate->Mean);
      candidate->NumSamples = 0;
      candidate->Merged = 1;
    }
  }

  // ---- pass 2: promote prototypes that gathered enough samples ----
  int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar);
  LIST cursor = ProtoList;
  iterate(cursor) {
    PROTOTYPE *proto = (PROTOTYPE *) first_node (cursor);
    if (proto->Significant || proto->NumSamples < min_samples ||
        proto->Merged)
      continue;
    if (trace)
      tprintf("Red proto at %g,%g becoming green\n",
              proto->Mean[0], proto->Mean[1]);
    proto->Significant = true;
  }
}  /* MergeInsignificantProtos */
|
|
1153
|
+
|
|
1154
|
+
/*------------------------------------------------------------------------*/
|
|
1155
|
+
LIST RemoveInsignificantProtos(
|
|
1156
|
+
LIST ProtoList,
|
|
1157
|
+
BOOL8 KeepSigProtos,
|
|
1158
|
+
BOOL8 KeepInsigProtos,
|
|
1159
|
+
int N)
|
|
1160
|
+
|
|
1161
|
+
{
|
|
1162
|
+
LIST NewProtoList = NIL;
|
|
1163
|
+
LIST pProtoList;
|
|
1164
|
+
PROTOTYPE* Proto;
|
|
1165
|
+
PROTOTYPE* NewProto;
|
|
1166
|
+
int i;
|
|
1167
|
+
|
|
1168
|
+
pProtoList = ProtoList;
|
|
1169
|
+
iterate(pProtoList)
|
|
1170
|
+
{
|
|
1171
|
+
Proto = (PROTOTYPE *) first_node (pProtoList);
|
|
1172
|
+
if ((Proto->Significant && KeepSigProtos) ||
|
|
1173
|
+
(!Proto->Significant && KeepInsigProtos))
|
|
1174
|
+
{
|
|
1175
|
+
NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
|
|
1176
|
+
|
|
1177
|
+
NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
|
1178
|
+
NewProto->Significant = Proto->Significant;
|
|
1179
|
+
NewProto->Style = Proto->Style;
|
|
1180
|
+
NewProto->NumSamples = Proto->NumSamples;
|
|
1181
|
+
NewProto->Cluster = NULL;
|
|
1182
|
+
NewProto->Distrib = NULL;
|
|
1183
|
+
|
|
1184
|
+
for (i=0; i < N; i++)
|
|
1185
|
+
NewProto->Mean[i] = Proto->Mean[i];
|
|
1186
|
+
if (Proto->Variance.Elliptical != NULL)
|
|
1187
|
+
{
|
|
1188
|
+
NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
|
1189
|
+
for (i=0; i < N; i++)
|
|
1190
|
+
NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
|
|
1191
|
+
}
|
|
1192
|
+
else
|
|
1193
|
+
NewProto->Variance.Elliptical = NULL;
|
|
1194
|
+
//---------------------------------------------
|
|
1195
|
+
if (Proto->Magnitude.Elliptical != NULL)
|
|
1196
|
+
{
|
|
1197
|
+
NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
|
1198
|
+
for (i=0; i < N; i++)
|
|
1199
|
+
NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
|
|
1200
|
+
}
|
|
1201
|
+
else
|
|
1202
|
+
NewProto->Magnitude.Elliptical = NULL;
|
|
1203
|
+
//------------------------------------------------
|
|
1204
|
+
if (Proto->Weight.Elliptical != NULL)
|
|
1205
|
+
{
|
|
1206
|
+
NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
|
1207
|
+
for (i=0; i < N; i++)
|
|
1208
|
+
NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
|
|
1209
|
+
}
|
|
1210
|
+
else
|
|
1211
|
+
NewProto->Weight.Elliptical = NULL;
|
|
1212
|
+
|
|
1213
|
+
NewProto->TotalMagnitude = Proto->TotalMagnitude;
|
|
1214
|
+
NewProto->LogMagnitude = Proto->LogMagnitude;
|
|
1215
|
+
NewProtoList = push_last(NewProtoList, NewProto);
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
//FreeProtoList (ProtoList);
|
|
1219
|
+
return (NewProtoList);
|
|
1220
|
+
} /* RemoveInsignificantProtos */
|
|
1221
|
+
/*-----------------------------------------------------------------------------*/
|
|
1222
|
+
void CleanUpUnusedData(
     LIST ProtoList)

/*
**  For every PROTOTYPE in ProtoList, releases the Elliptical arrays of
**  Variance, Magnitude and Weight with memfree (when present) and resets
**  the pointers to NULL.  The prototypes themselves are kept.
*/

{
  iterate (ProtoList)
  {
    PROTOTYPE *Proto = (PROTOTYPE *) first_node (ProtoList);

    if (Proto->Variance.Elliptical != NULL)
      {
        memfree (Proto->Variance.Elliptical);
        Proto->Variance.Elliptical = NULL;
      }

    if (Proto->Magnitude.Elliptical != NULL)
      {
        memfree (Proto->Magnitude.Elliptical);
        Proto->Magnitude.Elliptical = NULL;
      }

    if (Proto->Weight.Elliptical != NULL)
      {
        memfree (Proto->Weight.Elliptical);
        Proto->Weight.Elliptical = NULL;
      }
  }
}
|
|
1247
|
+
|
|
1248
|
+
/*--------------------------------------------------------------------------*/
|
|
1249
|
+
void Normalize (
|
|
1250
|
+
float *Values)
|
|
1251
|
+
{
|
|
1252
|
+
register float Slope;
|
|
1253
|
+
register float Intercept;
|
|
1254
|
+
register float Normalizer;
|
|
1255
|
+
|
|
1256
|
+
Slope = tan (Values [2] * 2 * PI);
|
|
1257
|
+
Intercept = Values [1] - Slope * Values [0];
|
|
1258
|
+
Normalizer = 1 / sqrt (Slope * Slope + 1.0);
|
|
1259
|
+
|
|
1260
|
+
Values [0] = Slope * Normalizer;
|
|
1261
|
+
Values [1] = - Normalizer;
|
|
1262
|
+
Values [2] = Intercept * Normalizer;
|
|
1263
|
+
} // Normalize
|
|
1264
|
+
|
|
1265
|
+
/** SetUpForFloat2Int **************************************************/
|
|
1266
|
+
void SetUpForFloat2Int(
     LIST LabeledClassList)

/*
**  For every MERGE_CLASS in LabeledClassList, fills the TrainingData
**  entry selected by the unichar id of the class label:
**    - Prototypes is newly allocated and receives X, Y, Length and Angle
**      of each source prototype plus A, B, C as computed by Normalize
**      from (X, Y, Angle);
**    - Configurations is newly allocated and receives a fresh bit vector
**      per source configuration, copied word by word.
**  NumProtos/MaxNumProtos and NumConfigs/MaxNumConfigs are set to the
**  source counts.
*/

{
  MERGE_CLASS Entry;
  CLASS_TYPE Source;
  CLASS_TYPE Target;
  int ProtoCount;
  int ConfigCount;
  int WordCount;
  int Idx;
  int WordIdx;
  float Line[3];
  PROTO Dst;
  PROTO Src;
  BIT_VECTOR DstBits;
  BIT_VECTOR SrcBits;

  iterate (LabeledClassList)
  {
    Entry = (MERGE_CLASS) first_node (LabeledClassList);
    Source = Entry->Class;
    Target = &TrainingData[unicharset_mftraining.unichar_to_id (
                             Entry->Label)];
    ProtoCount = Source->NumProtos;
    ConfigCount = Source->NumConfigs;

    /* ---- prototypes ---- */
    Target->NumProtos = ProtoCount;
    Target->MaxNumProtos = ProtoCount;
    Target->Prototypes =
      (PROTO) Emalloc (sizeof (PROTO_STRUCT) * ProtoCount);
    for (Idx = 0; Idx < ProtoCount; Idx++)
      {
        Dst = ProtoIn (Target, Idx);
        Src = ProtoIn (Source, Idx);

        Line[0] = Src->X;
        Line[1] = Src->Y;
        Line[2] = Src->Angle;
        Normalize (Line);

        Dst->X = Src->X;
        Dst->Y = Src->Y;
        Dst->Length = Src->Length;
        Dst->Angle = Src->Angle;
        Dst->A = Line[0];
        Dst->B = Line[1];
        Dst->C = Line[2];
      }

    /* ---- configurations ---- */
    Target->NumConfigs = ConfigCount;
    Target->MaxNumConfigs = ConfigCount;
    Target->Configurations =
      (BIT_VECTOR *) Emalloc (sizeof (BIT_VECTOR) * ConfigCount);
    WordCount = WordsInVectorOfSize (ProtoCount);
    for (Idx = 0; Idx < ConfigCount; Idx++)
      {
        DstBits = NewBitVector (ProtoCount);
        SrcBits = Source->Configurations[Idx];
        for (WordIdx = 0; WordIdx < WordCount; WordIdx++)
          {
            DstBits[WordIdx] = SrcBits[WordIdx];
          }
        Target->Configurations[Idx] = DstBits;
      }
  }
} // SetUpForFloat2Int
|
|
1325
|
+
|
|
1326
|
+
/*--------------------------------------------------------------------------*/
|
|
1327
|
+
// Writes one text line per class of Templates to the file named filename:
// the unichar of the class (looked up through unicharset_mftraining from
// Templates->ClassIdFor) followed by the largest ConfigLengths entry of
// that class (0 when the class has no configs).
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {
  FILE* out = Efopen(filename, "wb");

  int class_index = 0;
  while (class_index < Templates->NumClasses) {
    INT_CLASS current = Templates->Class[class_index];

    // Longest configuration of this class.
    int longest = 0;
    int config = 0;
    while (config < current->NumConfigs) {
      if (current->ConfigLengths[config] > longest) {
        longest = current->ConfigLengths[config];
      }
      ++config;
    }

    fprintf(out, "%s %d\n",
            unicharset_mftraining.id_to_unichar(
                Templates->ClassIdFor[class_index]),
            longest);
    ++class_index;
  }

  fclose(out);
}  // WritePFFMTable
|