tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,855 @@
|
|
|
1
|
+
/******************************************************************************
|
|
2
|
+
** Filename: cnTraining.cpp
|
|
3
|
+
** Purpose: Generates a normproto and pffmtable.
|
|
4
|
+
** Author: Dan Johnson
|
|
5
|
+
** Revisment: Christy Russon
|
|
6
|
+
** History: Fri Aug 18 08:53:50 1989, DSJ, Created.
|
|
7
|
+
** 5/25/90, DSJ, Adapted to multiple feature types.
|
|
8
|
+
** Tuesday, May 17, 1998 Changes made to make feature specific and
|
|
9
|
+
** simplify structures. First step in simplifying training process.
|
|
10
|
+
**
|
|
11
|
+
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
12
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
+
** you may not use this file except in compliance with the License.
|
|
14
|
+
** You may obtain a copy of the License at
|
|
15
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
16
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
17
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
+
** See the License for the specific language governing permissions and
|
|
20
|
+
** limitations under the License.
|
|
21
|
+
******************************************************************************/
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**----------------------------------------------------------------------------
|
|
25
|
+
Include Files and Type Defines
|
|
26
|
+
----------------------------------------------------------------------------**/
|
|
27
|
+
#include "oldlist.h"
|
|
28
|
+
#include "efio.h"
|
|
29
|
+
#include "emalloc.h"
|
|
30
|
+
#include "featdefs.h"
|
|
31
|
+
#include "tessopt.h"
|
|
32
|
+
#include "ocrfeatures.h"
|
|
33
|
+
#include "general.h"
|
|
34
|
+
#include "clusttool.h"
|
|
35
|
+
#include "cluster.h"
|
|
36
|
+
#include "name2char.h"
|
|
37
|
+
#include <string.h>
|
|
38
|
+
#include <stdio.h>
|
|
39
|
+
#include <math.h>
|
|
40
|
+
#include "unichar.h"
|
|
41
|
+
|
|
42
|
+
#define MAXNAMESIZE 80
|
|
43
|
+
#define MAX_NUM_SAMPLES 10000
|
|
44
|
+
#define PROGRAM_FEATURE_TYPE "cn"
|
|
45
|
+
#define MINSD (1.0f / 64.0f)
|
|
46
|
+
|
|
47
|
+
int row_number; /* cjn: fixes link problem */
|
|
48
|
+
|
|
49
|
+
typedef struct
|
|
50
|
+
{
|
|
51
|
+
char *Label;
|
|
52
|
+
int SampleCount;
|
|
53
|
+
LIST List;
|
|
54
|
+
}
|
|
55
|
+
LABELEDLISTNODE, *LABELEDLIST;
|
|
56
|
+
|
|
57
|
+
/* Round x to the nearest multiple of frag.  Every parameter use and the
   whole expansion are parenthesized so that expression arguments such as
   round(a + b, c + d) keep their intended grouping. */
#define round(x,frag) (floor((x)/(frag)+.5)*(frag))
|
|
58
|
+
|
|
59
|
+
/**----------------------------------------------------------------------------
|
|
60
|
+
Public Function Prototypes
|
|
61
|
+
----------------------------------------------------------------------------**/
|
|
62
|
+
int main (
|
|
63
|
+
int argc,
|
|
64
|
+
char **argv);
|
|
65
|
+
|
|
66
|
+
/**----------------------------------------------------------------------------
|
|
67
|
+
Private Function Prototypes
|
|
68
|
+
----------------------------------------------------------------------------**/
|
|
69
|
+
void ParseArguments(
|
|
70
|
+
int argc,
|
|
71
|
+
char **argv);
|
|
72
|
+
|
|
73
|
+
char *GetNextFilename ();
|
|
74
|
+
|
|
75
|
+
void ReadTrainingSamples (
|
|
76
|
+
FILE *File,
|
|
77
|
+
LIST* TrainingSamples);
|
|
78
|
+
|
|
79
|
+
LABELEDLIST FindList (
|
|
80
|
+
LIST List,
|
|
81
|
+
char *Label);
|
|
82
|
+
|
|
83
|
+
LABELEDLIST NewLabeledList (
|
|
84
|
+
char *Label);
|
|
85
|
+
|
|
86
|
+
void WriteTrainingSamples (
|
|
87
|
+
char *Directory,
|
|
88
|
+
LIST CharList);
|
|
89
|
+
|
|
90
|
+
void WriteNormProtos (
|
|
91
|
+
char *Directory,
|
|
92
|
+
LIST LabeledProtoList,
|
|
93
|
+
CLUSTERER *Clusterer);
|
|
94
|
+
|
|
95
|
+
void FreeTrainingSamples (
|
|
96
|
+
LIST CharList);
|
|
97
|
+
|
|
98
|
+
void FreeNormProtoList (
|
|
99
|
+
LIST CharList);
|
|
100
|
+
|
|
101
|
+
void FreeLabeledList (
|
|
102
|
+
LABELEDLIST LabeledList);
|
|
103
|
+
|
|
104
|
+
CLUSTERER *SetUpForClustering(
|
|
105
|
+
LABELEDLIST CharSample);
|
|
106
|
+
/*
|
|
107
|
+
PARAMDESC *ConvertToPARAMDESC(
|
|
108
|
+
PARAM_DESC* Param_Desc,
|
|
109
|
+
int N);
|
|
110
|
+
*/
|
|
111
|
+
void AddToNormProtosList(
|
|
112
|
+
LIST* NormProtoList,
|
|
113
|
+
LIST ProtoList,
|
|
114
|
+
char* CharName);
|
|
115
|
+
|
|
116
|
+
void WriteProtos(
|
|
117
|
+
FILE *File,
|
|
118
|
+
uinT16 N,
|
|
119
|
+
LIST ProtoList,
|
|
120
|
+
BOOL8 WriteSigProtos,
|
|
121
|
+
BOOL8 WriteInsigProtos);
|
|
122
|
+
|
|
123
|
+
int NumberOfProtos(
|
|
124
|
+
LIST ProtoList,
|
|
125
|
+
BOOL8 CountSigProtos,
|
|
126
|
+
BOOL8 CountInsigProtos);
|
|
127
|
+
|
|
128
|
+
/**----------------------------------------------------------------------------
|
|
129
|
+
Global Data Definitions and Declarations
|
|
130
|
+
----------------------------------------------------------------------------**/
|
|
131
|
+
static char FontName[MAXNAMESIZE];
|
|
132
|
+
/* globals used for parsing command line arguments */
|
|
133
|
+
static char *Directory = NULL;
|
|
134
|
+
static int MaxNumSamples = MAX_NUM_SAMPLES;
|
|
135
|
+
static int Argc;
|
|
136
|
+
static char **Argv;
|
|
137
|
+
|
|
138
|
+
/* globals used to control what information is saved in the output file */
|
|
139
|
+
static BOOL8 ShowAllSamples = FALSE;
|
|
140
|
+
static BOOL8 ShowSignificantProtos = TRUE;
|
|
141
|
+
static BOOL8 ShowInsignificantProtos = FALSE;
|
|
142
|
+
|
|
143
|
+
/* global variable to hold configuration parameters to control clustering */
|
|
144
|
+
//-M 0.025 -B 0.05 -I 0.8 -C 1e-3
|
|
145
|
+
static CLUSTERCONFIG Config =
|
|
146
|
+
{
|
|
147
|
+
elliptical, 0.025, 0.05, 0.8, 1e-3, 0
|
|
148
|
+
};
|
|
149
|
+
|
|
150
|
+
static FLOAT32 RoundingAccuracy = 0.0;
|
|
151
|
+
|
|
152
|
+
/**----------------------------------------------------------------------------
|
|
153
|
+
Public Code
|
|
154
|
+
----------------------------------------------------------------------------**/
|
|
155
|
+
/*---------------------------------------------------------------------------*/
|
|
156
|
+
int main (
|
|
157
|
+
int argc,
|
|
158
|
+
char **argv)
|
|
159
|
+
|
|
160
|
+
/*
|
|
161
|
+
** Parameters:
|
|
162
|
+
** argc number of command line arguments
|
|
163
|
+
** argv array of command line arguments
|
|
164
|
+
** Globals: none
|
|
165
|
+
** Operation:
|
|
166
|
+
** This program reads in a text file consisting of feature
|
|
167
|
+
** samples from a training page in the following format:
|
|
168
|
+
**
|
|
169
|
+
** FontName CharName NumberOfFeatureTypes(N)
|
|
170
|
+
** FeatureTypeName1 NumberOfFeatures(M)
|
|
171
|
+
** Feature1
|
|
172
|
+
** ...
|
|
173
|
+
** FeatureM
|
|
174
|
+
** FeatureTypeName2 NumberOfFeatures(M)
|
|
175
|
+
** Feature1
|
|
176
|
+
** ...
|
|
177
|
+
** FeatureM
|
|
178
|
+
** ...
|
|
179
|
+
** FeatureTypeNameN NumberOfFeatures(M)
|
|
180
|
+
** Feature1
|
|
181
|
+
** ...
|
|
182
|
+
** FeatureM
|
|
183
|
+
** FontName CharName ...
|
|
184
|
+
**
|
|
185
|
+
** It then appends these samples into a separate file for each
|
|
186
|
+
** character. The name of the file is
|
|
187
|
+
**
|
|
188
|
+
** DirectoryName/FontName/CharName.FeatureTypeName
|
|
189
|
+
**
|
|
190
|
+
** The DirectoryName can be specified via a command
|
|
191
|
+
** line argument. If not specified, it defaults to the
|
|
192
|
+
** current directory. The format of the resulting files is:
|
|
193
|
+
**
|
|
194
|
+
** NumberOfFeatures(M)
|
|
195
|
+
** Feature1
|
|
196
|
+
** ...
|
|
197
|
+
** FeatureM
|
|
198
|
+
** NumberOfFeatures(M)
|
|
199
|
+
** ...
|
|
200
|
+
**
|
|
201
|
+
** The output files each have a header which describes the
|
|
202
|
+
** type of feature which the file contains. This header is
|
|
203
|
+
** in the format required by the clusterer. A command line
|
|
204
|
+
** argument can also be used to specify that only the first
|
|
205
|
+
** N samples of each class should be used.
|
|
206
|
+
** Return: none
|
|
207
|
+
** Exceptions: none
|
|
208
|
+
** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
|
|
209
|
+
*/
|
|
210
|
+
|
|
211
|
+
{
|
|
212
|
+
char *PageName;
|
|
213
|
+
FILE *TrainingPage;
|
|
214
|
+
LIST CharList = NIL;
|
|
215
|
+
CLUSTERER *Clusterer = NULL;
|
|
216
|
+
LIST ProtoList = NIL;
|
|
217
|
+
LIST NormProtoList = NIL;
|
|
218
|
+
LIST pCharList;
|
|
219
|
+
LABELEDLIST CharSample;
|
|
220
|
+
|
|
221
|
+
ParseArguments (argc, argv);
|
|
222
|
+
while ((PageName = GetNextFilename()) != NULL)
|
|
223
|
+
{
|
|
224
|
+
printf ("Reading %s ...\n", PageName);
|
|
225
|
+
TrainingPage = Efopen (PageName, "r");
|
|
226
|
+
ReadTrainingSamples (TrainingPage, &CharList);
|
|
227
|
+
fclose (TrainingPage);
|
|
228
|
+
//WriteTrainingSamples (Directory, CharList);
|
|
229
|
+
}
|
|
230
|
+
printf("Clustering ...\n");
|
|
231
|
+
pCharList = CharList;
|
|
232
|
+
iterate(pCharList)
|
|
233
|
+
{
|
|
234
|
+
//Cluster
|
|
235
|
+
CharSample = (LABELEDLIST) first_node (pCharList);
|
|
236
|
+
//printf ("\nClustering %s ...", CharSample->Label);
|
|
237
|
+
Clusterer = SetUpForClustering(CharSample);
|
|
238
|
+
float SavedMinSamples = Config.MinSamples;
|
|
239
|
+
Config.MagicSamples = CharSample->SampleCount;
|
|
240
|
+
while (Config.MinSamples > 0.001) {
|
|
241
|
+
ProtoList = ClusterSamples(Clusterer, &Config);
|
|
242
|
+
if (NumberOfProtos(ProtoList, 1, 0) > 0)
|
|
243
|
+
break;
|
|
244
|
+
else {
|
|
245
|
+
Config.MinSamples *= 0.95;
|
|
246
|
+
printf("0 significant protos for %s."
|
|
247
|
+
" Retrying clustering with MinSamples = %f%%\n",
|
|
248
|
+
CharSample->Label, Config.MinSamples);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
Config.MinSamples = SavedMinSamples;
|
|
252
|
+
AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
|
|
253
|
+
}
|
|
254
|
+
FreeTrainingSamples (CharList);
|
|
255
|
+
WriteNormProtos (Directory, NormProtoList, Clusterer);
|
|
256
|
+
FreeClusterer(Clusterer);
|
|
257
|
+
FreeProtoList(&ProtoList);
|
|
258
|
+
FreeNormProtoList(NormProtoList);
|
|
259
|
+
printf ("\n");
|
|
260
|
+
return 0;
|
|
261
|
+
} // main
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
/**----------------------------------------------------------------------------
|
|
265
|
+
Private Code
|
|
266
|
+
----------------------------------------------------------------------------**/
|
|
267
|
+
/*---------------------------------------------------------------------------*/
|
|
268
|
+
void ParseArguments(
|
|
269
|
+
int argc,
|
|
270
|
+
char **argv)
|
|
271
|
+
|
|
272
|
+
/*
|
|
273
|
+
** Parameters:
|
|
274
|
+
** argc number of command line arguments to parse
|
|
275
|
+
** argv command line arguments
|
|
276
|
+
** Globals:
|
|
277
|
+
** ShowAllSamples flag controlling samples display
|
|
278
|
+
** ShowSignificantProtos flag controlling proto display
|
|
279
|
+
** ShowInsignificantProtos flag controlling proto display
|
|
280
|
+
** Config current clustering parameters
|
|
281
|
+
** tessoptarg, tessoptind defined by tessopt sys call
|
|
282
|
+
** Argc, Argv global copies of argc and argv
|
|
283
|
+
** Operation:
|
|
284
|
+
** This routine parses the command line arguments that were
|
|
285
|
+
** passed to the program. The legal arguments are:
|
|
286
|
+
** -d "turn off display of samples"
|
|
287
|
+
** -p "turn off significant protos"
|
|
288
|
+
** -n "turn off insignificant proto"
|
|
289
|
+
** -S [ spherical | elliptical | mixed | automatic ]
|
|
290
|
+
** -M MinSamples "min samples per prototype (%)"
|
|
291
|
+
** -B MaxIllegal "max illegal chars per cluster (%)"
|
|
292
|
+
** -I Independence "0 to 1"
|
|
293
|
+
** -C Confidence "1e-200 to 1.0"
|
|
294
|
+
** -D Directory
|
|
295
|
+
** -N MaxNumSamples
|
|
296
|
+
** -R RoundingAccuracy
|
|
297
|
+
** Return: none
|
|
298
|
+
** Exceptions: Illegal options terminate the program.
|
|
299
|
+
** History: 7/24/89, DSJ, Created.
|
|
300
|
+
*/
|
|
301
|
+
|
|
302
|
+
{
|
|
303
|
+
int Option;
|
|
304
|
+
int ParametersRead;
|
|
305
|
+
BOOL8 Error;
|
|
306
|
+
|
|
307
|
+
Error = FALSE;
|
|
308
|
+
Argc = argc;
|
|
309
|
+
Argv = argv;
|
|
310
|
+
while (( Option = tessopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
|
|
311
|
+
{
|
|
312
|
+
switch ( Option )
|
|
313
|
+
{
|
|
314
|
+
case 'n':
|
|
315
|
+
sscanf(tessoptarg,"%d", &ParametersRead);
|
|
316
|
+
ShowInsignificantProtos = ParametersRead;
|
|
317
|
+
break;
|
|
318
|
+
case 'p':
|
|
319
|
+
sscanf(tessoptarg,"%d", &ParametersRead);
|
|
320
|
+
ShowSignificantProtos = ParametersRead;
|
|
321
|
+
break;
|
|
322
|
+
case 'd':
|
|
323
|
+
ShowAllSamples = FALSE;
|
|
324
|
+
break;
|
|
325
|
+
case 'C':
|
|
326
|
+
ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
|
|
327
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
328
|
+
else if ( Config.Confidence > 1 ) Config.Confidence = 1;
|
|
329
|
+
else if ( Config.Confidence < 0 ) Config.Confidence = 0;
|
|
330
|
+
break;
|
|
331
|
+
case 'I':
|
|
332
|
+
ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
|
|
333
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
334
|
+
else if ( Config.Independence > 1 ) Config.Independence = 1;
|
|
335
|
+
else if ( Config.Independence < 0 ) Config.Independence = 0;
|
|
336
|
+
break;
|
|
337
|
+
case 'M':
|
|
338
|
+
ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
|
|
339
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
340
|
+
else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
|
|
341
|
+
else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
|
|
342
|
+
break;
|
|
343
|
+
case 'B':
|
|
344
|
+
ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
|
|
345
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
346
|
+
else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
|
|
347
|
+
else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
|
|
348
|
+
break;
|
|
349
|
+
case 'R':
|
|
350
|
+
ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
|
|
351
|
+
if ( ParametersRead != 1 ) Error = TRUE;
|
|
352
|
+
else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
|
|
353
|
+
else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
|
|
354
|
+
break;
|
|
355
|
+
case 'S':
|
|
356
|
+
switch ( tessoptarg[0] )
|
|
357
|
+
{
|
|
358
|
+
case 's': Config.ProtoStyle = spherical; break;
|
|
359
|
+
case 'e': Config.ProtoStyle = elliptical; break;
|
|
360
|
+
case 'm': Config.ProtoStyle = mixed; break;
|
|
361
|
+
case 'a': Config.ProtoStyle = automatic; break;
|
|
362
|
+
default: Error = TRUE;
|
|
363
|
+
}
|
|
364
|
+
break;
|
|
365
|
+
case 'D':
|
|
366
|
+
Directory = tessoptarg;
|
|
367
|
+
break;
|
|
368
|
+
case 'N':
|
|
369
|
+
if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
|
|
370
|
+
MaxNumSamples <= 0)
|
|
371
|
+
Error = TRUE;
|
|
372
|
+
break;
|
|
373
|
+
case '?':
|
|
374
|
+
Error = TRUE;
|
|
375
|
+
break;
|
|
376
|
+
}
|
|
377
|
+
if ( Error )
|
|
378
|
+
{
|
|
379
|
+
fprintf (stderr, "usage: %s [-D] [-P] [-N]\n", argv[0] );
|
|
380
|
+
fprintf (stderr, "\t[-S ProtoStyle]\n");
|
|
381
|
+
fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
|
|
382
|
+
fprintf (stderr, "\t[-d directory] [-n MaxNumSamples] [ TrainingPage ... ]\n");
|
|
383
|
+
exit (2);
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
} /* ParseArguments */
|
|
387
|
+
|
|
388
|
+
/*---------------------------------------------------------------------------*/
|
|
389
|
+
char *GetNextFilename ()
|
|
390
|
+
/*
|
|
391
|
+
** Parameters: none
|
|
392
|
+
** Globals:
|
|
393
|
+
** tessoptind defined by tessopt sys call
|
|
394
|
+
** Argc, Argv global copies of argc and argv
|
|
395
|
+
** Operation:
|
|
396
|
+
** This routine returns the next command line argument. If
|
|
397
|
+
** there are no remaining command line arguments, it returns
|
|
398
|
+
** NULL. This routine should only be called after all option
|
|
399
|
+
** arguments have been parsed and removed with ParseArguments.
|
|
400
|
+
** Return: Next command line argument or NULL.
|
|
401
|
+
** Exceptions: none
|
|
402
|
+
** History: Fri Aug 18 09:34:12 1989, DSJ, Created.
|
|
403
|
+
*/
|
|
404
|
+
|
|
405
|
+
{
|
|
406
|
+
if (tessoptind < Argc)
|
|
407
|
+
return (Argv [tessoptind++]);
|
|
408
|
+
else
|
|
409
|
+
return (NULL);
|
|
410
|
+
|
|
411
|
+
} /* GetNextFilename */
|
|
412
|
+
|
|
413
|
+
/*---------------------------------------------------------------------------*/
|
|
414
|
+
void ReadTrainingSamples (
|
|
415
|
+
FILE *File,
|
|
416
|
+
LIST* TrainingSamples)
|
|
417
|
+
|
|
418
|
+
/*
|
|
419
|
+
** Parameters:
|
|
420
|
+
** File open text file to read samples from
|
|
421
|
+
** Globals: none
|
|
422
|
+
** Operation:
|
|
423
|
+
** This routine reads training samples from a file and
|
|
424
|
+
** places them into a data structure which organizes the
|
|
425
|
+
** samples by FontName and CharName. It then returns this
|
|
426
|
+
** data structure.
|
|
427
|
+
** Return: none
|
|
428
|
+
** Exceptions: none
|
|
429
|
+
** History: Fri Aug 18 13:11:39 1989, DSJ, Created.
|
|
430
|
+
** Tue May 17 1998 simplifications to structure, illiminated
|
|
431
|
+
** font, and feature specification levels of structure.
|
|
432
|
+
*/
|
|
433
|
+
|
|
434
|
+
{
|
|
435
|
+
char unichar[UNICHAR_LEN + 1];
|
|
436
|
+
LABELEDLIST CharSample;
|
|
437
|
+
FEATURE_SET FeatureSamples;
|
|
438
|
+
CHAR_DESC CharDesc;
|
|
439
|
+
int Type, i;
|
|
440
|
+
|
|
441
|
+
while (fscanf (File, "%s %s", FontName, unichar) == 2) {
|
|
442
|
+
CharSample = FindList (*TrainingSamples, unichar);
|
|
443
|
+
if (CharSample == NULL) {
|
|
444
|
+
CharSample = NewLabeledList (unichar);
|
|
445
|
+
*TrainingSamples = push (*TrainingSamples, CharSample);
|
|
446
|
+
}
|
|
447
|
+
CharDesc = ReadCharDescription (File);
|
|
448
|
+
Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
|
|
449
|
+
FeatureSamples = CharDesc->FeatureSets[Type];
|
|
450
|
+
for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
|
|
451
|
+
FEATURE f = FeatureSamples->Features[feature];
|
|
452
|
+
for (int dim =0; dim < f->Type->NumParams; ++dim)
|
|
453
|
+
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
|
|
454
|
+
}
|
|
455
|
+
CharSample->List = push (CharSample->List, FeatureSamples);
|
|
456
|
+
CharSample->SampleCount++;
|
|
457
|
+
for (i = 0; i < CharDesc->NumFeatureSets; i++)
|
|
458
|
+
if (Type != i)
|
|
459
|
+
FreeFeatureSet(CharDesc->FeatureSets[i]);
|
|
460
|
+
free (CharDesc);
|
|
461
|
+
}
|
|
462
|
+
} // ReadTrainingSamples
|
|
463
|
+
|
|
464
|
+
/*---------------------------------------------------------------------------*/
|
|
465
|
+
LABELEDLIST FindList (
     LIST	List,
     char	*Label)

/*
**	Parameters:
**		List		list of labeled lists to search
**		Label		label to look for
**	Globals: none
**	Operation:
**		Walks List from the front and compares each entry's label
**		with Label using strcmp; the first exact match wins.
**	Return: The matching labeled list, or NULL if there is none.
**	Exceptions: none
**	History: Fri Aug 18 15:57:41 1989, DSJ, Created.
*/

{
  iterate (List)
  {
    LABELEDLIST Candidate = (LABELEDLIST) first_node (List);

    if (!strcmp (Candidate->Label, Label))
      return (Candidate);
  }

  /* ran off the end without finding the label */
  return (NULL);

}	/* FindList */
|
|
495
|
+
|
|
496
|
+
/*---------------------------------------------------------------------------*/
|
|
497
|
+
LABELEDLIST NewLabeledList (
     char	*Label)

/*
**	Parameters:
**		Label	label for the new list; the text is copied
**	Globals: none
**	Operation:
**		Allocates a labeled list node with Emalloc, gives it a
**		private copy of Label, an empty item list and a sample
**		count of zero.
**	Return: New, empty labeled list.
**	Exceptions: none
**	History: Fri Aug 18 16:08:46 1989, DSJ, Created.
*/

{
  LABELEDLIST	Fresh = (LABELEDLIST) (char*)Emalloc (sizeof (LABELEDLISTNODE));

  /* room for the label text plus its terminating NUL */
  Fresh->Label = (char*)Emalloc (strlen (Label)+1);
  strcpy (Fresh->Label, Label);

  Fresh->SampleCount = 0;
  Fresh->List = NIL;
  return (Fresh);

}	/* NewLabeledList */
|
|
523
|
+
|
|
524
|
+
/*---------------------------------------------------------------------------*/
|
|
525
|
+
void WriteTrainingSamples (
|
|
526
|
+
char *Directory,
|
|
527
|
+
LIST CharList)
|
|
528
|
+
|
|
529
|
+
/*
|
|
530
|
+
** Parameters:
|
|
531
|
+
** Directory directory to place sample files into
|
|
532
|
+
** FontList list of fonts used in the training samples
|
|
533
|
+
** Globals:
|
|
534
|
+
** MaxNumSamples max number of samples per class to write
|
|
535
|
+
** Operation:
|
|
536
|
+
** This routine writes the specified samples into files which
|
|
537
|
+
** are organized according to the font name and character name
|
|
538
|
+
** of the samples.
|
|
539
|
+
** Return: none
|
|
540
|
+
** Exceptions: none
|
|
541
|
+
** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
|
|
542
|
+
*/
|
|
543
|
+
|
|
544
|
+
{
|
|
545
|
+
LABELEDLIST CharSample;
|
|
546
|
+
FEATURE_SET FeatureSet;
|
|
547
|
+
LIST FeatureList;
|
|
548
|
+
FILE *File;
|
|
549
|
+
char Filename[MAXNAMESIZE];
|
|
550
|
+
int NumSamples;
|
|
551
|
+
|
|
552
|
+
iterate (CharList) // iterate thru all of the fonts
|
|
553
|
+
{
|
|
554
|
+
CharSample = (LABELEDLIST) first_node (CharList);
|
|
555
|
+
|
|
556
|
+
// construct the full pathname for the current samples file
|
|
557
|
+
strcpy (Filename, "");
|
|
558
|
+
if (Directory != NULL)
|
|
559
|
+
{
|
|
560
|
+
strcat (Filename, Directory);
|
|
561
|
+
strcat (Filename, "/");
|
|
562
|
+
}
|
|
563
|
+
strcat (Filename, "Merged");
|
|
564
|
+
strcat (Filename, "/");
|
|
565
|
+
strcat (Filename, CharSample->Label);
|
|
566
|
+
strcat (Filename, ".");
|
|
567
|
+
strcat (Filename, PROGRAM_FEATURE_TYPE);
|
|
568
|
+
printf ("\nWriting %s ...", Filename);
|
|
569
|
+
|
|
570
|
+
/* if file does not exist, create a new one with an appropriate
|
|
571
|
+
header; otherwise append samples to the existing file */
|
|
572
|
+
File = fopen (Filename, "r");
|
|
573
|
+
if (File == NULL)
|
|
574
|
+
{
|
|
575
|
+
File = Efopen (Filename, "w");
|
|
576
|
+
WriteOldParamDesc
|
|
577
|
+
(File, FeatureDefs.FeatureDesc[ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)]);
|
|
578
|
+
}
|
|
579
|
+
else
|
|
580
|
+
{
|
|
581
|
+
fclose (File);
|
|
582
|
+
File = Efopen (Filename, "a");
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
// append samples onto the file
|
|
586
|
+
FeatureList = CharSample->List;
|
|
587
|
+
NumSamples = 0;
|
|
588
|
+
iterate (FeatureList)
|
|
589
|
+
{
|
|
590
|
+
//if (NumSamples >= MaxNumSamples) break;
|
|
591
|
+
|
|
592
|
+
FeatureSet = (FEATURE_SET) first_node (FeatureList);
|
|
593
|
+
WriteFeatureSet (File, FeatureSet);
|
|
594
|
+
NumSamples++;
|
|
595
|
+
}
|
|
596
|
+
fclose (File);
|
|
597
|
+
}
|
|
598
|
+
} /* WriteTrainingSamples */
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
/*----------------------------------------------------------------------------*/
|
|
602
|
+
void WriteNormProtos (
|
|
603
|
+
char *Directory,
|
|
604
|
+
LIST LabeledProtoList,
|
|
605
|
+
CLUSTERER *Clusterer)
|
|
606
|
+
|
|
607
|
+
/*
|
|
608
|
+
** Parameters:
|
|
609
|
+
** Directory directory to place sample files into
|
|
610
|
+
** Globals:
|
|
611
|
+
** MaxNumSamples max number of samples per class to write
|
|
612
|
+
** Operation:
|
|
613
|
+
** This routine writes the specified samples into files which
|
|
614
|
+
** are organized according to the font name and character name
|
|
615
|
+
** of the samples.
|
|
616
|
+
** Return: none
|
|
617
|
+
** Exceptions: none
|
|
618
|
+
** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
|
|
619
|
+
*/
|
|
620
|
+
|
|
621
|
+
{
|
|
622
|
+
FILE *File;
|
|
623
|
+
char Filename[MAXNAMESIZE];
|
|
624
|
+
LABELEDLIST LabeledProto;
|
|
625
|
+
int N;
|
|
626
|
+
|
|
627
|
+
strcpy (Filename, "");
|
|
628
|
+
if (Directory != NULL)
|
|
629
|
+
{
|
|
630
|
+
strcat (Filename, Directory);
|
|
631
|
+
strcat (Filename, "/");
|
|
632
|
+
}
|
|
633
|
+
strcat (Filename, "normproto");
|
|
634
|
+
printf ("\nWriting %s ...", Filename);
|
|
635
|
+
File = Efopen (Filename, "w");
|
|
636
|
+
fprintf(File,"%0d\n",Clusterer->SampleSize);
|
|
637
|
+
WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc);
|
|
638
|
+
iterate(LabeledProtoList)
|
|
639
|
+
{
|
|
640
|
+
LabeledProto = (LABELEDLIST) first_node (LabeledProtoList);
|
|
641
|
+
N = NumberOfProtos(LabeledProto->List,
|
|
642
|
+
ShowSignificantProtos, ShowInsignificantProtos);
|
|
643
|
+
if (N < 1) {
|
|
644
|
+
printf ("\nError! Not enough protos for %s: %d protos"
|
|
645
|
+
" (%d significant protos"
|
|
646
|
+
", %d insignificant protos)\n",
|
|
647
|
+
LabeledProto->Label, N,
|
|
648
|
+
NumberOfProtos(LabeledProto->List, 1, 0),
|
|
649
|
+
NumberOfProtos(LabeledProto->List, 0, 1));
|
|
650
|
+
exit(1);
|
|
651
|
+
}
|
|
652
|
+
fprintf(File, "\n%s %d\n", LabeledProto->Label, N);
|
|
653
|
+
WriteProtos(File, Clusterer->SampleSize, LabeledProto->List,
|
|
654
|
+
ShowSignificantProtos, ShowInsignificantProtos);
|
|
655
|
+
}
|
|
656
|
+
fclose (File);
|
|
657
|
+
|
|
658
|
+
} // WriteNormProtos
|
|
659
|
+
|
|
660
|
+
/*---------------------------------------------------------------------------*/
|
|
661
|
+
void FreeTrainingSamples (
     LIST	CharList)

/*
**	Parameters:
**		CharList	list of labeled lists, one per character,
**				each holding that character's feature sets
**	Globals: none
**	Operation:
**		Frees every feature set of every character, then each
**		labeled list, and finally the cells of CharList itself.
**	Return: none
**	Exceptions: none
**	History: Fri Aug 18 17:44:27 1989, DSJ, Created.
*/

{
  LABELEDLIST	CharSample;
  FEATURE_SET	FeatureSet;
  LIST		FeatureList;
  // iterate() advances CharList as it goes, so remember the head here;
  // destroying CharList after the loop would never see the list cells.
  LIST		Head = CharList;

  printf ("\nFreeTrainingSamples...");
  iterate (CharList) 		/* iterate thru all of the characters */
  {
    CharSample = (LABELEDLIST) first_node (CharList);
    FeatureList = CharSample->List;
    iterate (FeatureList)	/* iterate thru all of the samples */
    {
      FeatureSet = (FEATURE_SET) first_node (FeatureList);
      FreeFeatureSet (FeatureSet);
    }
    FreeLabeledList (CharSample);
  }
  destroy (Head);

}	/* FreeTrainingSamples */
|
|
697
|
+
|
|
698
|
+
/*-------------------------------------------------------------------------*/
|
|
699
|
+
void FreeNormProtoList (
     LIST	CharList)

/*
**	Parameters:
**		CharList	list of labeled prototype lists, one per
**				character
**	Operation:
**		Frees each labeled list (its label and its own list cells,
**		not the prototypes it refers to) and then the cells of
**		CharList itself.
**	Return: none
*/

{
  LABELEDLIST	CharSample;
  // iterate() advances CharList as it goes, so keep the head for the
  // final destroy; otherwise the list cells are never released.
  LIST		Head = CharList;

  iterate (CharList) 		/* iterate thru all of the characters */
  {
    CharSample = (LABELEDLIST) first_node (CharList);
    FreeLabeledList (CharSample);
  }
  destroy (Head);

}	// FreeNormProtoList
|
|
713
|
+
|
|
714
|
+
/*---------------------------------------------------------------------------*/
|
|
715
|
+
void FreeLabeledList (
     LABELEDLIST	LabeledList)

/*
**	Parameters:
**		LabeledList	labeled list to be freed
**	Globals: none
**	Operation:
**		Releases the label text, the list held by the node, and
**		the node itself.  The items the list refers to are not
**		freed here.
**	Return: none
**	Exceptions: none
**	History: Fri Aug 18 17:52:45 1989, DSJ, Created.
*/

{
  /* the two members are independent; the node itself must go last */
  free (LabeledList->Label);
  destroy (LabeledList->List);
  free (LabeledList);

}	/* FreeLabeledList */
|
|
737
|
+
|
|
738
|
+
/*---------------------------------------------------------------------------*/
|
|
739
|
+
CLUSTERER *SetUpForClustering(
     LABELEDLIST	CharSample)

/*
**	Parameters:
**		CharSample	labeled list holding all feature sets read
**				for one character
**	Globals:
**		RoundingAccuracy	when non-zero, every parameter is
**				rounded to a multiple of this value
**		FeatureDefs	source of the feature description
**	Operation:
**		Creates a clusterer for the PROGRAM_FEATURE_TYPE feature
**		description and feeds it one sample per feature.  All
**		features of the same feature set share one CharID; the
**		CharID increases by one for each feature set in the list.
**	Return:
**		Pointer to new clusterer data structure.
**	Exceptions:
**		None
**	History:
**		8/16/89, DSJ, Created.
*/

{
  FEATURE_DESC	Desc = FeatureDefs.FeatureDesc[ShortNameToFeatureType(PROGRAM_FEATURE_TYPE)];
  uinT16	N = Desc->NumParams;
  CLUSTERER	*Clusterer = MakeClusterer(N,Desc->ParamDesc);
  FLOAT32	*Scratch = NULL;	/* reused for every sample, allocated on first use */
  inT32		CharID = 0;
  LIST		Sets = CharSample->List;
  int		FeatureIdx, ParamIdx;

  iterate(Sets)
  {
    FEATURE_SET	Set = (FEATURE_SET) first_node (Sets);

    /* NOTE(review): the bound is MaxNumFeatures although
       ReadTrainingSamples walks the same array up to NumFeatures --
       confirm the two are always equal for sets read from file. */
    for (FeatureIdx = 0; FeatureIdx < Set->MaxNumFeatures; FeatureIdx++)
    {
      FEATURE	Feature = Set->Features[FeatureIdx];

      if (Scratch == NULL)
        Scratch = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
      for (ParamIdx = 0; ParamIdx < N; ParamIdx++)
      {
        if (RoundingAccuracy != 0.0)
          Scratch[ParamIdx] = round(Feature->Params[ParamIdx], RoundingAccuracy);
        else
          Scratch[ParamIdx] = Feature->Params[ParamIdx];
      }
      MakeSample (Clusterer, Scratch, CharID);
    }
    CharID++;
  }
  free( Scratch );	/* free(NULL) is a no-op when no sample was made */
  return( Clusterer );

}	/* SetUpForClustering */
|
|
799
|
+
|
|
800
|
+
/*---------------------------------------------------------------------------*/
|
|
801
|
+
void AddToNormProtosList(
     LIST* NormProtoList,
     LIST ProtoList,
     char* CharName)
/*
**	Operation:
**		Wraps the prototypes in ProtoList in a new labeled list
**		named CharName and pushes that labeled list onto
**		*NormProtoList.  Only the prototype pointers are stored;
**		the prototypes themselves are not copied.
*/
{
  LABELEDLIST Entry = NewLabeledList(CharName);

  iterate(ProtoList)
  {
    Entry->List = push(Entry->List, (PROTOTYPE *) first_node (ProtoList));
  }
  *NormProtoList = push(*NormProtoList, Entry);
}
|
|
817
|
+
|
|
818
|
+
/*-------------------------------------------------------------------------*/
|
|
819
|
+
void WriteProtos(
|
|
820
|
+
FILE *File,
|
|
821
|
+
uinT16 N,
|
|
822
|
+
LIST ProtoList,
|
|
823
|
+
BOOL8 WriteSigProtos,
|
|
824
|
+
BOOL8 WriteInsigProtos)
|
|
825
|
+
{
|
|
826
|
+
PROTOTYPE *Proto;
|
|
827
|
+
|
|
828
|
+
// write prototypes
|
|
829
|
+
iterate(ProtoList)
|
|
830
|
+
{
|
|
831
|
+
Proto = (PROTOTYPE *) first_node ( ProtoList );
|
|
832
|
+
if (( Proto->Significant && WriteSigProtos ) ||
|
|
833
|
+
( ! Proto->Significant && WriteInsigProtos ) )
|
|
834
|
+
WritePrototype( File, N, Proto );
|
|
835
|
+
}
|
|
836
|
+
} // WriteProtos
|
|
837
|
+
|
|
838
|
+
/*---------------------------------------------------------------------------*/
|
|
839
|
+
int NumberOfProtos(
     LIST ProtoList,
     BOOL8	CountSigProtos,
     BOOL8	CountInsigProtos)
/*
**	Operation:
**		Counts the prototypes in ProtoList.  Significant
**		prototypes are counted only when CountSigProtos is set,
**		insignificant ones only when CountInsigProtos is set.
**	Return: Number of prototypes counted.
*/
{
  int Count = 0;

  iterate(ProtoList)
  {
    PROTOTYPE	*Current = (PROTOTYPE *) first_node ( ProtoList );

    // pick the flag matching this prototype's significance class
    if ( Current->Significant ? CountSigProtos : CountInsigProtos )
      Count++;
  }
  return(Count);
}
|