tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
/* -*-C-*-
|
|
2
|
+
********************************************************************************
|
|
3
|
+
*
|
|
4
|
+
* File: makedawg.cpp
|
|
5
|
+
* Description: Create a Directed Acyclic Word Graph
|
|
6
|
+
* Author: Mark Seaman, OCR Technology
|
|
7
|
+
* Created: Fri Oct 16 14:37:00 1987
|
|
8
|
+
* Modified: Fri Jul 26 12:18:12 1991 (Mark Seaman) marks@hpgrlt
|
|
9
|
+
* Language: C
|
|
10
|
+
* Package: N/A
|
|
11
|
+
* Status: Reusable Software Component
|
|
12
|
+
*
|
|
13
|
+
* (c) Copyright 1987, Hewlett-Packard Company, all rights reserved.
|
|
14
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
15
|
+
** you may not use this file except in compliance with the License.
|
|
16
|
+
** You may obtain a copy of the License at
|
|
17
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
18
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
** See the License for the specific language governing permissions and
|
|
22
|
+
** limitations under the License.
|
|
23
|
+
*
|
|
24
|
+
********************************************************************************
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
/*
|
|
29
|
+
----------------------------------------------------------------------
|
|
30
|
+
I n c l u d e s
|
|
31
|
+
----------------------------------------------------------------------
|
|
32
|
+
*/
|
|
33
|
+
#ifdef __MSW32__
|
|
34
|
+
#include <windows.h>
|
|
35
|
+
#else
|
|
36
|
+
#include <arpa/inet.h>
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
#include "makedawg.h"
|
|
40
|
+
|
|
41
|
+
#include "reduce.h"
|
|
42
|
+
#include "cutil.h"
|
|
43
|
+
#include "callcpp.h"
|
|
44
|
+
|
|
45
|
+
#ifdef __UNIX__
|
|
46
|
+
#include <assert.h>
|
|
47
|
+
#endif
|
|
48
|
+
#include <time.h>
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
/*
|
|
52
|
+
----------------------------------------------------------------------
|
|
53
|
+
V a r i a b l e s
|
|
54
|
+
----------------------------------------------------------------------
|
|
55
|
+
*/
|
|
56
|
+
|
|
57
|
+
/*
|
|
58
|
+
----------------------------------------------------------------------
|
|
59
|
+
F u n c t i o n s
|
|
60
|
+
----------------------------------------------------------------------
|
|
61
|
+
*/
|
|
62
|
+
|
|
63
|
+
/**********************************************************************
|
|
64
|
+
* build_node_map
|
|
65
|
+
*
|
|
66
|
+
* Create a node map that will help translate the indices of the DAWG
|
|
67
|
+
* into a compacted form.
|
|
68
|
+
* Construct in memory a mapping from the memory node values into the
|
|
69
|
+
* disk node values. Return the values in this map as requested. If
|
|
70
|
+
* a new value mapping is requested assign the next sequential number
|
|
71
|
+
* to it.
|
|
72
|
+
**********************************************************************/
|
|
73
|
+
|
|
74
|
+
NODE_MAP build_node_map (EDGE_ARRAY dawg,
|
|
75
|
+
inT32 *num_nodes,
|
|
76
|
+
inT32 both_links,
|
|
77
|
+
inT32 max_num_edges,
|
|
78
|
+
inT32 reserved_edges) {
|
|
79
|
+
EDGE_REF edge;
|
|
80
|
+
NODE_MAP node_map;
|
|
81
|
+
inT32 node_counter;
|
|
82
|
+
inT32 num_edges;
|
|
83
|
+
|
|
84
|
+
node_map = (NODE_MAP) malloc (sizeof (EDGE_REF) * max_num_edges);
|
|
85
|
+
|
|
86
|
+
for (edge=0; edge<max_num_edges; edge++) /* Init all slots */
|
|
87
|
+
node_map [edge] = -1;
|
|
88
|
+
|
|
89
|
+
if (both_links) /* Start after reserved */
|
|
90
|
+
node_counter = reserved_edges;
|
|
91
|
+
else
|
|
92
|
+
node_counter = num_forward_edges (dawg, 0);
|
|
93
|
+
|
|
94
|
+
*num_nodes = 0;
|
|
95
|
+
for (edge=0; edge<max_num_edges; edge++) { /* Search all slots */
|
|
96
|
+
|
|
97
|
+
if (forward_edge (dawg, edge)) {
|
|
98
|
+
(*num_nodes)++; /* Count nodes links */
|
|
99
|
+
|
|
100
|
+
node_map [edge] = (edge ? node_counter : 0);
|
|
101
|
+
|
|
102
|
+
if (both_links)
|
|
103
|
+
num_edges = edges_in_node (dawg, edge);
|
|
104
|
+
else
|
|
105
|
+
num_edges = num_forward_edges (dawg, edge);
|
|
106
|
+
|
|
107
|
+
if (edge != 0) node_counter += num_edges;
|
|
108
|
+
edge += num_edges;
|
|
109
|
+
if (backward_edge (dawg, edge)) edge_loop (dawg, edge);
|
|
110
|
+
edge--;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
return (node_map);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
/**********************************************************************
|
|
118
|
+
* compact_dawg
|
|
119
|
+
*
|
|
120
|
+
* Compact the DAWG (array of edges) to leave a large chunk of blank
|
|
121
|
+
* space at the end.
|
|
122
|
+
**********************************************************************/
|
|
123
|
+
|
|
124
|
+
void compact_dawg (EDGE_ARRAY dawg,
|
|
125
|
+
inT32 max_num_edges,
|
|
126
|
+
inT32 reserved_edges) {
|
|
127
|
+
EDGE_REF edge;
|
|
128
|
+
inT32 num_edges = 0;
|
|
129
|
+
NODE_REF next_node_space;
|
|
130
|
+
NODE_REF node = 0;
|
|
131
|
+
NODE_REF destination;
|
|
132
|
+
inT32 node_count;
|
|
133
|
+
NODE_MAP node_map;
|
|
134
|
+
NODE_REF the_next_node;
|
|
135
|
+
|
|
136
|
+
if (max_new_attempts < NUM_PLACEMENT_ATTEMPTS / 10) return;
|
|
137
|
+
max_new_attempts = 0;
|
|
138
|
+
|
|
139
|
+
print_string ("Compacting the DAWG");
|
|
140
|
+
node_map = build_node_map (dawg, &node_count, TRUE,
|
|
141
|
+
max_num_edges, reserved_edges);
|
|
142
|
+
|
|
143
|
+
edge = 0;
|
|
144
|
+
next_node_space = reserved_edges;
|
|
145
|
+
while (edge < max_num_edges) {
|
|
146
|
+
/* Found a node ? */
|
|
147
|
+
if (forward_edge (dawg, edge)) {
|
|
148
|
+
node = edge;
|
|
149
|
+
num_edges = edges_in_node (dawg, node);
|
|
150
|
+
/* Move the edges */
|
|
151
|
+
if (node != 0) {
|
|
152
|
+
destination = next_node_space;
|
|
153
|
+
if (node != next_node_space)
|
|
154
|
+
move_edges (dawg, node, next_node_space, num_edges);
|
|
155
|
+
}
|
|
156
|
+
else {
|
|
157
|
+
destination = 0;
|
|
158
|
+
}
|
|
159
|
+
/* Should be moved */
|
|
160
|
+
if (debug) cprintf ("Compacting node from " REFFORMAT " to " REFFORMAT \
|
|
161
|
+
" (%d)\n", node, destination, num_edges);
|
|
162
|
+
|
|
163
|
+
for (edge = destination;
|
|
164
|
+
edge < destination + num_edges;
|
|
165
|
+
edge++) {
|
|
166
|
+
|
|
167
|
+
the_next_node = next_node (dawg, edge);
|
|
168
|
+
|
|
169
|
+
assert (the_next_node >= 0 &&
|
|
170
|
+
the_next_node < max_num_edges &&
|
|
171
|
+
node_map [the_next_node] >= 0 &&
|
|
172
|
+
node_map [the_next_node] < max_num_edges);
|
|
173
|
+
|
|
174
|
+
/* Map each edge in node */
|
|
175
|
+
if (debug) cprintf (" " REFFORMAT " --> ", next_node (dawg, edge));
|
|
176
|
+
set_next_edge (dawg, edge, node_map [next_node (dawg, edge)]);
|
|
177
|
+
if (debug) cprintf (REFFORMAT "\n", next_node (dawg, edge));
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
if (destination != 0) next_node_space = edge;
|
|
181
|
+
edge = node + num_edges;
|
|
182
|
+
}
|
|
183
|
+
else {
|
|
184
|
+
edge++;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
cprintf ("Compacting node from " REFFORMAT " to " REFFORMAT " (%d)\n",
|
|
189
|
+
node, next_node_space, num_edges);
|
|
190
|
+
free (node_map);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
/**********************************************************************
|
|
195
|
+
* delete_node
|
|
196
|
+
*
|
|
197
|
+
* Remove all the edges that are currently used within this node in the
|
|
198
|
+
* DAWG.
|
|
199
|
+
**********************************************************************/
|
|
200
|
+
|
|
201
|
+
void delete_node (EDGE_ARRAY dawg,
|
|
202
|
+
NODE_REF node) {
|
|
203
|
+
EDGE_REF edge = node;
|
|
204
|
+
inT32 counter = edges_in_node (dawg, node);
|
|
205
|
+
|
|
206
|
+
/*
|
|
207
|
+
printf ("node deleted = %d (%d)\n", node, counter);
|
|
208
|
+
*/
|
|
209
|
+
while (counter--)
|
|
210
|
+
set_empty_edge (dawg, edge++);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
/**********************************************************************
|
|
215
|
+
* write_squished_dawg
|
|
216
|
+
*
|
|
217
|
+
* Write the DAWG out to a file
|
|
218
|
+
**********************************************************************/
|
|
219
|
+
|
|
220
|
+
void write_squished_dawg (const char *filename,
|
|
221
|
+
EDGE_ARRAY dawg,
|
|
222
|
+
inT32 max_num_edges,
|
|
223
|
+
inT32 reserved_edges) {
|
|
224
|
+
FILE *file;
|
|
225
|
+
EDGE_REF edge;
|
|
226
|
+
inT32 num_edges;
|
|
227
|
+
inT32 node_count = 0;
|
|
228
|
+
NODE_MAP node_map;
|
|
229
|
+
EDGE_REF old_index;
|
|
230
|
+
uinT32 temp_record_32;
|
|
231
|
+
|
|
232
|
+
if (debug) print_string ("write_squished_dawg");
|
|
233
|
+
|
|
234
|
+
node_map = build_node_map (dawg, &node_count, FALSE, max_num_edges,
|
|
235
|
+
reserved_edges);
|
|
236
|
+
|
|
237
|
+
#ifdef WIN32
|
|
238
|
+
file = open_file(filename, "wb");
|
|
239
|
+
#else
|
|
240
|
+
file = open_file(filename, "w");
|
|
241
|
+
#endif
|
|
242
|
+
|
|
243
|
+
num_edges = 0; /* Count number of edges */
|
|
244
|
+
for (edge=0; edge<max_num_edges; edge++)
|
|
245
|
+
if (forward_edge (dawg, edge))
|
|
246
|
+
num_edges++;
|
|
247
|
+
|
|
248
|
+
num_edges = htonl(num_edges);
|
|
249
|
+
fwrite (&num_edges, sizeof (inT32), 1, file); /* Write edge count to file */
|
|
250
|
+
num_edges = ntohl(num_edges);
|
|
251
|
+
|
|
252
|
+
printf ("%d nodes in DAWG\n", node_count);
|
|
253
|
+
printf ("%d edges in DAWG\n", num_edges);
|
|
254
|
+
|
|
255
|
+
if (num_edges > MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE) {
|
|
256
|
+
cprintf("Error: squished DAWG is too big to be written (%d edges > %d).\n",
|
|
257
|
+
num_edges, MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE);
|
|
258
|
+
exit(1);
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
for (edge=0; edge<max_num_edges; edge++) {
|
|
262
|
+
/* Write forward edges */
|
|
263
|
+
if (forward_edge (dawg, edge)) {
|
|
264
|
+
do {
|
|
265
|
+
old_index = next_node (dawg,edge);
|
|
266
|
+
set_next_edge (dawg, edge, node_map [next_node (dawg, edge)]);
|
|
267
|
+
temp_record_32 = htonl((uinT32) edge_of (dawg,edge));
|
|
268
|
+
fwrite (&temp_record_32, sizeof (uinT32), 1, file);
|
|
269
|
+
set_next_edge (dawg, edge, old_index);
|
|
270
|
+
} edge_loop (dawg, edge);
|
|
271
|
+
|
|
272
|
+
if (backward_edge (dawg, edge)) /* Skip back links */
|
|
273
|
+
edge_loop (dawg, edge);
|
|
274
|
+
|
|
275
|
+
edge--;
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
free (node_map);
|
|
280
|
+
fclose (file);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
#if 0
/**********************************************************************
 * main
 *
 * Test the DAWG functions.  This driver is compiled out (#if 0) and is
 * kept for reference only.
 *
 * It first takes a base name (argv[1], or "WORDS" when no argument is
 * given), reads <base>.lst as a word list, and writes <base>.ful,
 * <base>.opt and <base>.squ.  It then processes the command line options
 * listed in the usage text at the bottom.
 **********************************************************************/

int main (int argc, char **argv) {
  extern int   optind;
  extern char *optarg;
  int          option;
  time_t       start_time;
  time_t       end_time;
  inT32        max_num_edges = 700000;
  inT32        reserved_edges = 50000;
  EDGE_ARRAY   dawg;
  const char  *base;
  char         filename [CHARS_PER_LINE];
  size_t       baselength;

  start_time = time (&start_time);

  dawg = (EDGE_ARRAY) malloc (sizeof (EDGE_RECORD) * max_num_edges);
  if (dawg == NULL) {
    printf ("error: Could not allocate enough memory for DAWG ");
    /* sizeof yields size_t; cast so the arguments match the format. */
    printf ("(%lu,%03lu bytes needed)\n",
            (unsigned long) (sizeof (EDGE_RECORD) * max_num_edges / 1000),
            (unsigned long) (sizeof (EDGE_RECORD) * max_num_edges % 1000));
    exit (1);
  }

  base = (argc > 1) ? argv[1] : "WORDS";
  baselength = strlen (base);
  /* Every extension appended below is four characters plus the
     terminator; refuse names that would not fit with one attached. */
  if (baselength + sizeof (".lst") > sizeof (filename)) {
    printf ("error: file name '%s' is too long\n", base);
    exit (1);
  }
  strcpy (filename, base);

  /* strcpy (filename+baselength, ".ful");
     read_full_dawg (filename, dawg, max_num_edges);
  */
  strcpy (filename+baselength, ".lst");
  printf ("Building Dawg from word list in file, '%s'\n", filename);
  read_word_list (filename, dawg, max_num_edges, reserved_edges);

  strcpy (filename+baselength, ".ful");
  printf ("Writing full Trie file, '%s'\n", filename);
  write_full_dawg (filename, dawg, max_num_edges);

  strcpy (filename+baselength, ".opt");
  trie_to_dawg (dawg, max_num_edges, reserved_edges);
  printf ("Writing full DAWG file, '%s'\n", filename);
  write_full_dawg (filename, dawg, max_num_edges);

  strcpy (filename+baselength, ".squ");
  printf ("Writing squished file, '%s'\n", filename);
  write_squished_dawg (filename, dawg, max_num_edges, reserved_edges);

  end_time = time (&end_time);
  printf ("Seconds Elapsed = %4.1lf\n",
          difftime (end_time, start_time));


  while ((option = getopt (argc, argv, "e:c:d:n:s:t:v")) != EOF)
  switch (option) {

    case 'c' : {                       /* compact an existing DAWG */
      printf ("makedawg -c %s %s\n", optarg, argv[optind]);

      printf ("Reading Dawg file, '%s'\n", optarg);
      read_dawg (optarg, dawg, max_num_edges);

      /* Raise the counter so compact_dawg() does not return early. */
      max_new_attempts = 1000;
      compact_dawg (dawg, max_num_edges, reserved_edges);

      printf ("Writing full file, '%s'\n", argv[optind]);
      write_full_dawg (argv[optind++], dawg, max_num_edges);

      break;
    }

    case 'd' : {                       /* trie -> DAWG */
      printf ("makedawg -d %s %s\n", optarg, argv[optind]);

      printf ("Reading Dawg file, '%s'\n", optarg);
      read_dawg (optarg, dawg, max_num_edges);
      trie_to_dawg (dawg, max_num_edges, reserved_edges);

      printf ("Writing full file, '%s'\n", argv[optind]);
      write_full_dawg (argv[optind++], dawg, max_num_edges);

      break;
    }

    case 'n' : {                       /* word list -> full DAWG file */
      printf ("makedawg -n %s %s\n", optarg, argv[optind]);

      printf ("Building Dawg from word list in file, '%s'\n", optarg);
      read_word_list (optarg, dawg, max_num_edges, reserved_edges);

      printf ("Writing full Dawg file, '%s'\n", argv[optind]);
      write_full_dawg (argv[optind++], dawg, max_num_edges);

      break;
    }

    case 's' : {                       /* full DAWG file -> squished file */
      printf ("makedawg -s %s %s\n", optarg, argv[optind]);

      printf ("Reading Dawg file, '%s'\n", optarg);
      read_dawg (optarg, dawg, max_num_edges);

      printf ("Writing squished file, '%s'\n", argv[optind]);
      write_squished_dawg (argv[optind++], dawg, max_num_edges, reserved_edges);

      break;
    }

    case 'v' : {                       /* verbose */
      debug = 1;
      break;
    }

    case 't' : {                       /* check words against squished file */
      read_squished_dawg (optarg, dawg, max_num_edges);

      if (optind < argc)
        check_for_words (dawg, argv[optind++]);
      else
        check_for_words (dawg, NULL);

      break;
    }

    case 'e' : {                       /* check words against full file */
      read_dawg (optarg, dawg, max_num_edges);

      if (optind < argc)
        check_for_words (dawg, argv[optind++]);
      else
        check_for_words (dawg, NULL);

      break;
    }

    default : {
      printf ("usage: makedawg -c <old-dawg> <new-dawg>\n");
      printf (" -d <old-dawg> <new-dawg>\n");
      printf (" -n <words> <dawg> \n");
      printf (" -s <old-dawg> <new-dawg>\n");
      printf (" -e <dawg> <words> \n");
      printf (" -t <dawg> <words> \n");
      printf (" -v \n");
      break;
    }
  }

  free (dawg);
  return 0;
}

#endif
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/* -*-C-*-
|
|
2
|
+
********************************************************************************
|
|
3
|
+
*
|
|
4
|
+
* File: makedawg.h
|
|
5
|
+
* Description: Create a Directed Acyclic Word Graph
|
|
6
|
+
* Author: Mark Seaman, SW Productivity
|
|
7
|
+
* Created: Fri Oct 16 14:37:00 1987
|
|
8
|
+
* Modified: Wed Jul 17 17:18:49 1991 (Mark Seaman) marks@hpgrlt
|
|
9
|
+
* Language: C
|
|
10
|
+
* Package: N/A
|
|
11
|
+
* Status: Reusable Software Component
|
|
12
|
+
*
|
|
13
|
+
* (c) Copyright 1987, Hewlett-Packard Company, all rights reserved.
|
|
14
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
15
|
+
** you may not use this file except in compliance with the License.
|
|
16
|
+
** You may obtain a copy of the License at
|
|
17
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
18
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
** See the License for the specific language governing permissions and
|
|
22
|
+
** limitations under the License.
|
|
23
|
+
*
|
|
24
|
+
********************************************************************************
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
#ifndef MAKEDAWG_H
|
|
28
|
+
#define MAKEDAWG_H
|
|
29
|
+
|
|
30
|
+
/*
|
|
31
|
+
----------------------------------------------------------------------
|
|
32
|
+
I n c l u d e s
|
|
33
|
+
----------------------------------------------------------------------
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
#include "general.h"
|
|
37
|
+
#include "dawg.h"
|
|
38
|
+
#include "trie.h"
|
|
39
|
+
|
|
40
|
+
/*
|
|
41
|
+
----------------------------------------------------------------------
|
|
42
|
+
T y p e s
|
|
43
|
+
----------------------------------------------------------------------
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
/*
|
|
47
|
+
----------------------------------------------------------------------
|
|
48
|
+
V a r i a b l e s
|
|
49
|
+
----------------------------------------------------------------------
|
|
50
|
+
*/
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
/*
|
|
54
|
+
----------------------------------------------------------------------
|
|
55
|
+
M a c r o s
|
|
56
|
+
----------------------------------------------------------------------
|
|
57
|
+
*/
|
|
58
|
+
|
|
59
|
+
/*
|
|
60
|
+
----------------------------------------------------------------------
|
|
61
|
+
F u n c t i o n s
|
|
62
|
+
----------------------------------------------------------------------
|
|
63
|
+
*/
|
|
64
|
+
|
|
65
|
+
NODE_MAP build_node_map(EDGE_ARRAY dawg,
|
|
66
|
+
inT32 *num_nodes,
|
|
67
|
+
inT32 both_links,
|
|
68
|
+
inT32 max_num_edges,
|
|
69
|
+
inT32 reserved_edges);
|
|
70
|
+
|
|
71
|
+
void compact_dawg(EDGE_ARRAY dawg,
|
|
72
|
+
inT32 max_num_edges,
|
|
73
|
+
inT32 reserved_edges);
|
|
74
|
+
|
|
75
|
+
void delete_node(EDGE_ARRAY dawg,
|
|
76
|
+
NODE_REF node);
|
|
77
|
+
|
|
78
|
+
void write_squished_dawg(const char *filename,
|
|
79
|
+
EDGE_ARRAY dawg,
|
|
80
|
+
inT32 max_num_edges,
|
|
81
|
+
inT32 reserved_edges);
|
|
82
|
+
|
|
83
|
+
#endif
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/******************************************************************************
|
|
2
|
+
** Filename: matchdefs.h
|
|
3
|
+
** Purpose: Generic interface definitions for feature matchers.
|
|
4
|
+
** Author: Dan Johnson
|
|
5
|
+
** History: Fri Jan 19 09:21:25 1990, DSJ, Created.
|
|
6
|
+
**
|
|
7
|
+
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
******************************************************************************/
|
|
18
|
+
#ifndef MATCHDEFS_H
|
|
19
|
+
#define MATCHDEFS_H
|
|
20
|
+
|
|
21
|
+
/**----------------------------------------------------------------------------
|
|
22
|
+
Include Files and Type Defines
|
|
23
|
+
----------------------------------------------------------------------------**/
|
|
24
|
+
#include "general.h"
|
|
25
|
+
#include <stdio.h>
|
|
26
|
+
#include "unichar.h"
|
|
27
|
+
|
|
28
|
+
/* define the maximum number of classes defined for any matcher
|
|
29
|
+
and the maximum class id for any matcher. This must be changed
|
|
30
|
+
if more different classes need to be classified */
|
|
31
|
+
#define MAX_NUM_CLASSES 8192
|
|
32
|
+
#define MAX_CLASS_ID (MAX_NUM_CLASSES - 1)
|
|
33
|
+
|
|
34
|
+
/* a CLASS_ID is the unichar id (originally the ascii character) to be associated with a class */
|
|
35
|
+
typedef UNICHAR_ID CLASS_ID;
|
|
36
|
+
#define NO_CLASS (0)
|
|
37
|
+
|
|
38
|
+
/* define a type for the index (rather than the class id) of a class.
|
|
39
|
+
Class indexes are sequentially defined, while class id's are defined
|
|
40
|
+
by the ascii character set. */
|
|
41
|
+
typedef inT16 CLASS_INDEX;
|
|
42
|
+
typedef CLASS_INDEX CLASS_TO_INDEX[MAX_CLASS_ID + 1];
|
|
43
|
+
typedef CLASS_ID INDEX_TO_CLASS[MAX_NUM_CLASSES];
|
|
44
|
+
#define ILLEGAL_CLASS (-1)
|
|
45
|
+
|
|
46
|
+
/* a PROTO_ID is the index of a prototype within its class. Valid proto
|
|
47
|
+
id's are 0 to N-1 where N is the number of prototypes that make up the
|
|
48
|
+
class. */
|
|
49
|
+
typedef inT16 PROTO_ID;
|
|
50
|
+
#define NO_PROTO (-1)
|
|
51
|
+
|
|
52
|
+
/* FEATURE_ID is the index of a feature within a character description
|
|
53
|
+
The feature id ranges from 0 to N-1 where N is the number
|
|
54
|
+
of features in a character description. */
|
|
55
|
+
typedef uinT8 FEATURE_ID;
|
|
56
|
+
#define NO_FEATURE 255
|
|
57
|
+
#define NOISE_FEATURE 254
|
|
58
|
+
#define MISSING_PROTO 254
|
|
59
|
+
#define MAX_NUM_FEAT 40
|
|
60
|
+
#define MAX_FEATURE_ID 250
|
|
61
|
+
|
|
62
|
+
/* a RATING is the match rating returned by a classifier.
|
|
63
|
+
Higher is better. */
|
|
64
|
+
typedef FLOAT32 RATING;
|
|
65
|
+
|
|
66
|
+
/* a CERTAINTY is an indication of the degree of confidence of the
|
|
67
|
+
classifier. Higher is better. 0 means the match is as good as the
|
|
68
|
+
mean of the matches seen in training. -1 means the match was one
|
|
69
|
+
standard deviation worse than the training matches, etc. */
|
|
70
|
+
typedef FLOAT32 CERTAINTY;
|
|
71
|
+
|
|
72
|
+
/* define a data structure to hold a single match result */
|
|
73
|
+
typedef struct
|
|
74
|
+
{
|
|
75
|
+
CLASS_ID Class;
|
|
76
|
+
RATING Rating;
|
|
77
|
+
CERTAINTY Certainty;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
MATCH_RESULT;
|
|
82
|
+
|
|
83
|
+
/* define a data structure for holding an array of match results */
|
|
84
|
+
typedef MATCH_RESULT SORTED_CLASSES[MAX_CLASS_ID + 1];
|
|
85
|
+
|
|
86
|
+
/**----------------------------------------------------------------------------
|
|
87
|
+
Public Function Prototypes
|
|
88
|
+
----------------------------------------------------------------------------**/
|
|
89
|
+
/* all feature matchers that are to be used with the high level
|
|
90
|
+
classifier must support the following interface. The names will, of
|
|
91
|
+
course, be unique for each different matcher. Note also that
|
|
92
|
+
FEATURE_STRUCT is a data structure that is defined specifically for
|
|
93
|
+
each feature extractor/matcher pair.
|
|
94
|
+
|
|
95
|
+
void InitClassifier ();
|
|
96
|
+
|
|
97
|
+
void InitClassifierVars ();
|
|
98
|
+
|
|
99
|
+
int TweekClassifier (char *Params);
|
|
100
|
+
|
|
101
|
+
void InitQuickGuess (FEATURE_STRUCT *CharFeatures);
|
|
102
|
+
|
|
103
|
+
CLASS_ID NextQuickGuess ();
|
|
104
|
+
|
|
105
|
+
void MatchCharToClass (CLASS_ID
|
|
106
|
+
ClassID,
|
|
107
|
+
FEATURE_STRUCT
|
|
108
|
+
*CharFeatures,
|
|
109
|
+
MATCH_RESULT
|
|
110
|
+
*MatchResult);
|
|
111
|
+
|
|
112
|
+
void DebugMatch (CLASS_ID
|
|
113
|
+
ClassID,
|
|
114
|
+
FEATURE_STRUCT
|
|
115
|
+
*CharFeatures,
|
|
116
|
+
MATCH_RESULT
|
|
117
|
+
*MatchResult);
|
|
118
|
+
|
|
119
|
+
*/
|
|
120
|
+
|
|
121
|
+
/* misc test functions for proto id's and feature id's */
|
|
122
|
+
/* Non-zero when Fid is strictly below MAX_FEATURE_ID. */
#define IsValidFeature(Fid) ((Fid) < MAX_FEATURE_ID)

/* Non-zero when Pid is not negative (NO_PROTO is -1). */
#define IsValidProto(Pid) ((Pid) >= 0)
|
|
124
|
+
|
|
125
|
+
#if defined(__STDC__) || defined(__cplusplus)
|
|
126
|
+
# define _ARGS(s) s
|
|
127
|
+
#else
|
|
128
|
+
# define _ARGS(s) ()
|
|
129
|
+
#endif
|
|
130
|
+
|
|
131
|
+
/* matchdefs.c */
|
|
132
|
+
int CompareMatchResults
|
|
133
|
+
_ARGS ((MATCH_RESULT * Result1, MATCH_RESULT * Result2));
|
|
134
|
+
|
|
135
|
+
void PrintMatchResult _ARGS ((FILE * File, MATCH_RESULT * MatchResult));
|
|
136
|
+
|
|
137
|
+
void PrintMatchResults
|
|
138
|
+
_ARGS ((FILE * File, int N, MATCH_RESULT MatchResults[]));
|
|
139
|
+
|
|
140
|
+
#undef _ARGS
|
|
141
|
+
|
|
142
|
+
/**----------------------------------------------------------------------------
|
|
143
|
+
Global Data Definitions and Declarations
|
|
144
|
+
----------------------------------------------------------------------------**/
|
|
145
|
+
#endif
|