tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
/* -*-C-*-
|
|
2
|
+
********************************************************************************
|
|
3
|
+
*
|
|
4
|
+
* File: dawg.h (Formerly dawg.h)
|
|
5
|
+
* Description:
|
|
6
|
+
* Author: Mark Seaman, SW Productivity
|
|
7
|
+
* Created: Fri Oct 16 14:37:00 1987
|
|
8
|
+
* Modified: Wed Jun 19 16:50:24 1991 (Mark Seaman) marks@hpgrlt
|
|
9
|
+
* Language: C
|
|
10
|
+
* Package: N/A
|
|
11
|
+
* Status: Reusable Software Component
|
|
12
|
+
*
|
|
13
|
+
* (c) Copyright 1987, Hewlett-Packard Company.
|
|
14
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
15
|
+
** you may not use this file except in compliance with the License.
|
|
16
|
+
** You may obtain a copy of the License at
|
|
17
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
18
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
** See the License for the specific language governing permissions and
|
|
22
|
+
** limitations under the License.
|
|
23
|
+
*
|
|
24
|
+
*********************************************************************************/
|
|
25
|
+
|
|
26
|
+
#ifndef DAWG_H
|
|
27
|
+
#define DAWG_H
|
|
28
|
+
|
|
29
|
+
/*----------------------------------------------------------------------
|
|
30
|
+
I n c l u d e s
|
|
31
|
+
----------------------------------------------------------------------*/
|
|
32
|
+
#include <ctype.h>
|
|
33
|
+
#include "general.h"
|
|
34
|
+
|
|
35
|
+
/*----------------------------------------------------------------------
|
|
36
|
+
T y p e s
|
|
37
|
+
----------------------------------------------------------------------*/
|
|
38
|
+
/* #define MAX_WERD_LENGTH (inT32) 40 */
|
|
39
|
+
/* #define MAX_NODE_EDGES_DISPLAY (inT32) 100 */
|
|
40
|
+
/* #define LAST_FLAG (inT32) 1 */
|
|
41
|
+
/* #define DIRECTION_FLAG (inT32) 2 */
|
|
42
|
+
/* #define WERD_END_FLAG (inT32) 4 */
|
|
43
|
+
|
|
44
|
+
/* #define LETTER_START_BIT 0 */
|
|
45
|
+
/* #define FLAG_START_BIT 8 */
|
|
46
|
+
/* #define NEXT_EDGE_START_BIT 11 */
|
|
47
|
+
|
|
48
|
+
/* #define NO_EDGE (inT32) 0x001fffff */
|
|
49
|
+
|
|
50
|
+
/* #define NEXT_EDGE_MASK (inT32) 0xfffff800 */
|
|
51
|
+
/* #define FLAGS_MASK (inT32) 0x00000700 */
|
|
52
|
+
/* #define LETTER_MASK (inT32) 0x000000ff */
|
|
53
|
+
|
|
54
|
+
/* #define REFFORMAT "%d" */
|
|
55
|
+
|
|
56
|
+
/* typedef uinT32 EDGE_RECORD; */
|
|
57
|
+
/* typedef EDGE_RECORD *EDGE_ARRAY; */
|
|
58
|
+
/* typedef inT32 EDGE_REF; */
|
|
59
|
+
/* typedef inT32 NODE_REF; */
|
|
60
|
+
|
|
61
|
+
/* Size limits and the three per-edge flag values.  Every expansion is
 * wrapped in parentheses so that the inT64 cast stays attached to its
 * constant no matter which operator the macro is placed next to. */
#define MAX_WERD_LENGTH        ((inT64) 40)
#define MAX_NODE_EDGES_DISPLAY ((inT64) 100)
#define LAST_FLAG              ((inT64) 1)
#define DIRECTION_FLAG         ((inT64) 2)
#define WERD_END_FLAG          ((inT64) 4)

/* Bit position at which each field of an edge record starts. */
#define LETTER_START_BIT       0
#define FLAG_START_BIT         8
#define NEXT_EDGE_START_BIT    11

/* 64-bit field masks.  LETTER_MASK covers bits 0-7, FLAGS_MASK bits 8-10
 * and NEXT_EDGE_MASK bits 11-63.  NO_EDGE is 53 one-bits, i.e. the largest
 * value the next-edge field can hold.  The two branches differ only in the
 * integer-literal suffix (i64 under __MSW32__, ll elsewhere). */
#ifdef __MSW32__
#define NO_EDGE                ((inT64) 0x001fffffffffffffi64)
#define NEXT_EDGE_MASK         ((inT64) 0xfffffffffffff800i64)
#define FLAGS_MASK             ((inT64) 0x0000000000000700i64)
#define LETTER_MASK            ((inT64) 0x00000000000000ffi64)
#else
#define NO_EDGE                ((inT64) 0x001fffffffffffffll)
#define NEXT_EDGE_MASK         ((inT64) 0xfffffffffffff800ll)
#define FLAGS_MASK             ((inT64) 0x0000000000000700ll)
#define LETTER_MASK            ((inT64) 0x00000000000000ffll)
#endif

#define MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE 3000000

/* printf conversion matching the 64-bit EDGE_REF / NODE_REF types. */
#define REFFORMAT "%lld"
|
|
86
|
+
|
|
87
|
+
typedef uinT64 EDGE_RECORD;
|
|
88
|
+
typedef EDGE_RECORD *EDGE_ARRAY;
|
|
89
|
+
typedef inT64 EDGE_REF;
|
|
90
|
+
typedef inT64 NODE_REF;
|
|
91
|
+
|
|
92
|
+
/*---------------------------------------------------------------------
|
|
93
|
+
V a r i a b l e s
|
|
94
|
+
----------------------------------------------------------------------*/
|
|
95
|
+
extern inT32 case_sensative;
|
|
96
|
+
extern inT32 debug;
|
|
97
|
+
|
|
98
|
+
/*----------------------------------------------------------------------
|
|
99
|
+
M a c r o s
|
|
100
|
+
----------------------------------------------------------------------*/
|
|
101
|
+
/**********************************************************************
 * edge_of
 *
 * Yield the edge record stored at index e of the edge array.  The
 * result is the array element itself, so it can be read or assigned.
 **********************************************************************/

#define edge_of(edges,e) ((edges)[(e)])
|
|
109
|
+
|
|
110
|
+
/**********************************************************************
 * print_edge
 *
 * Print one line describing a single edge entry in the DAWG: its index,
 * the node it leads to, its letter, and its FORWARD / LAST / EOW flags.
 *
 * Edge and node references are 64-bit, so they are cast to long long
 * and printed with %lld; passing them to %d is undefined behaviour.
 **********************************************************************/

#define print_edge(dawg,edge) \
  printf ("%7lld : next = %7lld, char = '%c', %s %s %s\n", \
          (long long) (edge), (long long) next_node (dawg, edge), \
          edge_letter (dawg, edge), \
          (forward_edge (dawg, edge) ? "FORWARD" : " "), \
          (last_edge (dawg, edge) ? "LAST" : " "), \
          (end_of_word (dawg, edge) ? "EOW" : ""))
|
|
122
|
+
|
|
123
|
+
/**********************************************************************
 * next_node
 *
 * Extract the next-node field of edge e: the bits selected by
 * NEXT_EDGE_MASK, shifted down by NEXT_EDGE_START_BIT.
 **********************************************************************/

#define next_node(edges,e) \
  (((edges)[(e)] & NEXT_EDGE_MASK) >> NEXT_EDGE_START_BIT)
|
|
131
|
+
|
|
132
|
+
/**********************************************************************
 * set_next_edge
 *
 * Store value in the next-node field of edge e, leaving the letter and
 * flag bits of that edge untouched.
 *
 * value is parenthesized and widened to EDGE_RECORD before it is
 * shifted, so expression arguments group correctly and a narrow (int)
 * argument cannot overflow during the shift.  Note that e is evaluated
 * twice.
 **********************************************************************/

#define set_next_edge(edges,e,value) \
  ((edges)[(e)] = ((edges)[(e)] & (~NEXT_EDGE_MASK)) | \
   (((EDGE_RECORD) (value) << NEXT_EDGE_START_BIT) & NEXT_EDGE_MASK))
|
|
141
|
+
|
|
142
|
+
/**********************************************************************
 * empty_edge_spot
 *
 * TRUE when slot e of the edge array is unoccupied, i.e. when it holds
 * exactly the NEXT_EDGE_MASK pattern.
 **********************************************************************/

#define empty_edge_spot(edges,e) ((edges)[(e)] == NEXT_EDGE_MASK)
|
|
150
|
+
|
|
151
|
+
/**********************************************************************
 * set_empty_edge
 *
 * Mark slot e of the edge array as unoccupied by storing the
 * NEXT_EDGE_MASK pattern in it.
 **********************************************************************/

#define set_empty_edge(edges,e) ((edges)[(e)] = NEXT_EDGE_MASK)
|
|
159
|
+
|
|
160
|
+
/**********************************************************************
 * clear_all_edges
 *
 * Mark the first max_num_edges slots of the DAWG as unoccupied.  edge
 * must be an assignable variable; it is used as the loop counter and
 * equals max_num_edges afterwards.
 *
 * The arguments are parenthesized so that an expression passed as the
 * bound is compared as a whole.  The expansion still ends in a
 * semicolon, as it always has, so existing call sites keep compiling;
 * because of that the macro must not be used as the unbraced body of an
 * if that has an else.
 **********************************************************************/

#define clear_all_edges(dawg,edge,max_num_edges) \
  for ((edge) = 0; (edge) < (max_num_edges); (edge)++) \
    set_empty_edge (dawg, edge);
|
|
169
|
+
|
|
170
|
+
/**********************************************************************
 * edge_occupied
 *
 * TRUE when slot e of the edge array is in use, i.e. when it holds
 * anything other than the NEXT_EDGE_MASK pattern.
 **********************************************************************/

#define edge_occupied(edges,e) ((edges)[(e)] != NEXT_EDGE_MASK)
|
|
178
|
+
|
|
179
|
+
/**********************************************************************
 * edge_flags
 *
 * Extract the flag field of edge e: the bits selected by FLAGS_MASK,
 * shifted down by FLAG_START_BIT.
 **********************************************************************/

#define edge_flags(edges,e) \
  (((edges)[(e)] & FLAGS_MASK) >> FLAG_START_BIT)
|
|
187
|
+
|
|
188
|
+
/**********************************************************************
 * edge_letter
 *
 * Extract, as an int, the letter field of edge e: the bits selected by
 * LETTER_MASK, shifted down by LETTER_START_BIT.
 **********************************************************************/

#define edge_letter(edges,e) \
  (static_cast<int>(((edges)[(e)] & LETTER_MASK) >> LETTER_START_BIT))
|
|
196
|
+
|
|
197
|
+
/**********************************************************************
 * letter_of_edge
 *
 * Extract, as an int, the letter field from an edge record value (as
 * opposed to edge_letter, which takes an array and an index).
 *
 * The argument is parenthesized so that an expression such as a | b is
 * masked as a whole.
 **********************************************************************/

#define letter_of_edge(edge) \
  (static_cast<int>(((edge) & LETTER_MASK) >> LETTER_START_BIT))
|
|
205
|
+
|
|
206
|
+
/**********************************************************************
 * last_edge
 *
 * Non-zero when edge e carries LAST_FLAG, i.e. it is the last edge in
 * its sequence.  This holds for the last edge of both the forward and
 * the backward part.
 **********************************************************************/

#define last_edge(edges,e) \
  ((edges)[(e)] & (LAST_FLAG << FLAG_START_BIT))
|
|
215
|
+
|
|
216
|
+
/**********************************************************************
 * end_of_word
 *
 * Non-zero when edge e carries WERD_END_FLAG, i.e. it marks the end of
 * a word.
 **********************************************************************/

#define end_of_word(edges,e) \
  ((edges)[(e)] & (WERD_END_FLAG << FLAG_START_BIT))
|
|
224
|
+
|
|
225
|
+
/**********************************************************************
 * forward_edge
 *
 * TRUE when edge e has DIRECTION_FLAG set and its slot is occupied,
 * i.e. it is a real edge in the forward direction.
 **********************************************************************/

#define forward_edge(edges,e) \
  ((((edges)[(e)]) & (DIRECTION_FLAG << FLAG_START_BIT)) && \
   edge_occupied (edges,e))
|
|
234
|
+
|
|
235
|
+
/**********************************************************************
 * backward_edge
 *
 * TRUE when edge e has DIRECTION_FLAG clear and its slot is occupied,
 * i.e. it is a real edge in the backward direction.
 **********************************************************************/

#define backward_edge(edges,e) \
  ((! (((edges)[(e)]) & (DIRECTION_FLAG << FLAG_START_BIT))) && \
   edge_occupied (edges,e))
|
|
244
|
+
|
|
245
|
+
/**********************************************************************
 * edge_loop
 *
 * Expands to a while header that keeps looping until the edge just
 * examined carries LAST_FLAG.  e must be an assignable variable: it is
 * post-incremented on every test, so when the loop stops it is one
 * past the last edge of the run.
 *
 * NOTE(review): presumably meant to close a do { ... } block, or to
 * stand alone with an empty body to skip to the end of a run; confirm
 * against the callers.
 *********************************************************************/

#define edge_loop(edges,e) while (! last_edge (edges,e++))
|
|
254
|
+
|
|
255
|
+
/**********************************************************************
 * case_is_okay
 *
 * Check the case of character i of word against the characters before
 * it.  The result is FALSE when
 *   - word[i] is upper case and word[i-1] is lower case, or
 *   - word[i] is lower case, word[i-1] is upper case, i > 1 and
 *     word[i-2] is alphabetic;
 * otherwise, and always for i == 0, it is TRUE.
 *
 * Characters are converted to unsigned char before being handed to the
 * <ctype.h> classifiers, because passing a negative char value to them
 * is undefined.  Both arguments are parenthesized; each is evaluated
 * more than once.
 **********************************************************************/
// TODO(tkielbus) Replace isalpha, islower & isupper by unicode versions.
// However the lengths information is not available at this point in the
// code. We will probably get rid of the dictionaries at some point anyway.
#define case_is_okay(word,i) \
  ((i) ? \
   ((isupper ((unsigned char) (word)[(i)]) && \
     islower ((unsigned char) (word)[(i) - 1])) ? \
    FALSE : \
    ((islower ((unsigned char) (word)[(i)]) && \
      isupper ((unsigned char) (word)[(i) - 1]) && \
      (i) > 1 && isalpha ((unsigned char) (word)[(i) - 2])) ? \
     FALSE : \
     TRUE)) : \
   TRUE)
|
|
273
|
+
|
|
274
|
+
/**********************************************************************
 * trailing_punc
 *
 * Check for trailing punctuation: TRUE when ch is one of
 *   } : ; - ] ! ? ` , . ) " '
 *
 * ch is parenthesized so that an expression argument is compared as a
 * whole; it is evaluated once per comparison.
 **********************************************************************/

#define trailing_punc(ch) \
  (((ch) == '}' ) || \
   ((ch) == ':' ) || \
   ((ch) == ';' ) || \
   ((ch) == '-' ) || \
   ((ch) == ']' ) || \
   ((ch) == '!' ) || \
   ((ch) == '?' ) || \
   ((ch) == '`' ) || \
   ((ch) == ',' ) || \
   ((ch) == '.' ) || \
   ((ch) == ')' ) || \
   ((ch) == '\"' ) || \
   ((ch) == '\'' ))
|
|
294
|
+
|
|
295
|
+
/**********************************************************************
 * leading_punc
 *
 * Check for leading punctuation: TRUE when ch is one of
 *   " ( { [ ` '
 *
 * ch is parenthesized so that an expression argument is compared as a
 * whole; it is evaluated once per comparison.
 **********************************************************************/

#define leading_punc(ch) \
  (((ch) == '\"' ) || \
   ((ch) == '(' ) || \
   ((ch) == '{' ) || \
   ((ch) == '[' ) || \
   ((ch) == '`' ) || \
   ((ch) == '\'' ))
|
|
308
|
+
|
|
309
|
+
/*----------------------------------------------------------------------
|
|
310
|
+
F u n c t i o n s
|
|
311
|
+
----------------------------------------------------------------------*/
|
|
312
|
+
EDGE_REF edge_char_of(EDGE_ARRAY dawg,
|
|
313
|
+
NODE_REF node,
|
|
314
|
+
int character,
|
|
315
|
+
int word_end);
|
|
316
|
+
|
|
317
|
+
inT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node);
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
inT32 def_letter_is_okay(EDGE_ARRAY dawg,
|
|
321
|
+
NODE_REF *node,
|
|
322
|
+
inT32 char_index,
|
|
323
|
+
char prevchar,
|
|
324
|
+
const char *word,
|
|
325
|
+
inT32 word_end);
|
|
326
|
+
|
|
327
|
+
/*
|
|
328
|
+
* Allow for externally provided letter_is_okay.
|
|
329
|
+
*/
|
|
330
|
+
typedef inT32 (*LETTER_OK_FUNC)(EDGE_ARRAY, NODE_REF*, inT32, char, const char*,
|
|
331
|
+
inT32);
|
|
332
|
+
extern LETTER_OK_FUNC letter_is_okay;
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
inT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node);
|
|
336
|
+
|
|
337
|
+
void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node);
|
|
338
|
+
|
|
339
|
+
EDGE_ARRAY read_squished_dawg(const char *filename);
|
|
340
|
+
|
|
341
|
+
inT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, inT32 char_index);
|
|
342
|
+
|
|
343
|
+
inT32 word_in_dawg(EDGE_ARRAY dawg, const char *string);
|
|
344
|
+
|
|
345
|
+
/*
|
|
346
|
+
#if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS
|
|
347
|
+
# define _ARGS(s) s
|
|
348
|
+
#else
|
|
349
|
+
# define _ARGS(s) ()
|
|
350
|
+
#endif*/
|
|
351
|
+
|
|
352
|
+
/* dawg.c
|
|
353
|
+
EDGE_REF edge_char_of
|
|
354
|
+
_ARGS((EDGE_ARRAY dawg,
|
|
355
|
+
NODE_REF node,
|
|
356
|
+
int character,
|
|
357
|
+
int word_end));
|
|
358
|
+
|
|
359
|
+
inT32 edges_in_node
|
|
360
|
+
_ARGS((EDGE_ARRAY dawg,
|
|
361
|
+
NODE_REF node));
|
|
362
|
+
|
|
363
|
+
inT32 def_letter_is_okay
|
|
364
|
+
_ARGS((EDGE_ARRAY dawg,
|
|
365
|
+
NODE_REF *node,
|
|
366
|
+
inT32 char_index,
|
|
367
|
+
char *word,
|
|
368
|
+
inT32 word_end));
|
|
369
|
+
|
|
370
|
+
inT32 num_forward_edges
|
|
371
|
+
_ARGS((EDGE_ARRAY dawg,
|
|
372
|
+
NODE_REF node));
|
|
373
|
+
|
|
374
|
+
void print_dawg_node
|
|
375
|
+
_ARGS((EDGE_ARRAY dawg,
|
|
376
|
+
NODE_REF node));
|
|
377
|
+
|
|
378
|
+
void read_squished_dawg
|
|
379
|
+
_ARGS((char *filename,
|
|
380
|
+
EDGE_ARRAY dawg,
|
|
381
|
+
inT32 max_num_edges));
|
|
382
|
+
|
|
383
|
+
inT32 verify_trailing_punct
|
|
384
|
+
_ARGS((EDGE_ARRAY dawg,
|
|
385
|
+
char *word,
|
|
386
|
+
inT32 char_index));
|
|
387
|
+
|
|
388
|
+
inT32 word_in_dawg
|
|
389
|
+
_ARGS((EDGE_ARRAY dawg,
|
|
390
|
+
char *string));
|
|
391
|
+
|
|
392
|
+
#undef _ARGS
|
|
393
|
+
*/
|
|
394
|
+
#endif
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/* -*-C-*-
|
|
2
|
+
********************************************************************************
|
|
3
|
+
*
|
|
4
|
+
* File: hyphen.c (Formerly hyphen.c)
|
|
5
|
+
* Description:
|
|
6
|
+
* Author: Mark Seaman, OCR Technology
|
|
7
|
+
* Created: Fri Oct 16 14:37:00 1987
|
|
8
|
+
* Modified: Thu Mar 14 11:09:43 1991 (Mark Seaman) marks@hpgrlt
|
|
9
|
+
* Language: C
|
|
10
|
+
* Package: N/A
|
|
11
|
+
* Status: Reusable Software Component
|
|
12
|
+
*
|
|
13
|
+
* (c) Copyright 1987, Hewlett-Packard Company.
|
|
14
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
15
|
+
** you may not use this file except in compliance with the License.
|
|
16
|
+
** You may obtain a copy of the License at
|
|
17
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
18
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
** See the License for the specific language governing permissions and
|
|
22
|
+
** limitations under the License.
|
|
23
|
+
*
|
|
24
|
+
*********************************************************************************/
|
|
25
|
+
/*----------------------------------------------------------------------
|
|
26
|
+
I n c l u d e s
|
|
27
|
+
----------------------------------------------------------------------*/
|
|
28
|
+
#include "const.h"
|
|
29
|
+
#include "hyphen.h"
|
|
30
|
+
#include "tordvars.h"
|
|
31
|
+
#include "callcpp.h"
|
|
32
|
+
#include <math.h>
|
|
33
|
+
|
|
34
|
+
/*----------------------------------------------------------------------
|
|
35
|
+
V a r i a b l e s
|
|
36
|
+
----------------------------------------------------------------------*/
|
|
37
|
+
int last_word_on_line = 0;
|
|
38
|
+
char *hyphen_string = 0;
|
|
39
|
+
char *hyphen_unichar_lengths = 0;
|
|
40
|
+
int *hyphen_unichar_offsets = NULL;
|
|
41
|
+
float hyphen_rating = MAXFLOAT;
|
|
42
|
+
NODE_REF hyphen_state = 0;
|
|
43
|
+
|
|
44
|
+
/*----------------------------------------------------------------------
|
|
45
|
+
F u n c t i o n s
|
|
46
|
+
---------------------------------------------------------------------*/
|
|
47
|
+
/**********************************************************************
|
|
48
|
+
* set_hyphen_word
|
|
49
|
+
*
|
|
50
|
+
* If this hyphenated word choice is better than the last one then add
|
|
51
|
+
* it as the new word choice. This string can be used on the next
|
|
52
|
+
* line to permute the other half of the word.
|
|
53
|
+
**********************************************************************/
|
|
54
|
+
void set_hyphen_word(char *word, char *unichar_lengths, int *unichar_offsets,
|
|
55
|
+
float rating, NODE_REF state) {
|
|
56
|
+
int char_index = strlen (unichar_lengths) - 1;
|
|
57
|
+
|
|
58
|
+
if (display_ratings)
|
|
59
|
+
cprintf ("set hyphen word = %s\n", word);
|
|
60
|
+
|
|
61
|
+
if (hyphen_rating > rating && char_index > 0) {
|
|
62
|
+
word[unichar_offsets[char_index]] = '\0';
|
|
63
|
+
unichar_lengths[char_index] = 0;
|
|
64
|
+
|
|
65
|
+
if (hyphen_string)
|
|
66
|
+
{
|
|
67
|
+
strfree(hyphen_string);
|
|
68
|
+
strfree(hyphen_unichar_lengths);
|
|
69
|
+
Efree(hyphen_unichar_offsets);
|
|
70
|
+
}
|
|
71
|
+
hyphen_string = strsave (word);
|
|
72
|
+
hyphen_unichar_lengths = strsave (unichar_lengths);
|
|
73
|
+
hyphen_unichar_offsets = (int *)
|
|
74
|
+
Emalloc((strlen(unichar_lengths)) * sizeof (int));
|
|
75
|
+
memcpy(hyphen_unichar_offsets, unichar_offsets,
|
|
76
|
+
(strlen(unichar_lengths)) * sizeof (int));
|
|
77
|
+
|
|
78
|
+
hyphen_state = state;
|
|
79
|
+
hyphen_rating = rating;
|
|
80
|
+
|
|
81
|
+
word[unichar_offsets[char_index]] = '-';
|
|
82
|
+
unichar_lengths[char_index] = 1;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/* -*-C-*-
|
|
2
|
+
********************************************************************************
|
|
3
|
+
*
|
|
4
|
+
* File: hyphen.h (Formerly hyphen.h)
|
|
5
|
+
* Description:
|
|
6
|
+
* Author: Mark Seaman, SW Productivity
|
|
7
|
+
* Created: Fri Oct 16 14:37:00 1987
|
|
8
|
+
* Modified: Mon Jan 14 17:52:50 1991 (Mark Seaman) marks@hpgrlt
|
|
9
|
+
* Language: C
|
|
10
|
+
* Package: N/A
|
|
11
|
+
* Status: Reusable Software Component
|
|
12
|
+
*
|
|
13
|
+
* (c) Copyright 1987, Hewlett-Packard Company.
|
|
14
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
15
|
+
** you may not use this file except in compliance with the License.
|
|
16
|
+
** You may obtain a copy of the License at
|
|
17
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
18
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
** See the License for the specific language governing permissions and
|
|
22
|
+
** limitations under the License.
|
|
23
|
+
*
|
|
24
|
+
*********************************************************************************/
|
|
25
|
+
#ifndef HYPHEN_H
|
|
26
|
+
#define HYPHEN_H
|
|
27
|
+
|
|
28
|
+
/*----------------------------------------------------------------------
|
|
29
|
+
I n c l u d e s
|
|
30
|
+
----------------------------------------------------------------------*/
|
|
31
|
+
#include "choices.h"
|
|
32
|
+
#include "emalloc.h"
|
|
33
|
+
#include "dawg.h"
|
|
34
|
+
|
|
35
|
+
/*----------------------------------------------------------------------
|
|
36
|
+
V a r i a b l e s
|
|
37
|
+
----------------------------------------------------------------------*/
|
|
38
|
+
extern int last_word_on_line;
|
|
39
|
+
extern char *hyphen_string;
|
|
40
|
+
extern char *hyphen_unichar_lengths;
|
|
41
|
+
extern int *hyphen_unichar_offsets;
|
|
42
|
+
extern float hyphen_rating;
|
|
43
|
+
extern NODE_REF hyphen_state;
|
|
44
|
+
|
|
45
|
+
/*----------------------------------------------------------------------
|
|
46
|
+
M a c r o s
|
|
47
|
+
----------------------------------------------------------------------*/
|
|
48
|
+
/**********************************************************************
 * set_last_word
 *
 * Raise the flag that indicates that this is the last word on a line.
 **********************************************************************/

#define set_last_word() (last_word_on_line = TRUE)
|
|
56
|
+
|
|
57
|
+
/**********************************************************************
 * reset_hyphen_word
 *
 * Erase the hyphenation word that may have been stored: unless the
 * last-word-on-line flag is set, free the three saved buffers and put
 * every hyphen_* variable back to its empty value.
 *
 * The body is wrapped in do { ... } while (0) so the macro behaves as a
 * single statement (safe as the unbraced body of an if/else).  Call it
 * with a trailing semicolon.
 **********************************************************************/

#define reset_hyphen_word() \
  do { \
    if (last_word_on_line == FALSE) { \
      if (hyphen_string) strfree (hyphen_string); \
      if (hyphen_unichar_lengths) strfree (hyphen_unichar_lengths); \
      if (hyphen_unichar_offsets) Efree (hyphen_unichar_offsets); \
      hyphen_string = NULL; \
      hyphen_unichar_lengths = NULL; \
      hyphen_unichar_offsets = NULL; \
      hyphen_rating = MAX_FLOAT32; \
      hyphen_state = 0; \
    } \
  } while (0)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
/**********************************************************************
 * reset_last_word
 *
 * Lower the flag that indicates that this is the last word on a line.
 **********************************************************************/

#define reset_last_word() (last_word_on_line = FALSE)
|
|
84
|
+
|
|
85
|
+
/**********************************************************************
 * is_last_word
 *
 * Read the flag that indicates that this is the last word on a line.
 * The value of last_word_on_line is returned unchanged.
 **********************************************************************/

#define is_last_word() (last_word_on_line)
|
|
93
|
+
|
|
94
|
+
/**********************************************************************
 * hyphen_base_size
 *
 * Size, in unichars, of the base word (the part on the line before) of
 * a hyphenated compound word.  It is 0 when this is the last word on
 * the line or when no hyphen word is stored; otherwise it is the number
 * of entries in hyphen_unichar_lengths.
 **********************************************************************/

#define hyphen_base_size() \
  ((is_last_word () || ! hyphen_string) ? \
   (0) : \
   (strlen (hyphen_unichar_lengths)))
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
/**********************************************************************
 * hyphen_tail
 *
 * Return a pointer to the part of the word that was not on the
 * previous line.  This routine is used for words that were split
 * between lines and hyphenated.
 *
 * When a base word is stored, the pointer is word advanced by the
 * offset plus the length of the base word's last unichar; otherwise it
 * is word itself.  word is parenthesized so that a pointer expression
 * such as p + 1 is indexed as a whole.
 **********************************************************************/

#define hyphen_tail(word) \
  (&(word)[hyphen_base_size () > 0 ? \
           (hyphen_unichar_offsets[hyphen_base_size () - 1] + \
            hyphen_unichar_lengths[hyphen_base_size () - 1]) : 0])
|
|
119
|
+
|
|
120
|
+
/*----------------------------------------------------------------------
|
|
121
|
+
Public Function Prototypes
|
|
122
|
+
----------------------------------------------------------------------*/
|
|
123
|
+
void set_hyphen_word(char *word, char *unichar_lengths, int *unichar_offsets,
|
|
124
|
+
float rating, NODE_REF state);
|
|
125
|
+
#endif
|