tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
#ifndef TESSCLAS_H
|
|
2
|
+
#define TESSCLAS_H 1
|
|
3
|
+
|
|
4
|
+
#define SPLINESIZE 23 /*max spline parts to a line */
|
|
5
|
+
|
|
6
|
+
#define TBLOBFLAGS 4 /*No of flags in a blob */
|
|
7
|
+
#define MAX_WO_CLASSES 3
|
|
8
|
+
#define EDGEPTFLAGS 4 /*concavity,length etc. */
|
|
9
|
+
|
|
10
|
+
typedef struct
|
|
11
|
+
{
|
|
12
|
+
double a; /*x squared */
|
|
13
|
+
double b; /*x */
|
|
14
|
+
double c; /*constant */
|
|
15
|
+
} QUAD_SPEC; /*definition of quadratic */
|
|
16
|
+
|
|
17
|
+
typedef struct
|
|
18
|
+
{
|
|
19
|
+
int segments; /*no of spline segments */
|
|
20
|
+
int xstarts[SPLINESIZE]; /*start x coords */
|
|
21
|
+
QUAD_SPEC quads[SPLINESIZE]; /*quadratic sections */
|
|
22
|
+
} SPLINE_SPEC; /*quadratic spline */
|
|
23
|
+
|
|
24
|
+
typedef struct
|
|
25
|
+
{
|
|
26
|
+
short x; /*absolute x coord */
|
|
27
|
+
short y; /*absolute y coord */
|
|
28
|
+
} TPOINT;
|
|
29
|
+
typedef TPOINT VECTOR; /*structure for coordinates */
|
|
30
|
+
|
|
31
|
+
typedef struct
|
|
32
|
+
{
|
|
33
|
+
char dx; /*compact vectors */
|
|
34
|
+
char dy;
|
|
35
|
+
} BYTEVEC;
|
|
36
|
+
|
|
37
|
+
typedef struct edgeptstruct
|
|
38
|
+
{
|
|
39
|
+
TPOINT pos; /*position */
|
|
40
|
+
VECTOR vec; /*vector to next point */
|
|
41
|
+
char flags[EDGEPTFLAGS]; /*concavity, length etc */
|
|
42
|
+
struct edgeptstruct *next; /*anticlockwise element */
|
|
43
|
+
struct edgeptstruct *prev; /*clockwise element */
|
|
44
|
+
} EDGEPT; /*point on expanded outline */
|
|
45
|
+
|
|
46
|
+
typedef struct blobstruct
|
|
47
|
+
{
|
|
48
|
+
struct olinestruct *outlines; /*list of outlines in blob */
|
|
49
|
+
char flags[TBLOBFLAGS]; /*blob flags */
|
|
50
|
+
char correct; /*correct text */
|
|
51
|
+
char guess; /*best guess */
|
|
52
|
+
/*quickie classification */
|
|
53
|
+
unsigned char classes[MAX_WO_CLASSES];
|
|
54
|
+
/*quickie ratings */
|
|
55
|
+
unsigned char values[MAX_WO_CLASSES];
|
|
56
|
+
struct blobstruct *next; /*next blob in block */
|
|
57
|
+
} TBLOB; /*blob structure */
|
|
58
|
+
|
|
59
|
+
typedef struct olinestruct
|
|
60
|
+
{
|
|
61
|
+
TPOINT topleft; /*top left of loop */
|
|
62
|
+
TPOINT botright; /*bottom right of loop */
|
|
63
|
+
TPOINT start; /*start of loop */
|
|
64
|
+
BYTEVEC *compactloop; /*ptr to compacted loop */
|
|
65
|
+
EDGEPT *loop; /*edgeloop */
|
|
66
|
+
void *node; /*1st node on outline */
|
|
67
|
+
struct olinestruct *next; /*next at this level */
|
|
68
|
+
struct olinestruct *child; /*inner outline */
|
|
69
|
+
} TESSLINE; /*outline structure */
|
|
70
|
+
|
|
71
|
+
typedef struct wordstruct
|
|
72
|
+
{
|
|
73
|
+
struct textrowstruct *row; /*row it came from */
|
|
74
|
+
char *correct; /*correct word string */
|
|
75
|
+
char *guess; /*guess word string */
|
|
76
|
+
TBLOB *blobs; /*blobs in word */
|
|
77
|
+
int blanks; /*blanks before word */
|
|
78
|
+
int blobcount; /*no of blobs in word */
|
|
79
|
+
struct wordstruct *next; /*next word */
|
|
80
|
+
} TWERD; /*word structure */
|
|
81
|
+
|
|
82
|
+
typedef struct textrowstruct
|
|
83
|
+
{
|
|
84
|
+
int blobcount; /** count of blobs in row. **/
|
|
85
|
+
TBLOB *blobs; /*list of blobs in row */
|
|
86
|
+
TWERD *words; /*list of words in row */
|
|
87
|
+
int mean_y; /** y coordinate of centre of row **/
|
|
88
|
+
int max_y; /** y coordinate of top of row **/
|
|
89
|
+
int min_y; /** y coordinate of bottom of row **/
|
|
90
|
+
SPLINE_SPEC xheight; /*top of row */
|
|
91
|
+
SPLINE_SPEC baseline; /*bottom of row */
|
|
92
|
+
float descdrop; /*descender drop */
|
|
93
|
+
float ascrise; /*ascender rise */
|
|
94
|
+
float lineheight; /*average xheight-baseline */
|
|
95
|
+
int kerning; /*kerning of row */
|
|
96
|
+
int space; /*spacing of row */
|
|
97
|
+
float space_threshold; /*Bayesian space limit */
|
|
98
|
+
int p_spaced; /*proportional flag */
|
|
99
|
+
int b_space; /*block spacing */
|
|
100
|
+
int b_kern; /*block kerning */
|
|
101
|
+
struct textrowstruct *next; /*next row in block */
|
|
102
|
+
} TEXTROW;
|
|
103
|
+
|
|
104
|
+
typedef struct blockstruct /** list of coordinates **/
|
|
105
|
+
{
|
|
106
|
+
TBLOB *blobs; /*blobs in block */
|
|
107
|
+
TEXTROW *rows; /*rows in block */
|
|
108
|
+
int blobcount; /*no of blobs */
|
|
109
|
+
short xmin;
|
|
110
|
+
short xmax;
|
|
111
|
+
short ymin;
|
|
112
|
+
short ymax;
|
|
113
|
+
char type; /** block type **/
|
|
114
|
+
char p_spaced; /** flag to show proportional spacing **/
|
|
115
|
+
short rowcount; /** number of rows **/
|
|
116
|
+
short leading; /** space between rows **/
|
|
117
|
+
short kerning; /** space between characters **/
|
|
118
|
+
short space; /** distance between char centres **/
|
|
119
|
+
short minwidth; /*min width of char in block */
|
|
120
|
+
short p_size; /** point size of text **/
|
|
121
|
+
short l_margin; /** posn of left margin **/
|
|
122
|
+
short italic; /** flag to show italic block **/
|
|
123
|
+
short spurious; /** percentage of spurious characters **/
|
|
124
|
+
struct blockstruct *next; /*next text block */
|
|
125
|
+
} TEXTBLOCK; /*block from image */
|
|
126
|
+
|
|
127
|
+
/**********************************************************************
|
|
128
|
+
* iterate_blobs
|
|
129
|
+
*
|
|
130
|
+
* Visit all the blobs in a list using a local variable.
|
|
131
|
+
**********************************************************************/
|
|
132
|
+
|
|
133
|
+
/* Expands to a for-loop header that walks a singly linked list through its
 * `next` member: `blob` is the loop variable (any assignable pointer
 * expression), `blobs` is the list head.  The loop body follows the macro
 * invocation.  Both arguments are parenthesized so that expressions such as
 * `*pp` or `cond ? a : b` expand correctly; `blob` is evaluated on every
 * iteration, `blobs` once. */
#define iterate_blobs(blob,blobs)  \
for ((blob) = (blobs); (blob) != NULL; (blob) = (blob)->next)
|
|
135
|
+
#endif
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: getopt.c
|
|
3
|
+
* Description: Re-implementation of the unix code.
|
|
4
|
+
* Author: Ray Smith
|
|
5
|
+
* Created: Tue Nov 28 05:52:50 MST 1995
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1995, Hewlett-Packard Co.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
|
|
20
|
+
#include "mfcpch.h" //precompiled headers
|
|
21
|
+
#include <string.h>
|
|
22
|
+
#include <stdio.h>
|
|
23
|
+
#include "tessopt.h"
|
|
24
|
+
#include "notdll.h" //must be last include
|
|
25
|
+
|
|
26
|
+
int tessoptind;
|
|
27
|
+
char *tessoptarg;
|
|
28
|
+
|
|
29
|
+
/**********************************************************************
|
|
30
|
+
* tessopt
|
|
31
|
+
*
|
|
32
|
+
* parse command line args.
|
|
33
|
+
**********************************************************************/
|
|
34
|
+
|
|
35
|
+
// Minimal getopt-style parser.  Each call examines argv[tessoptind]
// (tessoptind is set to 1 on the first call, skipping argv[0]).
//
// arglist is a string of accepted option characters; a character followed
// by ':' takes an argument.
//
// Returns:
//   the option character when argv[tessoptind] is "-x" and x is in arglist;
//   '?' when the option character is not in arglist (tessoptind is NOT
//       advanced in this case);
//   EOF when there are no more arguments, the next argument does not start
//       with '-', or it is a bare "-".
//
// On success tessoptind is advanced past the option and tessoptarg is set:
// for an option taking an argument it points at the text glued to the
// option ("-xVALUE") or else at the following argv entry, which is then
// consumed as well.  For other options tessoptarg is simply the next argv
// entry.  No check is made that a required argument is actually present.
int tessopt (                    //parse args
inT32 argc,                      //arg count
char *argv[],                    //args
const char *arglist              //string of arg chars
) {
  const char *arg;               //matching entry in arglist
  char optchar;                  //character after the '-'

  if (tessoptind == 0)
    tessoptind = 1;
  if (tessoptind >= argc || argv[tessoptind][0] != '-')
    return EOF;                  //no (more) options

  optchar = argv[tessoptind][1];
  // A bare "-" carries no option character.  Looking '\0' up with strchr()
  // would match the terminator of arglist and lead to reads past the end of
  // both arglist and the argument, so treat it as a non-option instead.
  if (optchar == '\0')
    return EOF;

  arg = strchr (arglist, optchar);
  if (arg == NULL || *arg == ':')
    return '?';                  //dud option

  tessoptind++;
  tessoptarg = argv[tessoptind];
  if (arg[1] == ':') {
    if (argv[tessoptind - 1][2] != '\0')
                                 //immediately after
      tessoptarg = argv[tessoptind - 1] + 2;
    else
      tessoptind++;              //argument was the next argv entry
  }
  return *arg;
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: getopt.h
|
|
3
|
+
* Description: Re-implementation of the unix code.
|
|
4
|
+
* Author: Ray Smith
|
|
5
|
+
* Created: Tue Nov 28 05:52:50 MST 1995
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1995, Hewlett-Packard Co.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
|
|
20
|
+
#include "host.h"
|
|
21
|
+
#include "notdll.h" //must be last include
|
|
22
|
+
|
|
23
|
+
extern int tessoptind;
|
|
24
|
+
extern char *tessoptarg;
|
|
25
|
+
|
|
26
|
+
int tessopt ( //parse args
|
|
27
|
+
inT32 argc, //arg count
|
|
28
|
+
char *argv[], //args
|
|
29
|
+
const char *arglist //string of arg chars
|
|
30
|
+
);
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: tprintf.c
|
|
3
|
+
* Description: Trace version of printf - portable between UX and NT
|
|
4
|
+
* Author: Phil Cheatle
|
|
5
|
+
* Created: Wed Jun 28 15:01:15 BST 1995
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1995, Hewlett-Packard Ltd.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
#include "mfcpch.h" //precompiled headers
|
|
20
|
+
#include <stdio.h>
|
|
21
|
+
#include <stdarg.h>
|
|
22
|
+
#include "strngs.h"
|
|
23
|
+
#include "varable.h"
|
|
24
|
+
#include "debugwin.h"
|
|
25
|
+
//#include "ipeerr.h"
|
|
26
|
+
#include "tprintf.h"
|
|
27
|
+
|
|
28
|
+
#define MAX_MSG_LEN 1024
|
|
29
|
+
|
|
30
|
+
#define EXTERN
|
|
31
|
+
DLLSYM STRING_VAR (debug_file, "", "File to send tprintf output to");
|
|
32
|
+
DLLSYM BOOL_VAR (debug_window_on, FALSE,
|
|
33
|
+
"Send tprintf to window unless file set");
|
|
34
|
+
|
|
35
|
+
// Trace printf.  Formats the message into a fixed-size static buffer
// (truncating anything longer than MAX_MSG_LEN characters) and sends it to:
//   - the file named by debug_file, when that variable is non-empty
//     (opened with mode "w" on first use, closed again once the variable
//     becomes empty);
//   - otherwise the debug window, when debug_window_on is set (the window
//     is created on first use);
//   - otherwise stderr.
// The static buffer and static file/window handles mean the function keeps
// state between calls.
DLLSYM void
tprintf (                        //Trace printf
const char *format, ...          //special message
) {
  va_list args;                  //variable args
  static FILE *debugfp = NULL;   //debug file
                                 //debug window
  static DEBUG_WIN *debugwin = NULL;
  static char msg[MAX_MSG_LEN + 1];

  va_start(args, format);        //variable list
  #ifdef __MSW32__
                                 //Format into msg
  _vsnprintf (msg, MAX_MSG_LEN, format, args);
  #else
                                 //Format into msg, never writing past it
  vsnprintf (msg, MAX_MSG_LEN + 1, format, args);
  #endif
  msg[MAX_MSG_LEN] = '\0';       //guarantee termination on truncation
  va_end(args);

  if (debugfp == NULL && strlen (debug_file.string ()) > 0)
    debugfp = fopen (debug_file.string (), "w");
  else if (debugfp != NULL && strlen (debug_file.string ()) == 0) {
    fclose(debugfp);
    debugfp = NULL;
  }
  if (debugfp != NULL)
    fprintf (debugfp, "%s", msg);
  else {

    if (debug_window_on) {
      if (debugwin == NULL)
                                 //in pixels
        debugwin = new DEBUG_WIN ("Debug Window", DEBUG_WIN_XPOS, DEBUG_WIN_YPOS,
                                 //in pixels
          DEBUG_WIN_XSIZE, DEBUG_WIN_YSIZE,
          debug_lines);
      // msg is already formatted; pass it as data so that any '%' in it is
      // not interpreted a second time.
      // NOTE(review): assumes DEBUG_WIN::dprintf is printf-style variadic.
      debugwin->dprintf ("%s", msg);
    }
    else {
      fprintf (stderr, "%s", msg);
    }
  }
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
/*************************************************************************
|
|
83
|
+
* pause_continue()
|
|
84
|
+
* UI for a debugging pause - to see an intermediate state
|
|
85
|
+
* Returns TRUE to continue as normal to the next pause in the current mode;
|
|
86
|
+
* FALSE to quit the current pausing mode.
|
|
87
|
+
*************************************************************************/
|
|
88
|
+
|
|
89
|
+
// UI for a debugging pause.  Formats the message (truncated to fit a
// 1000-byte buffer) and then:
//   - with GRAPHICS_DISABLED: returns true without any interaction;
//   - on __UNIX__: prints the message and a prompt to stdout, reads one
//     character from stdin and returns false only if it is 'c';
//   - on __MSW32__: shows "DEBUG PAUSE:", the message and the usage hint in
//     an OK/Cancel message box and returns whether OK was pressed;
//   - on any other build: returns true.
DLLSYM BOOL8
                                 //special message
pause_continue (const char *format, ...
) {
  va_list args;                  //variable args
  char msg[1000];

  va_start(args, format);        //variable list
  #ifdef __MSW32__
                                 //Format into msg
  _vsnprintf (msg, sizeof (msg) - 1, format, args);
  #else
                                 //Format into msg, never writing past it
  vsnprintf (msg, sizeof (msg), format, args);
  #endif
  msg[sizeof (msg) - 1] = '\0';  //guarantee termination on truncation
  va_end(args);

  #if defined(GRAPHICS_DISABLED)
  // No interaction allowed -> simply go on
  return true;
  #elif defined(__UNIX__)
  printf ("%s\n", msg);
  printf ("Type \"c\" to cancel, anything else to continue: ");
  int c = getchar ();            //int, so EOF is not narrowed to a char
  return (c != 'c');
  #elif defined(__MSW32__)
  STRING str = STRING ("DEBUG PAUSE:\n");
  str +=
    STRING (msg) + STRING ("\nUse OK to continue, CANCEL to stop pausing");
  // Show the assembled prompt; it used to be built and then discarded in
  // favour of the bare message.
  return::MessageBox (NULL, str.string (), "IMGAPP",
    MB_APPLMODAL | MB_OKCANCEL) == IDOK;
  #else
  // No known UI on this platform: behave as if the user chose to continue
  // rather than fall off the end of a non-void function.
  return true;
  #endif
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: tprintf.h
|
|
3
|
+
* Description: Trace version of printf - portable between UX and NT
|
|
4
|
+
* Author: Phil Cheatle
|
|
5
|
+
* Created: Wed Jun 28 15:01:15 BST 1995
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1995, Hewlett-Packard Ltd.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
|
|
20
|
+
#ifndef TPRINTF_H
|
|
21
|
+
#define TPRINTF_H
|
|
22
|
+
|
|
23
|
+
#include "varable.h"
|
|
24
|
+
|
|
25
|
+
extern DLLSYM STRING_VAR_H (debug_file, "", "File to send tprintf output to");
|
|
26
|
+
extern DLLSYM BOOL_VAR_H (debug_window_on, TRUE,
|
|
27
|
+
"Send tprintf to window unless file set");
|
|
28
|
+
|
|
29
|
+
DLLSYM void tprintf ( //Trace printf
|
|
30
|
+
const char *format, ... //special message
|
|
31
|
+
);
|
|
32
|
+
//special message
|
|
33
|
+
DLLSYM BOOL8 pause_continue (const char *format, ...
|
|
34
|
+
);
|
|
35
|
+
#endif
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
///////////////////////////////////////////////////////////////////////
|
|
2
|
+
// File: unichar.cpp
|
|
3
|
+
// Description: Unicode character/ligature class.
|
|
4
|
+
// Author: Ray Smith
|
|
5
|
+
// Created: Wed Jun 28 17:05:01 PDT 2006
|
|
6
|
+
//
|
|
7
|
+
// (C) Copyright 2006, Google Inc.
|
|
8
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
// you may not use this file except in compliance with the License.
|
|
10
|
+
// You may obtain a copy of the License at
|
|
11
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
// See the License for the specific language governing permissions and
|
|
16
|
+
// limitations under the License.
|
|
17
|
+
//
|
|
18
|
+
///////////////////////////////////////////////////////////////////////
|
|
19
|
+
|
|
20
|
+
#include "unichar.h"
|
|
21
|
+
|
|
22
|
+
#define UNI_MAX_LEGAL_UTF32 0x0010FFFF
|
|
23
|
+
|
|
24
|
+
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
|
25
|
+
// If the string is too long to fit in the UNICHAR then it takes only what
|
|
26
|
+
// will fit. Checks for illegal input and stops at an illegal sequence.
|
|
27
|
+
// The resulting UNICHAR may be empty.
|
|
28
|
+
UNICHAR::UNICHAR(const char* utf8_str, int len) {
  int total_len = 0;
  if (len < 0) {
    // Null-terminated input: measure it, but never look further than the
    // UNICHAR_LEN bytes that could possibly be stored.
    for (len = 0; len < UNICHAR_LEN && utf8_str[len] != 0; ++len);
  }
  // Accept whole UTF-8 sequences one at a time; total_len only ever counts
  // fully validated sequences.
  while (total_len < len) {
    const int step = utf8_step(utf8_str + total_len);
    if (step == 0)
      break;  // Illegal first byte.
    if (total_len + step > UNICHAR_LEN)
      break;  // Too long to store.
    if (total_len + step > len)
      break;  // Sequence is cut off by len: do not read or copy past it.
    int i;
    for (i = 1; i < step; ++i)
      if ((utf8_str[total_len + i] & 0xc0) != 0x80)
        break;
    if (i < step)
      break;  // Illegal continuation byte.
    total_len += step;
  }
  memcpy(chars, utf8_str, total_len);
  if (total_len < UNICHAR_LEN) {
    // Room to spare: the last byte holds the length and the gap is zeroed.
    chars[UNICHAR_LEN - 1] = total_len;
    while (total_len < UNICHAR_LEN - 1)
      chars[total_len++] = 0;
  }
}
|
|
54
|
+
|
|
55
|
+
// Construct from a single UCS4 character. Illegal values are ignored,
|
|
56
|
+
// resulting in an empty UNICHAR.
|
|
57
|
+
UNICHAR::UNICHAR(int unicode) {
  const int bytemask = 0xBF;
  const int bytemark = 0x80;

  // Start from the empty value so that every byte not written below
  // (padding, and the whole array for illegal input) is well defined.
  memset(chars, 0, UNICHAR_LEN);
  if (unicode < 0)
    return;  // Negative values are not code points: leave it empty.

  // Continuation bytes are produced low-order first: take the bottom six
  // bits as 10xxxxxx, shift them out, and finish with the lead byte.
  if (unicode < 0x80) {
    chars[UNICHAR_LEN - 1] = 1;
    chars[0] = static_cast<char>(unicode);
  } else if (unicode < 0x800) {
    chars[UNICHAR_LEN - 1] = 2;
    chars[1] = static_cast<char>((unicode | bytemark) & bytemask);
    unicode >>= 6;
    chars[0] = static_cast<char>(unicode | 0xc0);
  } else if (unicode < 0x10000) {
    chars[UNICHAR_LEN - 1] = 3;
    chars[2] = static_cast<char>((unicode | bytemark) & bytemask);
    unicode >>= 6;
    chars[1] = static_cast<char>((unicode | bytemark) & bytemask);
    unicode >>= 6;
    chars[0] = static_cast<char>(unicode | 0xe0);
  } else if (unicode <= UNI_MAX_LEGAL_UTF32) {
    chars[UNICHAR_LEN - 1] = 4;
    chars[3] = static_cast<char>((unicode | bytemark) & bytemask);
    unicode >>= 6;
    chars[2] = static_cast<char>((unicode | bytemark) & bytemask);
    unicode >>= 6;
    chars[1] = static_cast<char>((unicode | bytemark) & bytemask);
    unicode >>= 6;
    chars[0] = static_cast<char>(unicode | 0xf0);
  }
  // Values above UNI_MAX_LEGAL_UTF32 fall through and stay empty.
}
|
|
92
|
+
|
|
93
|
+
// Get the first character as UCS-4.
|
|
94
|
+
int UNICHAR::first_uni() const {
|
|
95
|
+
static const int utf8_offsets[5] = {
|
|
96
|
+
0, 0, 0x3080, 0xE2080, 0x3C82080
|
|
97
|
+
};
|
|
98
|
+
int uni = 0;
|
|
99
|
+
int len = utf8_step(chars);
|
|
100
|
+
const char* src = chars;
|
|
101
|
+
|
|
102
|
+
switch (len) {
|
|
103
|
+
default:
|
|
104
|
+
break;
|
|
105
|
+
case 4:
|
|
106
|
+
uni += static_cast<unsigned char>(*src++);
|
|
107
|
+
uni <<= 6;
|
|
108
|
+
case 3:
|
|
109
|
+
uni += static_cast<unsigned char>(*src++);
|
|
110
|
+
uni <<= 6;
|
|
111
|
+
case 2:
|
|
112
|
+
uni += static_cast<unsigned char>(*src++);
|
|
113
|
+
uni <<= 6;
|
|
114
|
+
case 1:
|
|
115
|
+
uni += static_cast<unsigned char>(*src++);
|
|
116
|
+
}
|
|
117
|
+
uni -= utf8_offsets[len];
|
|
118
|
+
return uni;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Get a terminated UTF8 string: Must delete[] it after use.
|
|
122
|
+
char* UNICHAR::utf8_str() const {
|
|
123
|
+
int len = utf8_len();
|
|
124
|
+
char* str = new char[len + 1];
|
|
125
|
+
memcpy(str, chars, len);
|
|
126
|
+
str[len] = 0;
|
|
127
|
+
return str;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Get the number of bytes in the first character of the given utf8 string.
|
|
131
|
+
int UNICHAR::utf8_step(const char* utf8_str) {
  // Classify the lead byte by range.  0 means the byte cannot start a
  // sequence (a continuation byte 0x80-0xBF, or 0xF8 and above).
  const unsigned char lead = static_cast<unsigned char>(*utf8_str);

  if (lead < 0x80)
    return 1;  // 0x00-0x7F: single byte.
  if (lead < 0xC0)
    return 0;  // 0x80-0xBF: continuation byte.
  if (lead < 0xE0)
    return 2;  // 0xC0-0xDF.
  if (lead < 0xF0)
    return 3;  // 0xE0-0xEF.
  if (lead < 0xF8)
    return 4;  // 0xF0-0xF7.
  return 0;    // 0xF8-0xFF.
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
///////////////////////////////////////////////////////////////////////
|
|
2
|
+
// File: unichar.h
|
|
3
|
+
// Description: Unicode character/ligature class.
|
|
4
|
+
// Author: Ray Smith
|
|
5
|
+
// Created: Wed Jun 28 17:05:01 PDT 2006
|
|
6
|
+
//
|
|
7
|
+
// (C) Copyright 2006, Google Inc.
|
|
8
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
// you may not use this file except in compliance with the License.
|
|
10
|
+
// You may obtain a copy of the License at
|
|
11
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
// See the License for the specific language governing permissions and
|
|
16
|
+
// limitations under the License.
|
|
17
|
+
//
|
|
18
|
+
///////////////////////////////////////////////////////////////////////
|
|
19
|
+
|
|
20
|
+
#ifndef TESSERACT_CCUTIL_UNICHAR_H__
|
|
21
|
+
#define TESSERACT_CCUTIL_UNICHAR_H__
|
|
22
|
+
|
|
23
|
+
#include <memory.h>
|
|
24
|
+
|
|
25
|
+
// Maximum number of characters that can be stored in a UNICHAR. Must be
|
|
26
|
+
// at least 4. Must not exceed 31 without changing the coding of length.
|
|
27
|
+
#define UNICHAR_LEN 24
|
|
28
|
+
|
|
29
|
+
// A UNICHAR_ID is the unique id of a unichar.
|
|
30
|
+
typedef int UNICHAR_ID;
|
|
31
|
+
|
|
32
|
+
// A variable to indicate an invalid or uninitialized unichar id.
|
|
33
|
+
static const int INVALID_UNICHAR_ID = -1;
|
|
34
|
+
// A special unichar that corresponds to INVALID_UNICHAR_ID.
|
|
35
|
+
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
|
|
36
|
+
|
|
37
|
+
// The UNICHAR class holds a single classification result. This may be
|
|
38
|
+
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
|
|
39
|
+
// multiple Unicode characters representing the NFKC expansion of a ligature
|
|
40
|
+
// such as fi, ffl etc. These are also stored as utf8.
|
|
41
|
+
class UNICHAR {
|
|
42
|
+
public:
|
|
43
|
+
UNICHAR() {
|
|
44
|
+
memset(chars, 0, UNICHAR_LEN);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
|
48
|
+
// If the string is too long to fit in the UNICHAR then it takes only what
|
|
49
|
+
// will fit.
|
|
50
|
+
UNICHAR(const char* utf8_str, int len);
|
|
51
|
+
|
|
52
|
+
// Construct from a single UCS4 character.
|
|
53
|
+
explicit UNICHAR(int unicode);
|
|
54
|
+
|
|
55
|
+
// Default copy constructor and operator= are OK.
|
|
56
|
+
|
|
57
|
+
// Get the first character as UCS-4.
|
|
58
|
+
int first_uni() const;
|
|
59
|
+
|
|
60
|
+
// Get the length of the UTF8 string.
|
|
61
|
+
int utf8_len() const {
|
|
62
|
+
int len = chars[UNICHAR_LEN - 1];
|
|
63
|
+
return len >=0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// Get a UTF8 string, but NOT NULL terminated.
|
|
67
|
+
const char* utf8() const {
|
|
68
|
+
return chars;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Get a terminated UTF8 string: Must delete[] it after use.
|
|
72
|
+
char* utf8_str() const;
|
|
73
|
+
|
|
74
|
+
// Get the number of bytes in the first character of the given utf8 string.
|
|
75
|
+
static int utf8_step(const char* utf8_str);
|
|
76
|
+
|
|
77
|
+
private:
|
|
78
|
+
// A UTF-8 representation of 1 or more Unicode characters.
|
|
79
|
+
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
|
80
|
+
// its value < UNICHAR_LEN, otherwise it is a genuine character.
|
|
81
|
+
char chars[UNICHAR_LEN];
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
#endif // TESSERACT_CCUTIL_UNICHAR_H__
|