tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612)
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,825 @@
1
+ /**********************************************************************
2
+ * File: fixxht.cpp (Formerly fixxht.c)
3
+ * Description: Improve x_ht and look out for case inconsistencies
4
+ * Author: Phil Cheatle
5
+ * Created: Thu Aug 5 14:11:08 BST 1993
6
+ *
7
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #include <string.h>
22
+ #include <ctype.h>
23
+ #include "varable.h"
24
+ #include "tessvars.h"
25
+ #include "control.h"
26
+ #include "reject.h"
27
+ #include "fixxht.h"
28
+ #include "secname.h"
29
+
30
+ #define EXTERN
31
+
32
+ EXTERN double_VAR (x_ht_fraction_of_caps_ht, 0.7,
33
+ "Fract of cps ht est of xht");
34
+ EXTERN double_VAR (x_ht_variation, 0.35,
35
+ "Err band as fract of caps/xht dist");
36
+ EXTERN double_VAR (x_ht_sub_variation, 0.5,
37
+ "Err band as fract of caps/xht dist");
38
+ EXTERN BOOL_VAR (rej_trial_ambigs, TRUE,
39
+ "reject x-ht ambigs when under trial");
40
+ EXTERN BOOL_VAR (x_ht_conservative_ambigs, FALSE,
41
+ "Dont rely on ambigs + maxht");
42
+ EXTERN BOOL_VAR (x_ht_check_est, TRUE, "Cross check estimates");
43
+ EXTERN BOOL_VAR (x_ht_case_flip, FALSE, "Flip or reject suspect case");
44
+ EXTERN BOOL_VAR (x_ht_include_dodgy_blobs, TRUE,
45
+ "Include blobs with possible noise?");
46
+ EXTERN BOOL_VAR (x_ht_limit_flip_trials, TRUE,
47
+ "Dont do trial flips when ambigs are close to xht?");
48
+ EXTERN BOOL_VAR (rej_use_check_block_occ, TRUE,
49
+ "Analyse rejection behaviour");
50
+
51
+ EXTERN STRING_VAR (chs_non_ambig_caps_ht,
52
+ "!#$%&()/12346789?ABDEFGHIKLNQRT[]\\bdfhkl",
53
+ "Reliable ascenders");
54
+ EXTERN STRING_VAR (chs_x_ht, "acegmnopqrsuvwxyz", "X height chars");
55
+ EXTERN STRING_VAR (chs_non_ambig_x_ht, "aenqr", "reliable X height chars");
56
+ EXTERN STRING_VAR (chs_ambig_caps_x, "cCmMoO05sSuUvVwWxXzZ",
57
+ "X ht or caps ht chars");
58
+ EXTERN STRING_VAR (chs_bl_ambig_caps_x, "pPyY", " Caps or descender ambigs");
59
+
60
+ /* The following aren't used in this module but are used in applybox.c */
61
+ EXTERN STRING_VAR (chs_caps_ht,
62
+ "!#$%&()/0123456789?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]\\bdfhkl{|}",
63
+ "Ascender chars");
64
+ EXTERN STRING_VAR (chs_desc, "gjpqy", "Descender chars");
65
+ EXTERN STRING_VAR (chs_non_ambig_bl,
66
+ "!#$%&01246789?ABCDEFGHIKLMNORSTUVWXYZabcdehiklmnorstuvwxz",
67
+ "Reliable baseline chars");
68
+ EXTERN STRING_VAR (chs_odd_top, "ijt", "Chars with funny ascender region");
69
+ EXTERN STRING_VAR (chs_odd_bot, "()35JQ[]\\/{}|", "Chars with funny base");
70
+
71
+ /* The following aren't used but are defined for completeness */
72
+ EXTERN STRING_VAR (chs_bl,
73
+ "!#$%&()/01246789?ABCDEFGHIJKLMNOPRSTUVWXYZ[]\\abcdefhiklmnorstuvwxz{}",
74
+ "Baseline chars");
75
+ EXTERN STRING_VAR (chs_non_ambig_desc, "gq", "Reliable descender chars");
76
+
77
+ /*************************************************************************
78
+ * re_estimate_x_ht()
79
+ *
80
+ * Walk the blobs in the word together with the text string and reject map.
81
+ * NOTE: All evaluation is done on the baseline normalised word. This is so that
82
+ * the TBOX class can be used (integer). The reasons for this are:
83
+ * a) We must use the outword - ie the Tess result
84
+ * b) The outword is always converted to integer representation as that is how
85
+ * Tess works
86
+ * c) We would like to use the TBOX class, cos its there - this is integer
87
+ * precision.
88
+ * d) If we de-normed the outword we would get rounding errors and would find
89
+ * that integers are too imprecise (x-height around 15 pixels instead of a
90
+ * scale of 128 in bln form).
91
+ * CONVINCED?
92
+ *
93
+ * A) Try to re-estimate x-ht and caps ht from confirmed pts in word.
94
+ *
95
+ * FOR each non reject blob
96
+ * IF char is baseline posn ambiguous
97
+ * Remove ambiguity by comparing its posn with respect to baseline.
98
+ * IF char is a confirmed x-ht char
99
+ * Add x-ht posn to confirmed_x_ht pts for word
100
+ * IF char is a confirmed caps-ht char
101
+ * Add blob_ht to caps ht pts for word
102
+ *
103
+ * IF Std Dev of caps hts < 2 (AND # samples > 0)
104
+ * Use mean as caps ht estimate (Dont use median as we can expect a
105
+ * fair variation between the heights of the NON_AMBIG_CAPS_HT_CHS)
106
+ * IF Std Dev of caps hts >= 2 (AND # samples > 0)
107
+ * Suspect small caps font.
108
+ * Look for 2 clusters, each with Std Dev < 2.
109
+ * IF 2 clusters found
110
+ * Pick the smaller median as the caps ht estimate of the smallcaps.
111
+ *
112
+ * IF failed to estimate a caps ht
113
+ * Use the median caps ht if there is one,
114
+ * ELSE use the caps ht estimate of the previous word. NO!!!
115
+ *
116
+ *
117
+ * IF there are confirmed x-height chars
118
+ * Estimate confirmed x-height as the median value
119
+ * ELSE IF there is a confirmed caps ht
120
+ * Estimate confirmed x-height as a fraction of confirmed caps ht value
121
+ * ELSE
122
+ * Use the value for the previous word or the row value if this is the
123
+ * first word in the block. NO!!!
124
+ *
125
+ * B) Add in case ambiguous blobs based on confirmed x-ht/caps ht, changing case
126
+ * as necessary. Reestimate caps ht and x-ht as in A, using the extended
127
+ * clusters.
128
+ *
129
+ * C) If word contains rejects, and x-ht estimate significantly differs from
130
+ * original estimate, return TRUE so that the word can be rematched
131
+ *************************************************************************/
132
+
133
+ void re_estimate_x_ht( //improve for 1 word
134
+ WERD_RES *word_res, //word to do
135
+ float *trial_x_ht //new match value
136
+ ) {
137
+ PBLOB_IT blob_it;
138
+ inT16 blob_ht_above_baseline;
139
+
140
+ const char *word_str;
141
+ inT16 i;
142
+ inT16 offset;
143
+
144
+ STATS all_blobs_ht (0, 300); //every blob in word
145
+ STATS x_ht (0, 300); //confirmed pts in wd
146
+ STATS caps_ht (0, 300); //confirmed pts in wd
147
+ STATS case_ambig (0, 300); //lower case ambigs
148
+
149
+ inT16 rej_blobs_count = 0;
150
+ inT16 rej_blobs_max_height = 0;
151
+ inT32 rej_blobs_max_area = 0;
152
+ float x_ht_ok_variation;
153
+ float max_blob_ht;
154
+ float marginally_above_x_ht;
155
+
156
+ TBOX blob_box; //blob bounding box
157
+ float est_x_ht = 0.0; //word estimate
158
+ float est_caps_ht = 0.0; //word estimate
159
+ //based on hard data?
160
+ BOOL8 est_caps_ht_certain = FALSE;
161
+ BOOL8 est_x_ht_certain = FALSE;//based on hard data?
162
+ BOOL8 trial = FALSE; //Sepeculative values?
163
+ BOOL8 no_comment = FALSE; //No change in xht
164
+ float ambig_lc_x_est;
165
+ float ambig_uc_caps_est;
166
+ inT16 x_ht_ambigs = 0;
167
+ inT16 caps_ht_ambigs = 0;
168
+
169
+ /* Calculate default variation of blob x_ht from bln x_ht for bln word */
170
+ x_ht_ok_variation =
171
+ (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation;
172
+
173
+ word_str = word_res->best_choice->string ().string ();
174
+ /*
175
+ Cycle blobs, allocating to one of the stats sets when possible.
176
+ */
177
+ blob_it.set_to_list (word_res->outword->blob_list ());
178
+ for (blob_it.mark_cycle_pt (), i = 0, offset = 0;
179
+ !blob_it.cycled_list (); blob_it.forward (),
180
+ offset += word_res->best_choice->lengths()[i++]) {
181
+ if (!dodgy_blob (blob_it.data ())) {
182
+ blob_box = blob_it.data ()->bounding_box ();
183
+ blob_ht_above_baseline = blob_box.top () - bln_baseline_offset;
184
+ all_blobs_ht.add (blob_ht_above_baseline, 1);
185
+
186
+ if (word_res->reject_map[i].rejected ()) {
187
+ rej_blobs_count++;
188
+ if (blob_box.height () > rej_blobs_max_height)
189
+ rej_blobs_max_height = blob_box.height ();
190
+ if (blob_box.area () > rej_blobs_max_area)
191
+ rej_blobs_max_area = blob_box.area ();
192
+ }
193
+ else {
194
+ if (STRING (chs_non_ambig_x_ht).contains (word_str[offset]))
195
+ x_ht.add (blob_ht_above_baseline, 1);
196
+
197
+ if (STRING (chs_non_ambig_caps_ht).contains (word_str[offset]))
198
+ caps_ht.add (blob_ht_above_baseline, 1);
199
+
200
+ if (STRING (chs_ambig_caps_x).contains (word_str[offset])) {
201
+ case_ambig.add (blob_ht_above_baseline, 1);
202
+ if (STRING (chs_x_ht).contains (word_str[offset]))
203
+ x_ht_ambigs++;
204
+ else
205
+ caps_ht_ambigs++;
206
+ }
207
+
208
+ if (STRING (chs_bl_ambig_caps_x).contains (word_str[offset])) {
209
+ if (STRING (chs_x_ht).contains (word_str[offset])) {
210
+ /* confirm x_height provided > 15% total height below baseline */
211
+ if ((bln_baseline_offset - blob_box.bottom ()) /
212
+ (float) blob_box.height () > 0.15)
213
+ x_ht.add (blob_ht_above_baseline, 1);
214
+ }
215
+ else {
216
+ /* confirm caps_height provided < 5% total height below baseline */
217
+ if ((bln_baseline_offset - blob_box.bottom ()) /
218
+ (float) blob_box.height () < 0.05)
219
+ caps_ht.add (blob_ht_above_baseline, 1);
220
+ }
221
+ }
222
+ }
223
+ }
224
+ }
225
+ est_caps_ht = estimate_from_stats (caps_ht);
226
+ est_x_ht = estimate_from_stats (x_ht);
227
+ est_ambigs(word_res, case_ambig, &ambig_lc_x_est, &ambig_uc_caps_est);
228
+ max_blob_ht = all_blobs_ht.ile (0.9999);
229
+
230
+ #ifndef SECURE_NAMES
231
+ if (debug_x_ht_level >= 20) {
232
+ tprintf ("Mode20:A: %s ", word_str);
233
+ word_res->reject_map.print (debug_fp);
234
+ tprintf (" XHT:%f CAP:%f MAX:%f AMBIG X:%f CAP:%f\n",
235
+ est_x_ht, est_caps_ht, max_blob_ht,
236
+ ambig_lc_x_est, ambig_uc_caps_est);
237
+ }
238
+ #endif
239
+ if (!x_ht_conservative_ambigs &&
240
+ (ambig_lc_x_est > 0) &&
241
+ (ambig_lc_x_est == ambig_uc_caps_est) &&
242
+ (max_blob_ht > ambig_lc_x_est + x_ht_ok_variation)) {
243
+ //may be zero but believe xht
244
+ ambig_uc_caps_est = est_caps_ht;
245
+ #ifndef SECURE_NAMES
246
+ if (debug_x_ht_level >= 20)
247
+ tprintf ("Mode20:B: Fiddle ambig_uc_caps_est to %f\n",
248
+ ambig_lc_x_est);
249
+ #endif
250
+ }
251
+
252
+ /* Now make some estimates */
253
+
254
+ if ((est_x_ht > 0) ||
255
+ (est_caps_ht > 0) ||
256
+ ((ambig_lc_x_est > 0) && (ambig_lc_x_est != ambig_uc_caps_est))) {
257
+ /* There is some sensible data to go on so make the most of it. */
258
+ if (debug_x_ht_level >= 20)
259
+ tprintf ("Mode20:C: Sensible Data\n", ambig_lc_x_est);
260
+ if (est_x_ht > 0) {
261
+ est_x_ht_certain = TRUE;
262
+ if (est_caps_ht == 0) {
263
+ if ((ambig_uc_caps_est > ambig_lc_x_est) &&
264
+ (ambig_uc_caps_est > est_x_ht + x_ht_ok_variation))
265
+ est_caps_ht = ambig_uc_caps_est;
266
+ else
267
+ est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht;
268
+ }
269
+ if (case_ambig.get_total () > 0)
270
+ improve_estimate(word_res, est_x_ht, est_caps_ht, x_ht, caps_ht);
271
+ est_caps_ht_certain = caps_ht.get_total () > 0;
272
+ #ifndef SECURE_NAMES
273
+ if (debug_x_ht_level >= 20)
274
+ tprintf ("Mode20:D: Est from xht XHT:%f CAP:%f\n",
275
+ est_x_ht, est_caps_ht);
276
+ #endif
277
+ }
278
+ else if (est_caps_ht > 0) {
279
+ est_caps_ht_certain = TRUE;
280
+ if ((ambig_lc_x_est > 0) &&
281
+ (ambig_lc_x_est < est_caps_ht - x_ht_ok_variation))
282
+ est_x_ht = ambig_lc_x_est;
283
+ else
284
+ est_x_ht = est_caps_ht * x_ht_fraction_of_caps_ht;
285
+ if (ambig_lc_x_est + ambig_uc_caps_est > 0)
286
+ improve_estimate(word_res, est_x_ht, est_caps_ht, x_ht, caps_ht);
287
+ est_x_ht_certain = x_ht.get_total () > 0;
288
+ #ifndef SECURE_NAMES
289
+ if (debug_x_ht_level >= 20)
290
+ tprintf ("Mode20:E: Est from caps XHT:%f CAP:%f\n",
291
+ est_x_ht, est_caps_ht);
292
+ #endif
293
+ }
294
+ else {
295
+ /* Do something based on case ambig chars alone - we have guessed that the
296
+ ambigs are lower case. */
297
+ est_x_ht = ambig_lc_x_est;
298
+ est_x_ht_certain = TRUE;
299
+ if (ambig_uc_caps_est > ambig_lc_x_est) {
300
+ est_caps_ht = ambig_uc_caps_est;
301
+ est_caps_ht_certain = TRUE;
302
+ }
303
+ else
304
+ est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht;
305
+
306
+ #ifndef SECURE_NAMES
307
+ if (debug_x_ht_level >= 20)
308
+ tprintf ("Mode20:F: Est from ambigs XHT:%f CAP:%f\n",
309
+ est_x_ht, est_caps_ht);
310
+ #endif
311
+ }
312
+ /* Check for sane interpretation of evidence:
313
+ Try shifting caps ht if min certain caps ht is not significantly greater
314
+ than the estimated x ht or the max certain x ht is not significantly less
315
+ than the estimated caps ht. */
316
+ if (x_ht_check_est) {
317
+ if ((caps_ht.get_total () > 0) &&
318
+ (est_x_ht + x_ht_ok_variation >= caps_ht.ile (0.0001))) {
319
+ trial = TRUE;
320
+ est_caps_ht = est_x_ht;
321
+ est_x_ht = x_ht_fraction_of_caps_ht * est_caps_ht;
322
+
323
+ #ifndef SECURE_NAMES
324
+ if (debug_x_ht_level >= 20)
325
+ tprintf ("Mode20:G: Trial XHT:%f CAP:%f\n",
326
+ est_x_ht, est_caps_ht);
327
+ #endif
328
+ }
329
+ else if ((x_ht.get_total () > 0) &&
330
+ (est_caps_ht - x_ht_ok_variation <= x_ht.ile (0.9999))) {
331
+ trial = TRUE;
332
+ est_x_ht = est_caps_ht;
333
+ est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht;
334
+ #ifndef SECURE_NAMES
335
+ if (debug_x_ht_level >= 20)
336
+ tprintf ("Mode20:H: Trial XHT:%f CAP:%f\n",
337
+ est_x_ht, est_caps_ht);
338
+ #endif
339
+ }
340
+ }
341
+ }
342
+
343
+ else {
344
+ /* There is no sensible data so we're in the dark. */
345
+
346
+ marginally_above_x_ht = bln_x_height +
347
+ x_ht_ok_variation * x_ht_sub_variation;
348
+ /*
349
+ If there are no rejects, or the only rejects have a narrow height, or have
350
+ a small area compared to a normal char, then estimate the x-height as the
351
+ original one. (I.e dont fiddle about if the only rejects look like
352
+ punctuation) - we use max height as mean or median will be too low if
353
+ there are only two blobs - Eg "F."
354
+ */
355
+
356
+ if (debug_x_ht_level >= 20)
357
+ tprintf ("Mode20:I: In the dark\n");
358
+
359
+ if ((rej_blobs_count == 0) ||
360
+ (rej_blobs_max_height < 0.3 * max_blob_ht) ||
361
+ (rej_blobs_max_area < 0.3 * max_blob_ht * max_blob_ht)) {
362
+ no_comment = TRUE;
363
+ if (debug_x_ht_level >= 20)
364
+ tprintf ("Mode20:J: No comment due to no rejects\n");
365
+ }
366
+ else if (x_ht_limit_flip_trials &&
367
+ ((max_blob_ht < marginally_above_x_ht) ||
368
+ ((ambig_lc_x_est > 0) &&
369
+ (ambig_lc_x_est == ambig_uc_caps_est) &&
370
+ (ambig_lc_x_est < marginally_above_x_ht)))) {
371
+ no_comment = TRUE;
372
+ if (debug_x_ht_level >= 20)
373
+ tprintf ("Mode20:K: No comment as close to xht %f < %f\n",
374
+ ambig_lc_x_est, marginally_above_x_ht);
375
+ }
376
+ else if (x_ht_conservative_ambigs && (ambig_uc_caps_est > 0)) {
377
+ trial = TRUE;
378
+ est_caps_ht = ambig_lc_x_est;
379
+ est_x_ht = x_ht_fraction_of_caps_ht * est_caps_ht;
380
+
381
+ #ifndef SECURE_NAMES
382
+ if (debug_x_ht_level >= 20)
383
+ tprintf ("Mode20:L: Trial XHT:%f CAP:%f\n",
384
+ est_x_ht, est_caps_ht);
385
+ #endif
386
+ }
387
+ /*
388
+ If the top of the word is nowhere near where we expect ascenders to be
389
+ (less than half the x_ht -> caps_ht distance) - suspect an all caps word
390
+ at the x-ht. Estimate x-ht accordingly - but only as a TRIAL!
391
+ NOTE we do NOT check location of baseline. Commas can descend as much as
392
+ real descenders so we would need to do something to make sure that any
393
+ disqualifying descenders were not at the end.
394
+ */
395
+ else {
396
+ if (max_blob_ht <
397
+ (bln_x_height + bln_x_height / x_ht_fraction_of_caps_ht) / 2.0) {
398
+ trial = TRUE;
399
+ est_x_ht = x_ht_fraction_of_caps_ht * max_blob_ht;
400
+ est_caps_ht = max_blob_ht;
401
+
402
+ #ifndef SECURE_NAMES
403
+ if (debug_x_ht_level >= 20)
404
+ tprintf ("Mode20:M: Trial XHT:%f CAP:%f\n",
405
+ est_x_ht, est_caps_ht);
406
+ #endif
407
+ }
408
+ else {
409
+ no_comment = TRUE;
410
+ if (debug_x_ht_level >= 20)
411
+ tprintf ("Mode20:N: No comment as nothing else matched\n");
412
+ }
413
+ }
414
+ }
415
+
416
+ /* Sanity check - reject word if fails */
417
+
418
+ if (!no_comment &&
419
+ ((est_x_ht > 2 * bln_x_height) ||
420
+ (est_x_ht / word_res->denorm.scale () <= min_sane_x_ht_pixels) ||
421
+ (est_caps_ht <= est_x_ht) || (est_caps_ht >= 2.5 * est_x_ht))) {
422
+ no_comment = TRUE;
423
+ if (!trial && rej_use_xht) {
424
+ if (debug_x_ht_level >= 2) {
425
+ tprintf ("Sanity check rejecting %s ", word_str);
426
+ word_res->reject_map.print (debug_fp);
427
+ tprintf ("\n");
428
+ }
429
+ word_res->reject_map.rej_word_xht_fixup ();
430
+
431
+ }
432
+ if (debug_x_ht_level >= 20)
433
+ tprintf ("Mode20:O: No comment as nothing else matched\n");
434
+ }
435
+
436
+ if (no_comment || trial) {
437
+ word_res->x_height = bln_x_height / word_res->denorm.scale ();
438
+ word_res->guessed_x_ht = TRUE;
439
+ word_res->caps_height = (bln_x_height / x_ht_fraction_of_caps_ht) /
440
+ word_res->denorm.scale ();
441
+ word_res->guessed_caps_ht = TRUE;
442
+ /*
443
+ Reject ambigs in the current word if we are uncertain and:
444
+ there are rejects OR
445
+ there is only one char which is an ambig OR
446
+ there is conflict between the case of the ambigs even though there is
447
+ no height separation Eg "Ms" recognised from "MS"
448
+ */
449
+ if (rej_trial_ambigs &&
450
+ ((word_res->reject_map.reject_count () > 0) ||
451
+ (word_res->reject_map.length () == 1) ||
452
+ ((x_ht_ambigs > 0) && (caps_ht_ambigs > 0)))) {
453
+ #ifndef SECURE_NAMES
454
+ if (debug_x_ht_level >= 2) {
455
+ tprintf ("TRIAL Rej Ambigs %s ", word_str);
456
+ word_res->reject_map.print (debug_fp);
457
+ }
458
+ #endif
459
+ reject_ambigs(word_res);
460
+ if (debug_x_ht_level >= 2) {
461
+ tprintf (" ");
462
+ word_res->reject_map.print (debug_fp);
463
+ tprintf ("\n");
464
+ }
465
+ }
466
+ }
467
+ else {
468
+ word_res->x_height = est_x_ht / word_res->denorm.scale ();
469
+ word_res->guessed_x_ht = !est_x_ht_certain;
470
+ word_res->caps_height = est_caps_ht / word_res->denorm.scale ();
471
+ word_res->guessed_caps_ht = !est_caps_ht_certain;
472
+ }
473
+
474
+ if (!no_comment && (fabs (est_x_ht - bln_x_height) > x_ht_ok_variation))
475
+ *trial_x_ht = est_x_ht / word_res->denorm.scale ();
476
+ else
477
+ *trial_x_ht = 0.0;
478
+
479
+ #ifndef SECURE_NAMES
480
+ if (((*trial_x_ht > 0) && (debug_x_ht_level >= 3)) ||
481
+ (debug_x_ht_level >= 5)) {
482
+ tprintf ("%s ", word_str);
483
+ word_res->reject_map.print (debug_fp);
484
+ tprintf
485
+ (" X:%0.2f Cps:%0.2f Mxht:%0.2f RJ MxHt:%d MxAr:%d Rematch:%c\n",
486
+ est_x_ht, est_caps_ht, max_blob_ht, rej_blobs_max_height,
487
+ rej_blobs_max_area, *trial_x_ht > 0 ? '*' : ' ');
488
+ }
489
+ #endif
490
+
491
+ }
492
+
493
+
494
+ /*************************************************************************
495
+ * check_block_occ()
496
+ * Checks word for coarse block occupancy, rejecting more chars and flipping
497
+ * case of case ambiguous chars as required.
498
+ *************************************************************************/
499
+
500
+ void check_block_occ(WERD_RES *word_res) {
501
+ PBLOB_IT blob_it;
502
+ STRING new_string;
503
+ STRING new_string_lengths(word_res->best_choice->lengths());
504
+ // char new_string_lengths[word_res->best_choice->lengths().length() + 1];
505
+ REJMAP new_map = word_res->reject_map;
506
+ WERD_CHOICE *new_choice;
507
+
508
+ const char *word_str = word_res->best_choice->string ().string ();
509
+ inT16 i;
510
+ inT16 offset;
511
+ inT16 reject_count = 0;
512
+ char confirmed_char[UNICHAR_LEN + 1];
513
+ char temp_char[UNICHAR_LEN + 1];
514
+ float x_ht;
515
+ float caps_ht;
516
+
517
+ new_string_lengths[0] = 0;
518
+
519
+ if (word_res->x_height > 0)
520
+ x_ht = word_res->x_height * word_res->denorm.scale ();
521
+ else
522
+ x_ht = bln_x_height;
523
+
524
+ if (word_res->caps_height > 0)
525
+ caps_ht = word_res->caps_height * word_res->denorm.scale ();
526
+ else
527
+ caps_ht = x_ht / x_ht_fraction_of_caps_ht;
528
+
529
+ blob_it.set_to_list (word_res->outword->blob_list ());
530
+
531
+ for (blob_it.mark_cycle_pt (), i = 0, offset = 0;
532
+ !blob_it.cycled_list (); blob_it.forward (),
533
+ offset += word_res->best_choice->lengths()[i++]) {
534
+ strncpy(temp_char, word_str + offset,
535
+ word_res->best_choice->lengths()[i]); //default copy
536
+ temp_char[word_res->best_choice->lengths()[i]] = '\0';
537
+ if (word_res->reject_map[i].accepted ()) {
538
+ check_blob_occ (temp_char,
539
+ blob_it.data ()->bounding_box ().
540
+ top () - bln_baseline_offset, x_ht,
541
+ caps_ht, confirmed_char);
542
+
543
+ if (strcmp(confirmed_char, "") == 0) {
544
+ if (rej_use_check_block_occ) {
545
+ new_map[i].setrej_xht_fixup ();
546
+ reject_count++;
547
+ }
548
+ }
549
+ else
550
+ strcpy(temp_char, confirmed_char);
551
+ }
552
+ new_string += temp_char;
553
+ new_string_lengths[i] = strlen(temp_char);
554
+ new_string_lengths[i + 1] = 0;
555
+
556
+ }
557
+ if ((reject_count > 0) || (new_string != word_str)) {
558
+ if (debug_x_ht_level >= 2) {
559
+ tprintf ("Shape Verification: %s ", word_str);
560
+ word_res->reject_map.print (debug_fp);
561
+ tprintf (" -> %s ", new_string.string ());
562
+ new_map.print (debug_fp);
563
+ tprintf ("\n");
564
+ }
565
+ new_choice = new WERD_CHOICE (new_string.string (),
566
+ new_string_lengths.string(),
567
+ word_res->best_choice->rating (),
568
+ word_res->best_choice->certainty (),
569
+ word_res->best_choice->permuter ());
570
+ delete word_res->best_choice;
571
+ word_res->best_choice = new_choice;
572
+ word_res->reject_map = new_map;
573
+ }
574
+ }
575
+
576
+
577
+ /*************************************************************************
578
+ * check_blob_occ()
579
+ *
580
+ * Checks blob for position relative to position above baseline
581
+ * Return 0 for reject, or (possibly case shifted) confirmed char
582
+ *************************************************************************/
583
+
584
+ void check_blob_occ(char* proposed_char,
585
+ inT16 blob_ht_above_baseline,
586
+ float x_ht,
587
+ float caps_ht,
588
+ char* confirmed_char) {
589
+ BOOL8 blob_definite_x_ht;
590
+ BOOL8 blob_definite_caps_ht;
591
+ float acceptable_variation;
592
+
593
+ acceptable_variation = (caps_ht - x_ht) * x_ht_variation;
594
+ /* ??? REJECT if expected descender and nothing significantly below BL */
595
+
596
+ /* ??? REJECT if expected ascender and nothing significantly above x-ht */
597
+
598
+ /*
599
+ IF AMBIG_CAPS_X_CHS
600
+ IF blob is definitely an ascender ( > xht + xht err )AND
601
+ char is an x-ht char
602
+ THEN
603
+ flip case
604
+ IF blob is defintiely an x-ht ( <= xht + xht err ) AND
605
+ char is an ascender char
606
+ THEN
607
+ flip case
608
+ */
609
+ blob_definite_x_ht = blob_ht_above_baseline <= x_ht + acceptable_variation;
610
+ blob_definite_caps_ht = blob_ht_above_baseline >=
611
+ caps_ht - acceptable_variation;
612
+
613
+ if (STRING (chs_ambig_caps_x).contains (*proposed_char)) {
614
+ if ((!blob_definite_x_ht && !blob_definite_caps_ht) ||
615
+ ((strcmp(proposed_char, "0") == 0) && !blob_definite_caps_ht) ||
616
+ ((strcmp(proposed_char, "o") == 0) && !blob_definite_x_ht)) {
617
+ strcpy(confirmed_char, "");
618
+ return;
619
+ }
620
+
621
+ else if (blob_definite_caps_ht &&
622
+ STRING (chs_x_ht).contains (*proposed_char)) {
623
+ if (x_ht_case_flip) {
624
+ //flip to upper case
625
+ proposed_char[0] = (char) toupper (*proposed_char);
626
+ return;
627
+ } else {
628
+ strcpy(confirmed_char, "");
629
+ return;
630
+ }
631
+ }
632
+
633
+ else if (blob_definite_x_ht &&
634
+ !STRING (chs_x_ht).contains (*proposed_char)) {
635
+ if (x_ht_case_flip) {
636
+ //flip to lower case
637
+ proposed_char[0] = (char) tolower (*proposed_char);
638
+ } else {
639
+ strcpy(confirmed_char, "");
640
+ return;
641
+ }
642
+ }
643
+ }
644
+ else
645
+ if ((STRING (chs_non_ambig_x_ht).contains (*proposed_char)
646
+ && !blob_definite_x_ht)
647
+ || (STRING (chs_non_ambig_caps_ht).contains (*proposed_char)
648
+ && !blob_definite_caps_ht)) {
649
+ strcpy(confirmed_char, "");
650
+ return;
651
+ }
652
+ strcpy(confirmed_char, proposed_char);
653
+ return;
654
+ }
655
+
656
+
657
+ float estimate_from_stats(STATS &stats) {
658
+ if (stats.get_total () <= 0)
659
+ return 0.0;
660
+ else if (stats.get_total () >= 3)
661
+ return stats.ile (0.5); //median
662
+ else
663
+ return stats.mean ();
664
+ }
665
+
666
+
667
+ void improve_estimate(WERD_RES *word_res,
668
+ float &est_x_ht,
669
+ float &est_caps_ht,
670
+ STATS &x_ht,
671
+ STATS &caps_ht) {
672
+ PBLOB_IT blob_it;
673
+ inT16 blob_ht_above_baseline;
674
+
675
+ const char *word_str;
676
+ inT16 i;
677
+ inT16 offset;
678
+ TBOX blob_box; //blob bounding box
679
+ char confirmed_char[UNICHAR_LEN + 1];
680
+ char temp_char[UNICHAR_LEN + 1];
681
+ float new_val;
682
+
683
+ /* IMPROVE estimates here - if good estimates, and case ambig chars,
684
+ rescan blobs to fix case ambig blobs, re-estimate hts ??? maybe always do
685
+ it after deciding x-height
686
+ */
687
+
688
+ blob_it.set_to_list (word_res->outword->blob_list ());
689
+ word_str = word_res->best_choice->string ().string ();
690
+ for (blob_it.mark_cycle_pt (), i = 0, offset = 0;
691
+ !blob_it.cycled_list (); blob_it.forward (),
692
+ offset += word_res->best_choice->lengths()[i++]) {
693
+ if ((STRING (chs_ambig_caps_x).contains (word_str[offset])) &&
694
+ (!dodgy_blob (blob_it.data ()))) {
695
+ blob_box = blob_it.data ()->bounding_box ();
696
+ blob_ht_above_baseline = blob_box.top () - bln_baseline_offset;
697
+ strncpy(temp_char, word_str + offset,
698
+ word_res->best_choice->lengths()[i]);
699
+ temp_char[word_res->best_choice->lengths()[i]] = '\0';
700
+ check_blob_occ (temp_char,
701
+ blob_ht_above_baseline,
702
+ est_x_ht, est_caps_ht, confirmed_char);
703
+ if (strcmp(confirmed_char, "") != 0) {
704
+ if (STRING (chs_x_ht).contains (*confirmed_char))
705
+ x_ht.add (blob_ht_above_baseline, 1);
706
+ else
707
+ caps_ht.add (blob_ht_above_baseline, 1);
708
+ }
709
+ }
710
+ }
711
+ new_val = estimate_from_stats (x_ht);
712
+ if (new_val > 0)
713
+ est_x_ht = new_val;
714
+ new_val = estimate_from_stats (caps_ht);
715
+ if (new_val > 0)
716
+ est_caps_ht = new_val;
717
+ }
718
+
719
+
720
+ void reject_ambigs( //rej any accepted xht ambig chars
721
+ WERD_RES *word) {
722
+ const char *word_str;
723
+ int i = 0;
724
+
725
+ word_str = word->best_choice->string ().string ();
726
+ while (*word_str != '\0') {
727
+ if (STRING (chs_ambig_caps_x).contains (*word_str))
728
+ word->reject_map[i].setrej_xht_fixup ();
729
+ word_str += word->best_choice->lengths()[i++];
730
+ }
731
+ }
732
+
733
+
734
+ void est_ambigs( //xht ambig ht stats
735
+ WERD_RES *word_res,
736
+ STATS &stats,
737
+ float *ambig_lc_x_est, //xht est
738
+ float *ambig_uc_caps_est //caps est
739
+ ) {
740
+ float x_ht_ok_variation;
741
+ STATS short_ambigs (0, 300);
742
+ STATS tall_ambigs (0, 300);
743
+ PBLOB_IT blob_it;
744
+ TBOX blob_box; //blob bounding box
745
+ inT16 blob_ht_above_baseline;
746
+
747
+ const char *word_str;
748
+ inT16 i;
749
+ inT16 offset;
750
+ float min; //min ambig ch ht
751
+ float max; //max ambig ch ht
752
+ float short_limit; // for lower case
753
+ float tall_limit; // for upper case
754
+
755
+ x_ht_ok_variation =
756
+ (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation;
757
+
758
+ if (stats.get_total () == 0) {
759
+ *ambig_lc_x_est = 0;
760
+ *ambig_uc_caps_est = 0;
761
+ }
762
+ else {
763
+ min = stats.ile (0.0);
764
+ max = stats.ile (0.99999);
765
+ if ((max - min) < x_ht_ok_variation) {
766
+ *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean ();
767
+ //close enough
768
+ }
769
+ else {
770
+ /* Try reclustering into lower and upper case chars */
771
+ short_limit = min + (max - min) * x_ht_variation;
772
+ tall_limit = max - (max - min) * x_ht_variation;
773
+ word_str = word_res->best_choice->string ().string ();
774
+ blob_it.set_to_list (word_res->outword->blob_list ());
775
+ for (blob_it.mark_cycle_pt (), i = 0, offset = 0;
776
+ !blob_it.cycled_list (); blob_it.forward (),
777
+ offset += word_res->best_choice->lengths()[i++]) {
778
+ if (word_res->reject_map[i].accepted () &&
779
+ STRING (chs_ambig_caps_x).contains (word_str[offset]) &&
780
+ (!dodgy_blob (blob_it.data ()))) {
781
+ blob_box = blob_it.data ()->bounding_box ();
782
+ blob_ht_above_baseline =
783
+ blob_box.top () - bln_baseline_offset;
784
+ if (blob_ht_above_baseline <= short_limit)
785
+ short_ambigs.add (blob_ht_above_baseline, 1);
786
+ else if (blob_ht_above_baseline >= tall_limit)
787
+ tall_ambigs.add (blob_ht_above_baseline, 1);
788
+ }
789
+ }
790
+ *ambig_lc_x_est = short_ambigs.mean ();
791
+ *ambig_uc_caps_est = tall_ambigs.mean ();
792
+ /* Cop out if we havent got sensible clusters. */
793
+ if (*ambig_uc_caps_est - *ambig_lc_x_est <= x_ht_ok_variation)
794
+ *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean ();
795
+ //close enough
796
+ }
797
+ }
798
+ }
799
+
800
+
801
+ /*************************************************************************
802
+ * dodgy_blob()
803
+ * Returns true if the blob has more than one outline, one above the other.
804
+ * These are dodgy as the top blob could be noise, causing the bounding box xht
805
+ * to be misleading
806
+ *************************************************************************/
807
+
808
+ BOOL8 dodgy_blob(PBLOB *blob) {
809
+ OUTLINE_IT outline_it = blob->out_list ();
810
+ inT16 highest_bottom = -MAX_INT16;
811
+ inT16 lowest_top = MAX_INT16;
812
+ TBOX outline_box;
813
+
814
+ if (x_ht_include_dodgy_blobs)
815
+ return FALSE; //no blob is ever dodgy
816
+ for (outline_it.mark_cycle_pt ();
817
+ !outline_it.cycled_list (); outline_it.forward ()) {
818
+ outline_box = outline_it.data ()->bounding_box ();
819
+ if (lowest_top > outline_box.top ())
820
+ lowest_top = outline_box.top ();
821
+ if (highest_bottom < outline_box.bottom ())
822
+ highest_bottom = outline_box.bottom ();
823
+ }
824
+ return highest_bottom >= lowest_top;
825
+ }