tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,989 @@
1
+ /******************************************************************
2
+ * File: fixspace.cpp (Formerly fixspace.c)
3
+ * Description: Implements a pass over the page res, exploring the alternative
4
+ * spacing possibilities, trying to use context to improve the
5
+ word spacing
6
+ * Author: Phil Cheatle
7
+ * Created: Thu Oct 21 11:38:43 BST 1993
8
+ *
9
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
10
+ ** Licensed under the Apache License, Version 2.0 (the "License");
11
+ ** you may not use this file except in compliance with the License.
12
+ ** You may obtain a copy of the License at
13
+ ** http://www.apache.org/licenses/LICENSE-2.0
14
+ ** Unless required by applicable law or agreed to in writing, software
15
+ ** distributed under the License is distributed on an "AS IS" BASIS,
16
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ ** See the License for the specific language governing permissions and
18
+ ** limitations under the License.
19
+ *
20
+ **********************************************************************/
21
+
22
+ #include "mfcpch.h"
23
+ #include <ctype.h>
24
+ #include "reject.h"
25
+ #include "statistc.h"
26
+ #include "genblob.h"
27
+ #include "control.h"
28
+ #include "fixspace.h"
29
+ #include "tessvars.h"
30
+ #include "tessbox.h"
31
+ #include "secname.h"
32
+ #include "globals.h"
33
+
34
+ #define EXTERN
35
+
36
+ EXTERN BOOL_VAR (fixsp_check_for_fp_noise_space, TRUE,
37
+ "Try turning noise to space in fixed pitch");
38
+ EXTERN BOOL_VAR (fixsp_fp_eval, TRUE, "Use alternate evaluation for fp");
39
+ EXTERN BOOL_VAR (fixsp_noise_score_fixing, TRUE, "More sophisticated?");
40
+ EXTERN INT_VAR (fixsp_non_noise_limit, 1,
41
+ "How many non-noise blbs either side?");
42
+ EXTERN double_VAR (fixsp_small_outlines_size, 0.28, "Small if lt xht x this");
43
+
44
+ EXTERN BOOL_VAR (fixsp_ignore_punct, TRUE, "In uniform spacing calc");
45
+ EXTERN BOOL_VAR (fixsp_numeric_fix, TRUE, "Try to deal with numeric punct");
46
+ EXTERN BOOL_VAR (fixsp_prefer_joined_1s, TRUE, "Arbitrary boost");
47
+ EXTERN BOOL_VAR (tessedit_test_uniform_wd_spacing, FALSE,
48
+ "Limit context word spacing");
49
+ EXTERN BOOL_VAR (tessedit_prefer_joined_punct, FALSE,
50
+ "Reward punctation joins");
51
+ EXTERN INT_VAR (fixsp_done_mode, 1, "What constitues done for spacing");
52
+ EXTERN INT_VAR (debug_fix_space_level, 0, "Contextual fixspace debug");
53
+ EXTERN STRING_VAR (numeric_punctuation, ".,",
54
+ "Punct. chs expected WITHIN numbers");
55
+
56
+ #define PERFECT_WERDS 999
57
+ #define MAXSPACING 128 /*max expected spacing in pix */
58
+
59
+ /*************************************************************************
60
+ * fix_fuzzy_spaces()
61
+ * Walk over the page finding sequences of words joined by fuzzy spaces. Extract
62
+ * them as a sublist, process the sublist to find the optimal arrangement of
63
+ * spaces then replace the sublist in the ROW_RES.
64
+ *************************************************************************/
65
+
66
+ void fix_fuzzy_spaces( //find fuzzy words
67
+ volatile ETEXT_DESC *monitor, //progress monitor
68
+ inT32 word_count, //count of words in doc
69
+ PAGE_RES *page_res) {
70
+ BLOCK_RES_IT block_res_it; //iterators
71
+ ROW_RES_IT row_res_it;
72
+ WERD_RES_IT word_res_it_from;
73
+ WERD_RES_IT word_res_it_to;
74
+ WERD_RES *word_res;
75
+ WERD_RES_LIST fuzzy_space_words;
76
+ inT16 new_length;
77
+ BOOL8 prevent_null_wd_fixsp; //DONT process blobless wds
78
+ inT32 word_index; //current word
79
+
80
+ block_res_it.set_to_list (&page_res->block_res_list);
81
+ word_index = 0;
82
+ for (block_res_it.mark_cycle_pt ();
83
+ !block_res_it.cycled_list (); block_res_it.forward ()) {
84
+ row_res_it.set_to_list (&block_res_it.data ()->row_res_list);
85
+ for (row_res_it.mark_cycle_pt ();
86
+ !row_res_it.cycled_list (); row_res_it.forward ()) {
87
+ word_res_it_from.set_to_list (&row_res_it.data ()->word_res_list);
88
+ while (!word_res_it_from.at_last ()) {
89
+ word_res = word_res_it_from.data ();
90
+ while (!word_res_it_from.at_last () &&
91
+ !(word_res->combination ||
92
+ word_res_it_from.data_relative (1)->
93
+ word->flag (W_FUZZY_NON) ||
94
+ word_res_it_from.data_relative (1)->
95
+ word->flag (W_FUZZY_SP))) {
96
+ fix_sp_fp_word (word_res_it_from, row_res_it.data ()->row);
97
+ word_res = word_res_it_from.forward ();
98
+ word_index++;
99
+ if (monitor != NULL) {
100
+ monitor->ocr_alive = TRUE;
101
+ monitor->progress = 90 + 5 * word_index / word_count;
102
+ }
103
+ }
104
+
105
+ if (!word_res_it_from.at_last ()) {
106
+ word_res_it_to = word_res_it_from;
107
+ prevent_null_wd_fixsp =
108
+ word_res->word->gblob_list ()->empty ();
109
+ if (check_debug_pt (word_res, 60))
110
+ debug_fix_space_level.set_value (10);
111
+ word_res_it_to.forward ();
112
+ word_index++;
113
+ if (monitor != NULL) {
114
+ monitor->ocr_alive = TRUE;
115
+ monitor->progress = 90 + 5 * word_index / word_count;
116
+ }
117
+ while (!word_res_it_to.at_last () &&
118
+ (word_res_it_to.data_relative (1)->
119
+ word->flag (W_FUZZY_NON) ||
120
+ word_res_it_to.data_relative (1)->
121
+ word->flag (W_FUZZY_SP))) {
122
+ if (check_debug_pt (word_res, 60))
123
+ debug_fix_space_level.set_value (10);
124
+ if (word_res->word->gblob_list ()->empty ())
125
+ prevent_null_wd_fixsp = TRUE;
126
+ word_res = word_res_it_to.forward ();
127
+ }
128
+ if (check_debug_pt (word_res, 60))
129
+ debug_fix_space_level.set_value (10);
130
+ if (word_res->word->gblob_list ()->empty ())
131
+ prevent_null_wd_fixsp = TRUE;
132
+ if (prevent_null_wd_fixsp)
133
+ word_res_it_from = word_res_it_to;
134
+ else {
135
+ fuzzy_space_words.assign_to_sublist (&word_res_it_from,
136
+ &word_res_it_to);
137
+ fix_fuzzy_space_list (fuzzy_space_words,
138
+ row_res_it.data ()->row);
139
+ new_length = fuzzy_space_words.length ();
140
+ word_res_it_from.add_list_before (&fuzzy_space_words);
141
+ for (;
142
+ (!word_res_it_from.at_last () &&
143
+ (new_length > 0)); new_length--) {
144
+ word_res_it_from.forward ();
145
+ }
146
+ }
147
+ if (test_pt)
148
+ debug_fix_space_level.set_value (0);
149
+ }
150
+ fix_sp_fp_word (word_res_it_from, row_res_it.data ()->row);
151
+ //Last word in row
152
+ }
153
+ }
154
+ }
155
+ }
156
+
157
+
158
+ void fix_fuzzy_space_list( //space explorer
159
+ WERD_RES_LIST &best_perm,
160
+ ROW *row) {
161
+ inT16 best_score;
162
+ WERD_RES_LIST current_perm;
163
+ inT16 current_score;
164
+ BOOL8 improved = FALSE;
165
+
166
+ //default score
167
+ best_score = eval_word_spacing (best_perm);
168
+
169
+ dump_words (best_perm, best_score, 1, improved);
170
+
171
+ if (best_score != PERFECT_WERDS)
172
+ initialise_search(best_perm, current_perm);
173
+
174
+ while ((best_score != PERFECT_WERDS) && !current_perm.empty ()) {
175
+ match_current_words(current_perm, row);
176
+ current_score = eval_word_spacing (current_perm);
177
+ dump_words (current_perm, current_score, 2, improved);
178
+ if (current_score > best_score) {
179
+ best_perm.clear();
180
+ best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
181
+ best_score = current_score;
182
+ improved = TRUE;
183
+ }
184
+ if (current_score < PERFECT_WERDS)
185
+ transform_to_next_perm(current_perm);
186
+ }
187
+ dump_words (best_perm, best_score, 3, improved);
188
+ }
189
+
190
+
191
+ void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
192
+ WERD_RES_IT src_it(&src_list);
193
+ WERD_RES_IT new_it(&new_list);
194
+ WERD_RES *src_wd;
195
+ WERD_RES *new_wd;
196
+
197
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
198
+ src_wd = src_it.data ();
199
+ if (!src_wd->combination) {
200
+ new_wd = new WERD_RES (*src_wd);
201
+ new_wd->combination = FALSE;
202
+ new_wd->part_of_combo = FALSE;
203
+ new_it.add_after_then_move (new_wd);
204
+ }
205
+ }
206
+ }
207
+
208
+
209
+ void match_current_words(WERD_RES_LIST &words, ROW *row) {
210
+ WERD_RES_IT word_it(&words);
211
+ WERD_RES *word;
212
+
213
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
214
+ word = word_it.data ();
215
+ if ((!word->part_of_combo) && (word->outword == NULL))
216
+ classify_word_pass2(word, row);
217
+ }
218
+ }
219
+
220
+
221
+ /*************************************************************************
222
+ * eval_word_spacing()
223
+ * The basic measure is the number of characters in contextually confirmed
224
+ * words. (I.e the word is done)
225
+ * If all words are contextually confirmed the evaluation is deemed perfect.
226
+ *
227
+ * Some fiddles are done to handle "1"s as these are VERY frequent causes of
228
+ * fuzzy spaces. The problem with the basic measure is that "561 63" would score
229
+ * the same as "56163", though given our knowledge that the space is fuzzy, and
230
+ * that there is a "1" next to the fuzzy space, we need to ensure that "56163"
231
+ * is preferred.
232
+ *
233
+ * The solution is to NOT COUNT the score of any word which has a digit at one
234
+ * end and a "1Il" as the character the other side of the space.
235
+ *
236
+ * Conversely, any character next to a "1" within a word is counted as a positive
237
+ * score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of
238
+ * the "1" joined). "56163" would score 7 - all chars in a numeric word + 2
239
+ * sides of a "1" joined.
240
+ *
241
+ * The joined 1 rule is applied to any word REGARDLESS of contextual
242
+ * confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually
243
+ * confirmed). The only score is from the joined 1. "PS7a713/7a" scores 2.
244
+ *
245
+ *************************************************************************/
246
+ inT16 eval_word_spacing(WERD_RES_LIST &word_res_list) {
247
+ WERD_RES_IT word_res_it(&word_res_list);
248
+ inT16 total_score = 0;
249
+ inT16 word_count = 0;
250
+ inT16 done_word_count = 0;
251
+ inT16 word_len;
252
+ inT16 i;
253
+ inT16 offset;
254
+ WERD_RES *word; //current word
255
+ inT16 prev_word_score = 0;
256
+ BOOL8 prev_word_done = FALSE;
257
+ BOOL8 prev_char_1 = FALSE; //prev ch a "1/I/l"?
258
+ BOOL8 prev_char_digit = FALSE; //prev ch 2..9 or 0
259
+ BOOL8 current_char_1 = FALSE;
260
+ BOOL8 current_word_ok_so_far;
261
+ STRING punct_chars = "!\"`',.:;";
262
+ BOOL8 prev_char_punct = FALSE;
263
+ BOOL8 current_char_punct = FALSE;
264
+ BOOL8 word_done = FALSE;
265
+
266
+ do {
267
+ word = word_res_it.data ();
268
+ word_done = fixspace_thinks_word_done (word);
269
+ word_count++;
270
+ if (word->tess_failed) {
271
+ total_score += prev_word_score;
272
+ if (prev_word_done)
273
+ done_word_count++;
274
+ prev_word_score = 0;
275
+ prev_char_1 = FALSE;
276
+ prev_char_digit = FALSE;
277
+ prev_word_done = FALSE;
278
+ }
279
+ else {
280
+ /*
281
+ Can we add the prev word score and potentially count this word?
282
+ Yes IF it didnt end in a 1 when the first char of this word is a digit
283
+ AND it didnt end in a digit when the first char of this word is a 1
284
+ */
285
+ word_len = word->reject_map.length ();
286
+ current_word_ok_so_far = FALSE;
287
+ if (!((prev_char_1 &&
288
+ digit_or_numeric_punct (word, 0)) ||
289
+ (prev_char_digit &&
290
+ ((word_done &&
291
+ (word->best_choice->lengths().string()[0] == 1 &&
292
+ word->best_choice->string ()[0] == '1')) ||
293
+ (!word_done &&
294
+ STRING (conflict_set_I_l_1).contains (word->best_choice->
295
+ string ()[0])))))) {
296
+ total_score += prev_word_score;
297
+ if (prev_word_done)
298
+ done_word_count++;
299
+ current_word_ok_so_far = word_done;
300
+ }
301
+
302
+ if ((current_word_ok_so_far) &&
303
+ (!tessedit_test_uniform_wd_spacing ||
304
+ ((word->best_choice->permuter () == NUMBER_PERM) ||
305
+ uniformly_spaced (word)))) {
306
+ prev_word_done = TRUE;
307
+ prev_word_score = word_len;
308
+ }
309
+ else {
310
+ prev_word_done = FALSE;
311
+ prev_word_score = 0;
312
+ }
313
+
314
+ if (fixsp_prefer_joined_1s) {
315
+ /* Add 1 to total score for every joined 1 regardless of context and
316
+ rejtn */
317
+
318
+ for (i = 0, prev_char_1 = FALSE; i < word_len; i++) {
319
+ current_char_1 = word->best_choice->string ()[i] == '1';
320
+ if (prev_char_1 || (current_char_1 && (i > 0)))
321
+ total_score++;
322
+ prev_char_1 = current_char_1;
323
+ }
324
+ }
325
+
326
+ /* Add 1 to total score for every joined punctuation regardless of context
327
+ and rejtn */
328
+ if (tessedit_prefer_joined_punct) {
329
+ for (i = 0, offset = 0, prev_char_punct = FALSE; i < word_len;
330
+ offset += word->best_choice->lengths()[i++]) {
331
+ current_char_punct =
332
+ punct_chars.contains (word->best_choice->string ()[offset]);
333
+ if (prev_char_punct || (current_char_punct && (i > 0)))
334
+ total_score++;
335
+ prev_char_punct = current_char_punct;
336
+ }
337
+ }
338
+ prev_char_digit = digit_or_numeric_punct (word, word_len - 1);
339
+ for (i = 0, offset = 0; i < word_len - 1;
340
+ offset += word->best_choice->lengths()[i++]);
341
+ prev_char_1 =
342
+ ((word_done
343
+ && (word->best_choice->string ()[offset] == '1'))
344
+ || (!word_done
345
+ && STRING (conflict_set_I_l_1).contains (word->best_choice->
346
+ string ()[offset])));
347
+ }
348
+ /* Find next word */
349
+ do
350
+ word_res_it.forward ();
351
+ while (word_res_it.data ()->part_of_combo);
352
+ }
353
+ while (!word_res_it.at_first ());
354
+ total_score += prev_word_score;
355
+ if (prev_word_done)
356
+ done_word_count++;
357
+ if (done_word_count == word_count)
358
+ return PERFECT_WERDS;
359
+ else
360
+ return total_score;
361
+ }
362
+
363
+
364
+ BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position) {
365
+ int i;
366
+ int offset;
367
+
368
+ for (i = 0, offset = 0; i < char_position;
369
+ offset += word->best_choice->lengths()[i++]);
370
+ return (unicharset.get_isdigit(word->best_choice->string().string() + offset,
371
+ word->best_choice->lengths()[i]) ||
372
+ (fixsp_numeric_fix &&
373
+ (word->best_choice->permuter () == NUMBER_PERM) &&
374
+ STRING (numeric_punctuation).contains
375
+ (word->best_choice->string().string()[offset])));
376
+ }
377
+
378
+
379
+ /*************************************************************************
380
+ * transform_to_next_perm()
381
+ * Examines the current word list to find the smallest word gap size. Then walks
382
+ * the word list closing any gaps of this size by either inserting new
383
+ * combination words, or extending existing ones.
384
+ *
385
+ * The routine COULD be limited to stop it building words longer than N blobs.
386
+ *
387
+ * If there are no more gaps then it DELETES the entire list and returns the
388
+ * empty list to cause termination.
389
+ *************************************************************************/
390
+ void transform_to_next_perm(WERD_RES_LIST &words) {
391
+ WERD_RES_IT word_it(&words);
392
+ WERD_RES_IT prev_word_it(&words);
393
+ WERD_RES *word;
394
+ WERD_RES *prev_word;
395
+ WERD_RES *combo;
396
+ WERD *copy_word;
397
+ inT16 prev_right = -1;
398
+ TBOX box;
399
+ inT16 gap;
400
+ inT16 min_gap = MAX_INT16;
401
+
402
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
403
+ word = word_it.data ();
404
+ if (!word->part_of_combo) {
405
+ box = word->word->bounding_box ();
406
+ if (prev_right >= 0) {
407
+ gap = box.left () - prev_right;
408
+ if (gap < min_gap)
409
+ min_gap = gap;
410
+ }
411
+ prev_right = box.right ();
412
+ }
413
+ }
414
+ if (min_gap < MAX_INT16) {
415
+ prev_right = -1; //back to start
416
+ word_it.set_to_list (&words);
417
+ for (; //cant use cycle pt due to inserted combos at start of list
418
+ (prev_right < 0) || !word_it.at_first (); word_it.forward ()) {
419
+ word = word_it.data ();
420
+ if (!word->part_of_combo) {
421
+ box = word->word->bounding_box ();
422
+ if (prev_right >= 0) {
423
+ gap = box.left () - prev_right;
424
+ if (gap <= min_gap) {
425
+ prev_word = prev_word_it.data ();
426
+ if (prev_word->combination)
427
+ combo = prev_word;
428
+ else {
429
+ /* Make a new combination and insert before the first word being joined */
430
+ copy_word = new WERD;
431
+ *copy_word = *(prev_word->word);
432
+ //deep copy
433
+ combo = new WERD_RES (copy_word);
434
+ combo->combination = TRUE;
435
+ combo->x_height = prev_word->x_height;
436
+ prev_word->part_of_combo = TRUE;
437
+ prev_word_it.add_before_then_move (combo);
438
+ }
439
+ combo->word->set_flag (W_EOL, word->word->flag (W_EOL));
440
+ if (word->combination) {
441
+ combo->word->join_on (word->word);
442
+ //Move blbs to combo
443
+ //old combo no longer needed
444
+ delete word_it.extract ();
445
+ }
446
+ else {
447
+ //Cpy current wd to combo
448
+ combo->copy_on (word);
449
+ word->part_of_combo = TRUE;
450
+ }
451
+ combo->done = FALSE;
452
+ if (combo->outword != NULL) {
453
+ delete combo->outword;
454
+ delete combo->best_choice;
455
+ delete combo->raw_choice;
456
+ combo->outword = NULL;
457
+ combo->best_choice = NULL;
458
+ combo->raw_choice = NULL;
459
+ }
460
+ }
461
+ else
462
+ //catch up
463
+ prev_word_it = word_it;
464
+ }
465
+ prev_right = box.right ();
466
+ }
467
+ }
468
+ }
469
+ else
470
+ words.clear (); //signal termination
471
+ }
472
+
473
+
474
+ void dump_words(WERD_RES_LIST &perm, inT16 score, inT16 mode, BOOL8 improved) {
475
+ WERD_RES_IT word_res_it(&perm);
476
+ static STRING initial_str;
477
+
478
+ if (debug_fix_space_level > 0) {
479
+ if (mode == 1) {
480
+ initial_str = "";
481
+ for (word_res_it.mark_cycle_pt ();
482
+ !word_res_it.cycled_list (); word_res_it.forward ()) {
483
+ if (!word_res_it.data ()->part_of_combo) {
484
+ initial_str += word_res_it.data ()->best_choice->string ();
485
+ initial_str += ' ';
486
+ }
487
+ }
488
+ }
489
+
490
+ #ifndef SECURE_NAMES
491
+ if (debug_fix_space_level > 1) {
492
+ switch (mode) {
493
+ case 1:
494
+ tprintf ("EXTRACTED (%d): \"", score);
495
+ break;
496
+ case 2:
497
+ tprintf ("TESTED (%d): \"", score);
498
+ break;
499
+ case 3:
500
+ tprintf ("RETURNED (%d): \"", score);
501
+ break;
502
+ }
503
+
504
+ for (word_res_it.mark_cycle_pt ();
505
+ !word_res_it.cycled_list (); word_res_it.forward ()) {
506
+ if (!word_res_it.data ()->part_of_combo)
507
+ tprintf("%s/%1d ",
508
+ word_res_it.data ()->best_choice->string ().
509
+ string (),
510
+ (int) word_res_it.data ()->best_choice->permuter ());
511
+ }
512
+ tprintf ("\"\n");
513
+ }
514
+ else if (improved) {
515
+ tprintf ("FIX SPACING \"%s\" => \"", initial_str.string ());
516
+ for (word_res_it.mark_cycle_pt ();
517
+ !word_res_it.cycled_list (); word_res_it.forward ()) {
518
+ if (!word_res_it.data ()->part_of_combo)
519
+ tprintf ("%s/%1d ",
520
+ word_res_it.data ()->best_choice->string ().
521
+ string (),
522
+ (int) word_res_it.data ()->best_choice->permuter ());
523
+ }
524
+ tprintf ("\"\n");
525
+ }
526
+ #endif
527
+ }
528
+ }
529
+
530
+
531
+ /*************************************************************************
532
+ * uniformly_spaced()
533
+ * Return true if one of the following are true:
534
+ * - All inter-char gaps are the same width
535
+ * - The largest gap is no larger than twice the mean/median of the others
536
+ * - The largest gap is < 64/5 = 13 and all others are <= 0
537
+ * **** REMEMBER - WE'RE NOW WORKING WITH A BLN WERD !!!
538
+ *************************************************************************/
539
+ BOOL8 uniformly_spaced( //sensible word
540
+ WERD_RES *word) {
541
+ PBLOB_IT blob_it;
542
+ TBOX box;
543
+ inT16 prev_right = -MAX_INT16;
544
+ inT16 gap;
545
+ inT16 max_gap = -MAX_INT16;
546
+ inT16 max_gap_count = 0;
547
+ STATS gap_stats (0, MAXSPACING);
548
+ BOOL8 result;
549
+ const ROW *row = word->denorm.row ();
550
+ float max_non_space;
551
+ float normalised_max_nonspace;
552
+ inT16 i = 0;
553
+ inT16 offset = 0;
554
+ STRING punct_chars = "\"`',.:;";
555
+
556
+ blob_it.set_to_list (word->outword->blob_list ());
557
+
558
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
559
+ box = blob_it.data ()->bounding_box ();
560
+ if ((prev_right > -MAX_INT16) &&
561
+ (!fixsp_ignore_punct ||
562
+ (!punct_chars.contains (word->best_choice->string ()
563
+ [offset - word->best_choice->lengths()[i - 1]]) &&
564
+ !punct_chars.contains (word->best_choice->string ()[offset])))) {
565
+ gap = box.left () - prev_right;
566
+ if (gap < max_gap)
567
+ gap_stats.add (gap, 1);
568
+ else if (gap == max_gap)
569
+ max_gap_count++;
570
+ else {
571
+ if (max_gap_count > 0)
572
+ gap_stats.add (max_gap, max_gap_count);
573
+ max_gap = gap;
574
+ max_gap_count = 1;
575
+ }
576
+ }
577
+ prev_right = box.right ();
578
+ offset += word->best_choice->lengths()[i++];
579
+ }
580
+
581
+ max_non_space = (row->space () + 3 * row->kern ()) / 4;
582
+ normalised_max_nonspace = max_non_space * bln_x_height / row->x_height ();
583
+
584
+ result = ((gap_stats.get_total () == 0) ||
585
+ (max_gap <= normalised_max_nonspace) ||
586
+ ((gap_stats.get_total () > 2) &&
587
+ (max_gap <= 2 * gap_stats.median ())) ||
588
+ ((gap_stats.get_total () <= 2) &&
589
+ (max_gap <= 2 * gap_stats.mean ())));
590
+ #ifndef SECURE_NAMES
591
+ if ((debug_fix_space_level > 1)) {
592
+ if (result)
593
+ tprintf
594
+ ("ACCEPT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n",
595
+ word->best_choice->string ().string (), normalised_max_nonspace,
596
+ max_gap, max_gap_count, gap_stats.get_total (), gap_stats.mean (),
597
+ gap_stats.median ());
598
+ else
599
+ tprintf
600
+ ("REJECT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n",
601
+ word->best_choice->string ().string (), normalised_max_nonspace,
602
+ max_gap, max_gap_count, gap_stats.get_total (), gap_stats.mean (),
603
+ gap_stats.median ());
604
+ }
605
+ #endif
606
+
607
+ return result;
608
+ }
609
+
610
+
611
+ BOOL8 fixspace_thinks_word_done(WERD_RES *word) {
612
+ if (word->done)
613
+ return TRUE;
614
+
615
+ /*
616
+ Use all the standard pass 2 conditions for mode 5 in set_done() in
617
+ reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT
618
+ CARE WHETHER WE HAVE of/at on/an etc.
619
+ */
620
+ if ((fixsp_done_mode > 0) &&
621
+ (word->tess_accepted ||
622
+ ((fixsp_done_mode == 2) &&
623
+ (word->reject_map.reject_count () == 0)) ||
624
+ (fixsp_done_mode == 3)) &&
625
+ (strchr (word->best_choice->string ().string (), ' ') == NULL) &&
626
+ ((word->best_choice->permuter () == SYSTEM_DAWG_PERM) ||
627
+ (word->best_choice->permuter () == FREQ_DAWG_PERM) ||
628
+ (word->best_choice->permuter () == USER_DAWG_PERM) ||
629
+ (word->best_choice->permuter () == NUMBER_PERM)))
630
+ return TRUE;
631
+ else
632
+ return FALSE;
633
+ }
634
+
635
+
636
+ /*************************************************************************
637
+ * fix_sp_fp_word()
638
+ * Test the current word to see if it can be split by deleting noise blobs. If
639
+ * so, do the business.
640
+ * Return with the iterator pointing to the same place if the word is unchanged,
641
+ * or the last of the replacement words.
642
+ *************************************************************************/
643
+ void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row) {
644
+ WERD_RES *word_res;
645
+ WERD_RES_LIST sub_word_list;
646
+ WERD_RES_IT sub_word_list_it(&sub_word_list);
647
+ inT16 blob_index;
648
+ inT16 new_length;
649
+ float junk;
650
+
651
+ word_res = word_res_it.data ();
652
+ if (!fixsp_check_for_fp_noise_space ||
653
+ word_res->word->flag (W_REP_CHAR) ||
654
+ word_res->combination ||
655
+ word_res->part_of_combo || !word_res->word->flag (W_DONT_CHOP))
656
+ return;
657
+
658
+ blob_index = worst_noise_blob (word_res, &junk);
659
+ if (blob_index < 0)
660
+ return;
661
+
662
+ #ifndef SECURE_NAMES
663
+ if (debug_fix_space_level > 1) {
664
+ tprintf ("FP fixspace working on \"%s\"\n",
665
+ word_res->best_choice->string ().string ());
666
+ }
667
+ #endif
668
+ gblob_sort_list ((PBLOB_LIST *) word_res->word->rej_cblob_list (), FALSE);
669
+ sub_word_list_it.add_after_stay_put (word_res_it.extract ());
670
+ fix_noisy_space_list(sub_word_list, row);
671
+ new_length = sub_word_list.length ();
672
+ word_res_it.add_list_before (&sub_word_list);
673
+ for (; (!word_res_it.at_last () && (new_length > 1)); new_length--) {
674
+ word_res_it.forward ();
675
+ }
676
+ }
677
+
678
+
679
+ void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row) {
680
+ inT16 best_score;
681
+ WERD_RES_IT best_perm_it(&best_perm);
682
+ WERD_RES_LIST current_perm;
683
+ WERD_RES_IT current_perm_it(&current_perm);
684
+ WERD_RES *old_word_res;
685
+ WERD_RES *new_word_res;
686
+ inT16 current_score;
687
+ BOOL8 improved = FALSE;
688
+
689
+ //default score
690
+ best_score = fp_eval_word_spacing (best_perm);
691
+
692
+ dump_words (best_perm, best_score, 1, improved);
693
+
694
+ new_word_res = new WERD_RES;
695
+ old_word_res = best_perm_it.data ();
696
+ //Kludge to force deep copy
697
+ old_word_res->combination = TRUE;
698
+ *new_word_res = *old_word_res; //deep copy
699
+ //Undo kludge
700
+ old_word_res->combination = FALSE;
701
+ //Undo kludge
702
+ new_word_res->combination = FALSE;
703
+ current_perm_it.add_to_end (new_word_res);
704
+
705
+ break_noisiest_blob_word(current_perm);
706
+
707
+ while ((best_score != PERFECT_WERDS) && !current_perm.empty ()) {
708
+ match_current_words(current_perm, row);
709
+ current_score = fp_eval_word_spacing (current_perm);
710
+ dump_words (current_perm, current_score, 2, improved);
711
+ if (current_score > best_score) {
712
+ best_perm.clear();
713
+ best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
714
+ best_score = current_score;
715
+ improved = TRUE;
716
+ }
717
+ if (current_score < PERFECT_WERDS)
718
+ break_noisiest_blob_word(current_perm);
719
+ }
720
+ dump_words (best_perm, best_score, 3, improved);
721
+ }
722
+
723
+
724
+ /*************************************************************************
725
+ * break_noisiest_blob_word()
726
+ * Find the word with the blob which looks like the worst noise.
727
+ * Break the word into two, deleting the noise blob.
728
+ *************************************************************************/
729
+ void break_noisiest_blob_word(WERD_RES_LIST &words) {
730
+ WERD_RES_IT word_it(&words);
731
+ WERD_RES_IT worst_word_it;
732
+ float worst_noise_score = 9999;
733
+ int worst_blob_index = -1; //noisiest blb of noisiest wd
734
+ int blob_index; //of wds noisiest blb
735
+ float noise_score; //of wds noisiest blb
736
+ WERD_RES *word_res;
737
+ C_BLOB_IT blob_it;
738
+ C_BLOB_IT rej_cblob_it;
739
+ C_BLOB_LIST new_blob_list;
740
+ C_BLOB_IT new_blob_it;
741
+ C_BLOB_IT new_rej_cblob_it;
742
+ WERD *new_word;
743
+ inT16 start_of_noise_blob;
744
+ inT16 i;
745
+
746
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
747
+ blob_index = worst_noise_blob (word_it.data (), &noise_score);
748
+ if ((blob_index > -1) && (worst_noise_score > noise_score)) {
749
+ worst_noise_score = noise_score;
750
+ worst_blob_index = blob_index;
751
+ worst_word_it = word_it;
752
+ }
753
+ }
754
+ if (worst_blob_index < 0) {
755
+ words.clear (); //signal termination
756
+ return;
757
+ }
758
+
759
+ /* Now split the worst_word_it */
760
+
761
+ word_res = worst_word_it.data ();
762
+
763
+ /* Move blobs before noise blob to a new bloblist */
764
+
765
+ new_blob_it.set_to_list (&new_blob_list);
766
+ blob_it.set_to_list (word_res->word->cblob_list ());
767
+ for (i = 0; i < worst_blob_index; i++, blob_it.forward ()) {
768
+ new_blob_it.add_after_then_move (blob_it.extract ());
769
+ }
770
+ start_of_noise_blob = blob_it.data ()->bounding_box ().left ();
771
+ delete blob_it.extract (); //throw out noise blb
772
+
773
+ new_word = new WERD (&new_blob_list, word_res->word);
774
+ new_word->set_flag (W_EOL, FALSE);
775
+ word_res->word->set_flag (W_BOL, FALSE);
776
+ word_res->word->set_blanks (1);//After break
777
+
778
+ new_rej_cblob_it.set_to_list (new_word->rej_cblob_list ());
779
+ rej_cblob_it.set_to_list (word_res->word->rej_cblob_list ());
780
+ for (;
781
+ (!rej_cblob_it.empty () &&
782
+ (rej_cblob_it.data ()->bounding_box ().left () <
783
+ start_of_noise_blob)); rej_cblob_it.forward ()) {
784
+ new_rej_cblob_it.add_after_then_move (rej_cblob_it.extract ());
785
+ }
786
+
787
+ worst_word_it.add_before_then_move (new WERD_RES (new_word));
788
+
789
+ word_res->done = FALSE;
790
+ if (word_res->outword != NULL) {
791
+ delete word_res->outword;
792
+ delete word_res->best_choice;
793
+ delete word_res->raw_choice;
794
+ word_res->outword = NULL;
795
+ word_res->best_choice = NULL;
796
+ word_res->raw_choice = NULL;
797
+ }
798
+ }
799
+
800
+
801
+ inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score) {
802
+ PBLOB_IT blob_it;
803
+ inT16 blob_count;
804
+ float noise_score[512];
805
+ int i;
806
+ int min_noise_blob; //1st contender
807
+ int max_noise_blob; //last contender
808
+ int non_noise_count;
809
+ int worst_noise_blob; //Worst blob
810
+ float small_limit = bln_x_height * fixsp_small_outlines_size;
811
+ float non_noise_limit = bln_x_height * 0.8;
812
+
813
+ blob_it.set_to_list (word_res->outword->blob_list ());
814
+ //normalised
815
+ blob_count = blob_it.length ();
816
+ ASSERT_HOST (blob_count <= 512);
817
+ if (blob_count < 5)
818
+ return -1; //too short to split
819
+ /* Get the noise scores for all blobs */
820
+
821
+ #ifndef SECURE_NAMES
822
+ if (debug_fix_space_level > 5)
823
+ tprintf ("FP fixspace Noise metrics for \"%s\": ",
824
+ word_res->best_choice->string ().string ());
825
+ #endif
826
+
827
+ for (i = 0; i < blob_count; i++, blob_it.forward ()) {
828
+ if (word_res->reject_map[i].accepted ())
829
+ noise_score[i] = non_noise_limit;
830
+ else
831
+ noise_score[i] = blob_noise_score (blob_it.data ());
832
+
833
+ if (debug_fix_space_level > 5)
834
+ tprintf ("%1.1f ", noise_score[i]);
835
+ }
836
+ if (debug_fix_space_level > 5)
837
+ tprintf ("\n");
838
+
839
+ /* Now find the worst one which is far enough away from the end of the word */
840
+
841
+ non_noise_count = 0;
842
+ for (i = 0;
843
+ (i < blob_count) && (non_noise_count < fixsp_non_noise_limit); i++) {
844
+ if (noise_score[i] >= non_noise_limit)
845
+ non_noise_count++;
846
+ }
847
+ if (non_noise_count < fixsp_non_noise_limit)
848
+ return -1;
849
+ min_noise_blob = i;
850
+
851
+ non_noise_count = 0;
852
+ for (i = blob_count - 1;
853
+ (i >= 0) && (non_noise_count < fixsp_non_noise_limit); i--) {
854
+ if (noise_score[i] >= non_noise_limit)
855
+ non_noise_count++;
856
+ }
857
+ if (non_noise_count < fixsp_non_noise_limit)
858
+ return -1;
859
+ max_noise_blob = i;
860
+
861
+ if (min_noise_blob > max_noise_blob)
862
+ return -1;
863
+
864
+ *worst_noise_score = small_limit;
865
+ worst_noise_blob = -1;
866
+ for (i = min_noise_blob; i <= max_noise_blob; i++) {
867
+ if (noise_score[i] < *worst_noise_score) {
868
+ worst_noise_blob = i;
869
+ *worst_noise_score = noise_score[i];
870
+ }
871
+ }
872
+ return worst_noise_blob;
873
+ }
874
+
875
+
876
+ float blob_noise_score(PBLOB *blob) {
877
+ OUTLINE_IT outline_it;
878
+ TBOX box; //BB of outline
879
+ inT16 outline_count = 0;
880
+ inT16 max_dimension;
881
+ inT16 largest_outline_dimension = 0;
882
+
883
+ outline_it.set_to_list (blob->out_list ());
884
+ for (outline_it.mark_cycle_pt ();
885
+ !outline_it.cycled_list (); outline_it.forward ()) {
886
+ outline_count++;
887
+ box = outline_it.data ()->bounding_box ();
888
+ if (box.height () > box.width ())
889
+ max_dimension = box.height ();
890
+ else
891
+ max_dimension = box.width ();
892
+
893
+ if (largest_outline_dimension < max_dimension)
894
+ largest_outline_dimension = max_dimension;
895
+ }
896
+
897
+ if (fixsp_noise_score_fixing) {
898
+ if (outline_count > 5)
899
+ //penalise LOTS of blobs
900
+ largest_outline_dimension *= 2;
901
+
902
+ box = blob->bounding_box ();
903
+
904
+ if ((box.bottom () > bln_baseline_offset * 4) ||
905
+ (box.top () < bln_baseline_offset / 2))
906
+ //Lax blob is if high or low
907
+ largest_outline_dimension /= 2;
908
+ }
909
+ return largest_outline_dimension;
910
+ }
911
+
912
+
913
+ void fixspace_dbg(WERD_RES *word) {
914
+ TBOX box = word->word->bounding_box ();
915
+ BOOL8 show_map_detail = FALSE;
916
+ inT16 i;
917
+
918
+ box.print ();
919
+ #ifndef SECURE_NAMES
920
+ tprintf (" \"%s\" ", word->best_choice->string ().string ());
921
+ tprintf ("Blob count: %d (word); %d/%d (outword)\n",
922
+ word->word->gblob_list ()->length (),
923
+ word->outword->gblob_list ()->length (),
924
+ word->outword->rej_blob_list ()->length ());
925
+ word->reject_map.print (debug_fp);
926
+ tprintf ("\n");
927
+ if (show_map_detail) {
928
+ tprintf ("\"%s\"\n", word->best_choice->string ().string ());
929
+ for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {
930
+ tprintf ("**** \"%c\" ****\n", word->best_choice->string ()[i]);
931
+ word->reject_map[i].full_print (debug_fp);
932
+ }
933
+ }
934
+
935
+ tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
936
+ tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
937
+ #endif
938
+ }
939
+
940
+
941
+ /*************************************************************************
942
+ * fp_eval_word_spacing()
943
+ * Evaluation function for fixed pitch word lists.
944
+ *
945
+ * Basically, count the number of "nice" characters - those which are in tess
946
+ * acceptable words or in dict words and are not rejected.
947
+ * Penalise any potential noise chars
948
+ *************************************************************************/
949
+
950
+ inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
951
+ WERD_RES_IT word_it(&word_res_list);
952
+ WERD_RES *word;
953
+ PBLOB_IT blob_it;
954
+ inT16 word_length;
955
+ inT16 score = 0;
956
+ inT16 i;
957
+ inT16 offset;
958
+ const char *chs;
959
+ float small_limit = bln_x_height * fixsp_small_outlines_size;
960
+
961
+ if (!fixsp_fp_eval)
962
+ return (eval_word_spacing (word_res_list));
963
+
964
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
965
+ word = word_it.data ();
966
+ word_length = word->reject_map.length ();
967
+ chs = word->best_choice->string ().string ();
968
+ if ((word->done ||
969
+ word->tess_accepted) ||
970
+ (word->best_choice->permuter () == SYSTEM_DAWG_PERM) ||
971
+ (word->best_choice->permuter () == FREQ_DAWG_PERM) ||
972
+ (word->best_choice->permuter () == USER_DAWG_PERM) ||
973
+ (safe_dict_word (chs) > 0)) {
974
+ blob_it.set_to_list (word->outword->blob_list ());
975
+ for (i = 0, offset = 0; i < word_length;
976
+ offset += word->best_choice->lengths()[i++], blob_it.forward ()) {
977
+ if ((chs[offset] == ' ') ||
978
+ (blob_noise_score (blob_it.data ()) < small_limit))
979
+ score -= 1; //penalise possibly erroneous non-space
980
+
981
+ else if (word->reject_map[i].accepted ())
982
+ score++;
983
+ }
984
+ }
985
+ }
986
+ if (score < 0)
987
+ score = 0;
988
+ return score;
989
+ }