tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,907 @@
1
+ /**********************************************************************
2
+ * File: tordmain.cpp (Formerly textordp.c)
3
+ * Description: C++ top level textord code.
4
+ * Author: Ray Smith
5
+ * Created: Tue Jul 28 17:12:33 BST 1992
6
+ *
7
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #ifdef __UNIX__
22
+ #include <assert.h>
23
+ #endif
24
+ #include "stderr.h"
25
+ #include "globaloc.h"
26
+ #include "tessout.h"
27
+ #include "blread.h"
28
+ #include "blobbox.h"
29
+ //#include "lmedsq.h"
30
+ #include "edgblob.h"
31
+ //#include "adthsh.h"
32
+ #include "drawtord.h"
33
+ #include "makerow.h"
34
+ #include "wordseg.h"
35
+ #include "ocrclass.h"
36
+ #include "genblob.h"
37
+ #include "imgs.h"
38
+ //#include "bairdskw.h"
39
+ #include "tordmain.h"
40
+ #include "secname.h"
41
+ #include "pageseg.h"
42
+
43
+ const ERRCODE BLOCKLESS_BLOBS = "Warning:some blobs assigned to no block";
44
+
45
+ #define EXTERN
46
+
47
+ EXTERN BOOL_VAR (textord_no_rejects, FALSE, "Don't remove noise blobs");
48
+ EXTERN BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs");
49
+ EXTERN BOOL_VAR (textord_show_boxes, FALSE, "Display unsorted blobs");
50
+ EXTERN BOOL_VAR (textord_new_initial_xheight, TRUE,
51
+ "Use test xheight mechanism");
52
+ EXTERN BOOL_VAR (textord_exit_after, FALSE, "Exit after completing textord");
53
+ EXTERN INT_VAR (textord_max_noise_size, 7, "Pixel size of noise");
54
+ EXTERN double_VAR (textord_blob_size_bigile, 95,
55
+ "Percentile for large blobs");
56
+ EXTERN double_VAR (textord_noise_area_ratio, 0.7,
57
+ "Fraction of bounding box for noise");
58
+ EXTERN double_VAR (textord_blob_size_smallile, 20,
59
+ "Percentile for small blobs");
60
+ EXTERN double_VAR (textord_initialx_ile, 0.75,
61
+ "Ile of sizes for xheight guess");
62
+ EXTERN double_VAR (textord_initialasc_ile, 0.90,
63
+ "Ile of sizes for xheight guess");
64
+ EXTERN INT_VAR (textord_noise_sizefraction, 10,
65
+ "Fraction of size for maxima");
66
+ EXTERN double_VAR (textord_noise_sizelimit, 0.5,
67
+ "Fraction of x for big t count");
68
+ EXTERN INT_VAR (textord_noise_translimit, 16, "Transitions for normal blob");
69
+ EXTERN double_VAR (textord_noise_normratio, 2.0,
70
+ "Dot to norm ratio for deletion");
71
+ EXTERN BOOL_VAR (textord_noise_rejwords, TRUE, "Reject noise-like words");
72
+ EXTERN BOOL_VAR (textord_noise_rejrows, TRUE, "Reject noise-like rows");
73
+ EXTERN double_VAR (textord_noise_syfract, 0.2,
74
+ "xh fract error for norm blobs");
75
+ EXTERN double_VAR (textord_noise_sxfract, 0.4,
76
+ "xh fract width error for norm blobs");
77
+ EXTERN INT_VAR (textord_noise_sncount, 1, "super norm blobs to save row");
78
+ EXTERN double_VAR (textord_noise_rowratio, 6.0,
79
+ "Dot to norm ratio for deletion");
80
+
81
+ EXTERN BOOL_VAR (textord_noise_debug, FALSE, "Debug row garbage detector");
82
+ EXTERN double_VAR (textord_blshift_maxshift, 0.00, "Max baseline shift");
83
+ EXTERN double_VAR (textord_blshift_xfraction, 9.99,
84
+ "Min size of baseline shift");
85
+ EXTERN STRING_EVAR (tessedit_image_ext, ".tif", "Externsion for image file");
86
+
87
+ #ifndef EMBEDDED
88
+ EXTERN clock_t previous_cpu;
89
+ #endif
90
+
91
+ extern BOOL_VAR_H (polygon_tess_approximation, TRUE,
92
+ "Do tess poly instead of grey scale");
93
+
94
+ #define MAX_NEAREST_DIST 600 //for block skew stats
95
+ #define MAX_BLOB_TRANSITIONS100 //for nois stats
96
+
97
+ extern IMAGE page_image; //must be defined somewhere
98
+ extern BOOL_VAR_H (interactive_mode, TRUE, "Run interactively?");
99
+ extern /*"C" */ ETEXT_DESC *global_monitor; //progress monitor
100
+
101
+ /**********************************************************************
102
+ * read_and_textord
103
+ *
104
+ * Read a file of blocks n blobs and textord them.
105
+ **********************************************************************/
106
+
107
+ void read_and_textord( //read .pb file
108
+ const char *filename, //.pb file
109
+ BLOCK_LIST *blocks) {
110
+ int c; //input character
111
+ FILE *infp; //input file
112
+ BLOCK *block; //current block
113
+ TBOX page_box; //bounding_box
114
+ BLOCK_IT block_it = blocks; //iterator
115
+ //different orientations
116
+ TO_BLOCK_LIST land_blocks, port_blocks;
117
+
118
+ infp = fopen (filename, "r");
119
+ if (infp == NULL)
120
+ CANTOPENFILE.error ("read_and_textord", EXIT, filename);
121
+
122
+ while (((c = fgetc (infp)) != EOF) && (ungetc (c, infp) != EOF)) {
123
+ //get one
124
+ block = BLOCK::de_serialise (infp);
125
+ //add to list
126
+ block_it.add_after_then_move (block);
127
+ //find page size
128
+ page_box += block->bounding_box ();
129
+ }
130
+ fclose(infp);
131
+
132
+ assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);
133
+ filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);
134
+ filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);
135
+ textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);
136
+ }
137
+
138
+
139
+ /**********************************************************************
140
+ * edges_and_textord
141
+ *
142
+ * Read a file of blocks n blobs and textord them.
143
+ **********************************************************************/
144
+
145
+ void edges_and_textord( //read .pb file
146
+ const char *filename, //.pb file
147
+ BLOCK_LIST *blocks) {
148
+ BLOCK *block; //current block
149
+ const char *lastdot; //of name
150
+ STRING name = filename; //truncated name
151
+ ICOORD page_tr;
152
+ TBOX page_box; //bounding_box
153
+ PDBLK_CLIST pd_blocks; //copy of list
154
+ BLOCK_IT block_it = blocks; //iterator
155
+ PDBLK_C_IT pd_it = &pd_blocks; //iterator
156
+ //different orientations
157
+ TO_BLOCK_LIST land_blocks, port_blocks;
158
+ IMAGE thresh_image; //thresholded
159
+
160
+ lastdot = strrchr (name.string (), '.');
161
+ if (lastdot != NULL)
162
+ name[lastdot-name.string()] = '\0';
163
+ if (page_image.get_bpp () == 0) {
164
+ name += tessedit_image_ext;
165
+ if (page_image.read_header (name.string ()))
166
+ CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ());
167
+ if (page_image.read (0))
168
+ READFAILED.error ("edges_and_textord", EXIT, name.string ());
169
+ name = filename;
170
+ lastdot = strrchr (name.string (), '.');
171
+ if (lastdot != NULL)
172
+ name[lastdot-name.string()] = '\0';
173
+ }
174
+ page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ());
175
+ if (!read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (),
176
+ blocks))
177
+ segment_page(blocks);
178
+ block_it.set_to_list (blocks);
179
+ if (global_monitor != NULL)
180
+ global_monitor->ocr_alive = TRUE;
181
+
182
+ if (page_image.get_bpp () > 1) {
183
+ set_global_loc_code(LOC_ADAPTIVE);
184
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
185
+ block_it.forward ()) {
186
+ block = block_it.data ();
187
+ pd_it.add_after_then_move (block);
188
+ }
189
+ // adaptive_threshold(&page_image,&pd_blocks,&thresh_image);
190
+ set_global_loc_code(LOC_EDGE_PROG);
191
+ #ifndef EMBEDDED
192
+ previous_cpu = clock ();
193
+ #endif
194
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
195
+ block_it.forward ()) {
196
+ block = block_it.data ();
197
+ if (!polygon_tess_approximation)
198
+ invert_image(&page_image);
199
+ #ifndef GRAPHICS_DISABLED
200
+ extract_edges(NULL, &page_image, &thresh_image, page_tr, block);
201
+ #else
202
+ extract_edges(&page_image, &thresh_image, page_tr, block);
203
+ #endif
204
+ page_box += block->bounding_box ();
205
+ }
206
+ page_image = thresh_image; //everyone else gets it
207
+ }
208
+ else {
209
+ set_global_loc_code(LOC_EDGE_PROG);
210
+ if (!page_image.white_high ())
211
+ invert_image(&page_image);
212
+
213
+ #ifndef EMBEDDED
214
+ previous_cpu = clock ();
215
+ #endif
216
+
217
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
218
+ block_it.forward ()) {
219
+ block = block_it.data ();
220
+ #ifndef GRAPHICS_DISABLED
221
+ extract_edges(NULL, &page_image, &page_image, page_tr, block);
222
+ #else
223
+ extract_edges(&page_image, &page_image, page_tr, block);
224
+ #endif
225
+ page_box += block->bounding_box ();
226
+ }
227
+ }
228
+ if (global_monitor != NULL) {
229
+ global_monitor->ocr_alive = TRUE;
230
+ global_monitor->progress = 10;
231
+ }
232
+
233
+ assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);
234
+ if (global_monitor != NULL)
235
+ global_monitor->ocr_alive = TRUE;
236
+ filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);
237
+ #ifndef EMBEDDED
238
+ previous_cpu = clock ();
239
+ #endif
240
+ filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);
241
+ if (global_monitor != NULL)
242
+ global_monitor->ocr_alive = TRUE;
243
+ textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);
244
+ }
245
+
246
+ /**********************************************************************
247
+ * assign_blobs_to_blocks2
248
+ *
249
+ * Make a list of TO_BLOCKs for portrait and landscape orientation.
250
+ **********************************************************************/
251
+
252
+ void assign_blobs_to_blocks2( //split into groups
253
+ BLOCK_LIST *blocks, //blocks to process
254
+ TO_BLOCK_LIST *land_blocks, //rotated for landscape
255
+ TO_BLOCK_LIST *port_blocks //output list
256
+ ) {
257
+ BLOCK *block; //current block
258
+ BLOBNBOX *newblob; //created blob
259
+ C_BLOB *blob; //current blob
260
+ BLOCK_IT block_it = blocks;
261
+ C_BLOB_IT blob_it; //iterator
262
+ BLOBNBOX_IT port_box_it; //iterator
263
+ //destination iterator
264
+ TO_BLOCK_IT port_block_it = port_blocks;
265
+ TO_BLOCK *port_block; //created block
266
+
267
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
268
+ block_it.forward ()) {
269
+ block = block_it.data ();
270
+ blob_it.set_to_list (block->blob_list ());
271
+ //make one
272
+ port_block = new TO_BLOCK (block);
273
+ //make one
274
+ port_box_it.set_to_list (&port_block->blobs);
275
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
276
+ blob_it.forward ()) {
277
+ blob = blob_it.extract ();
278
+ //convert blob
279
+ newblob = new BLOBNBOX (blob);
280
+ //add to list
281
+ port_box_it.add_after_then_move (newblob);
282
+ //convert blob
283
+ }
284
+ port_block_it.add_after_then_move (port_block);
285
+ }
286
+ }
287
+
288
+
289
+ /**********************************************************************
290
+ * filter_blobs
291
+ *
292
+ * Sort the blobs into sizes in all the blocks for later work.
293
+ **********************************************************************/
294
+
295
+ void filter_blobs( //split into groups
296
+ ICOORD page_tr, //top right
297
+ TO_BLOCK_LIST *blocks, //output list
298
+ BOOL8 testing_on //for plotting
299
+ ) {
300
+ TO_BLOCK_IT block_it = blocks; //destination iterator
301
+ TO_BLOCK *block; //created block
302
+
303
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
304
+ block_it.forward ()) {
305
+ block = block_it.data ();
306
+ block->line_size = filter_noise_blobs (&block->blobs,
307
+ &block->noise_blobs,
308
+ &block->small_blobs,
309
+ &block->large_blobs);
310
+ block->line_spacing =
311
+ block->line_size * (textord_merge_desc + textord_merge_x +
312
+ textord_merge_asc +
313
+ textord_merge_asc) / textord_merge_x;
314
+ block->line_size *= textord_min_linesize;
315
+ block->max_blob_size = block->line_size * textord_excess_blobsize;
316
+ #ifndef GRAPHICS_DISABLED
317
+ if (textord_show_blobs && testing_on) {
318
+ if (to_win == NULL)
319
+ create_to_win(page_tr);
320
+ plot_blob_list (to_win, &block->noise_blobs,
321
+ ScrollView::CORAL, ScrollView::BLUE);
322
+ plot_blob_list (to_win, &block->small_blobs,
323
+ ScrollView::GOLDENROD, ScrollView::YELLOW);
324
+ plot_blob_list (to_win, &block->large_blobs,
325
+ ScrollView::DARK_GREEN, ScrollView::YELLOW);
326
+ plot_blob_list (to_win, &block->blobs,
327
+ ScrollView::WHITE, ScrollView::BROWN);
328
+ }
329
+ if (textord_show_boxes && testing_on) {
330
+ if (to_win == NULL)
331
+ create_to_win(page_tr);
332
+ plot_box_list (to_win, &block->noise_blobs, ScrollView::WHITE);
333
+ plot_box_list (to_win, &block->small_blobs, ScrollView::WHITE);
334
+ plot_box_list (to_win, &block->large_blobs, ScrollView::WHITE);
335
+ plot_box_list (to_win, &block->blobs, ScrollView::WHITE);
336
+ }
337
+ #endif
338
+ }
339
+ }
340
+
341
+
342
+ /**********************************************************************
343
+ * filter_noise_blobs
344
+ *
345
+ * Move small blobs to a separate list.
346
+ **********************************************************************/
347
+
348
+ float filter_noise_blobs( //separate noise
349
+ BLOBNBOX_LIST *src_list, //origonal list
350
+ BLOBNBOX_LIST *noise_list, //noise list
351
+ BLOBNBOX_LIST *small_list, //small blobs
352
+ BLOBNBOX_LIST *large_list //large blobs
353
+ ) {
354
+ inT16 height; //height of blob
355
+ inT16 width; //of blob
356
+ BLOBNBOX_IT src_it = src_list; //iterators
357
+ BLOBNBOX_IT noise_it = noise_list;
358
+ BLOBNBOX_IT small_it = small_list;
359
+ BLOBNBOX_IT large_it = large_list;
360
+ STATS size_stats (0, MAX_NEAREST_DIST);
361
+ //blob heights
362
+ if (textord_new_initial_xheight)
363
+ return filter_noise_blobs2 (src_list, noise_list, small_list, large_list);
364
+ float min_y; //size limits
365
+ float max_y;
366
+ float max_x;
367
+
368
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
369
+ if (src_it.data ()->bounding_box ().height () < textord_max_noise_size)
370
+ noise_it.add_after_then_move (src_it.extract ());
371
+ }
372
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
373
+ size_stats.add (src_it.data ()->bounding_box ().height (), 1);
374
+ }
375
+ min_y = floor (size_stats.ile (textord_blob_size_smallile / 100.0));
376
+ max_y = ceil (size_stats.ile (textord_blob_size_bigile / 100.0));
377
+ max_x = ceil (size_stats.ile (0.5) * textord_width_limit);
378
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
379
+ height = src_it.data ()->bounding_box ().height ();
380
+ width = src_it.data ()->bounding_box ().width ();
381
+ if (height < min_y)
382
+ small_it.add_after_then_move (src_it.extract ());
383
+ else if (height > max_y || width > max_x)
384
+ large_it.add_after_then_move (src_it.extract ());
385
+ }
386
+ return size_stats.ile (textord_initialx_ile);
387
+ }
388
+
389
+
390
+ /**********************************************************************
391
+ * filter_noise_blobs2
392
+ *
393
+ * Move small blobs to a separate list.
394
+ **********************************************************************/
395
+
396
+ float filter_noise_blobs2( //separate noise
397
+ BLOBNBOX_LIST *src_list, //origonal list
398
+ BLOBNBOX_LIST *noise_list, //noise list
399
+ BLOBNBOX_LIST *small_list, //small blobs
400
+ BLOBNBOX_LIST *large_list //large blobs
401
+ ) {
402
+ inT16 height; //height of blob
403
+ inT16 width; //of blob
404
+ BLOBNBOX *blob; //current blob
405
+ float initial_x; //first guess
406
+ BLOBNBOX_IT src_it = src_list; //iterators
407
+ BLOBNBOX_IT noise_it = noise_list;
408
+ BLOBNBOX_IT small_it = small_list;
409
+ BLOBNBOX_IT large_it = large_list;
410
+ STATS size_stats (0, MAX_NEAREST_DIST);
411
+ //blob heights
412
+ float min_y; //size limits
413
+ float max_y;
414
+ float max_x;
415
+ float max_height; //of good blobs
416
+
417
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
418
+ blob = src_it.data ();
419
+ if (blob->bounding_box ().height () < textord_max_noise_size)
420
+ noise_it.add_after_then_move (src_it.extract ());
421
+ else if (blob->enclosed_area () >= blob->bounding_box ().height ()
422
+ * blob->bounding_box ().width () * textord_noise_area_ratio)
423
+ small_it.add_after_then_move (src_it.extract ());
424
+ }
425
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
426
+ size_stats.add (src_it.data ()->bounding_box ().height (), 1);
427
+ }
428
+ initial_x = size_stats.ile (textord_initialx_ile);
429
+ max_y =
430
+ ceil (initial_x *
431
+ (textord_merge_desc + textord_merge_x +
432
+ 2 * textord_merge_asc) / textord_merge_x);
433
+ min_y = floor (initial_x / 2);
434
+ max_x = ceil (initial_x * textord_width_limit);
435
+ small_it.move_to_first ();
436
+ for (small_it.mark_cycle_pt (); !small_it.cycled_list ();
437
+ small_it.forward ()) {
438
+ height = small_it.data()->bounding_box().height();
439
+ if (height > max_y)
440
+ large_it.add_after_then_move(small_it.extract ());
441
+ else if (height >= min_y)
442
+ src_it.add_after_then_move(small_it.extract ());
443
+ }
444
+ size_stats.clear ();
445
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
446
+ height = src_it.data ()->bounding_box ().height ();
447
+ width = src_it.data ()->bounding_box ().width ();
448
+ if (height < min_y)
449
+ small_it.add_after_then_move (src_it.extract ());
450
+ else if (height > max_y || width > max_x)
451
+ large_it.add_after_then_move (src_it.extract ());
452
+ else
453
+ size_stats.add (height, 1);
454
+ }
455
+ max_height = size_stats.ile (textord_initialasc_ile);
456
+ // printf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,",
457
+ // max_y,min_y,initial_x,max_height);
458
+ max_height *= textord_merge_x / (textord_merge_x + textord_merge_asc);
459
+ if (max_height > initial_x)
460
+ initial_x = max_height;
461
+ // printf(" ret=%g\n",initial_x);
462
+ return initial_x;
463
+ }
464
+
465
+
466
+ /**********************************************************************
467
+ * textord_page
468
+ *
469
+ * Textord the list of blobs and return a list of proper blocks.
470
+ **********************************************************************/
471
+
472
+ void textord_page( //make rows & words
473
+ ICOORD page_tr, //top right
474
+ BLOCK_LIST *blocks, //block list
475
+ TO_BLOCK_LIST *land_blocks, //rotated for landscape
476
+ TO_BLOCK_LIST *port_blocks //output list
477
+ ) {
478
+ float gradient; //global skew
479
+
480
+ set_global_loc_code(LOC_TEXT_ORD_ROWS);
481
+ gradient = make_rows (page_tr, blocks, land_blocks, port_blocks);
482
+ if (global_monitor != NULL) {
483
+ global_monitor->ocr_alive = TRUE;
484
+ global_monitor->progress = 20;
485
+ }
486
+ set_global_loc_code(LOC_TEXT_ORD_WORDS);
487
+ make_words(page_tr, gradient, blocks, land_blocks, port_blocks);
488
+ if (global_monitor != NULL) {
489
+ global_monitor->ocr_alive = TRUE;
490
+ global_monitor->progress = 30;
491
+ }
492
+ cleanup_blocks(blocks); //remove empties
493
+ #ifndef GRAPHICS_DISABLED
494
+ close_to_win();
495
+ #endif
496
+ if (textord_exit_after && !interactive_mode)
497
+ exit (0);
498
+ }
499
+
500
+
501
+ /**********************************************************************
502
+ * cleanup_blocks
503
+ *
504
+ * Delete empty blocks, rows from the page.
505
+ **********************************************************************/
506
+
507
+ void cleanup_blocks( //remove empties
508
+ BLOCK_LIST *blocks //list
509
+ ) {
510
+ BLOCK_IT block_it = blocks; //iterator
511
+ ROW_IT row_it; //row iterator
512
+
513
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
514
+ block_it.forward ()) {
515
+ row_it.set_to_list (block_it.data ()->row_list ());
516
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
517
+ if ((textord_noise_rejrows
518
+ && !row_it.data ()->word_list ()->empty ()
519
+ && clean_noise_from_row (row_it.data ()))
520
+ || row_it.data ()->word_list ()->empty ())
521
+ delete row_it.extract ();//lose empty row
522
+ else {
523
+ if (textord_noise_rejwords)
524
+ clean_noise_from_words (row_it.data ());
525
+ if (textord_blshift_maxshift >= 0)
526
+ tweak_row_baseline (row_it.data ());
527
+ }
528
+ }
529
+ if (block_it.data ()->row_list ()->empty ()) {
530
+ delete block_it.extract ();//lose empty block
531
+ }
532
+ }
533
+ }
534
+
535
+
536
+ /**********************************************************************
537
+ * clean_noise_from_row
538
+ *
539
+ * Move blobs of words from rows of garbage into the reject blobs list.
540
+ **********************************************************************/
541
+
542
+ BOOL8 clean_noise_from_row( //remove empties
543
+ ROW *row //row to clean
544
+ ) {
545
+ BOOL8 testing_on;
546
+ TBOX blob_box; //bounding box
547
+ C_BLOB *blob; //current blob
548
+ C_OUTLINE *outline; //current outline
549
+ WERD *word; //current word
550
+ inT32 blob_size; //biggest size
551
+ inT32 trans_count = 0; //no of transitions
552
+ inT32 trans_threshold; //noise tolerance
553
+ inT32 dot_count; //small objects
554
+ inT32 norm_count; //normal objects
555
+ inT32 super_norm_count; //real char-like
556
+ //words of row
557
+ WERD_IT word_it = row->word_list ();
558
+ C_BLOB_IT blob_it; //blob iterator
559
+ C_OUTLINE_IT out_it; //outline iterator
560
+
561
+ if (textord_test_y > row->base_line (textord_test_x)
562
+ && textord_show_blobs
563
+ && textord_test_y < row->base_line (textord_test_x) + row->x_height ())
564
+ testing_on = TRUE;
565
+ else
566
+ testing_on = FALSE;
567
+ dot_count = 0;
568
+ norm_count = 0;
569
+ super_norm_count = 0;
570
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
571
+ word = word_it.data (); //current word
572
+ //blobs in word
573
+ blob_it.set_to_list (word->cblob_list ());
574
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
575
+ blob_it.forward ()) {
576
+ blob = blob_it.data ();
577
+ if (!word->flag (W_DONT_CHOP)) {
578
+ //get outlines
579
+ out_it.set_to_list (blob->out_list ());
580
+ for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
581
+ out_it.forward ()) {
582
+ outline = out_it.data ();
583
+ blob_box = outline->bounding_box ();
584
+ blob_size =
585
+ blob_box.width () >
586
+ blob_box.height ()? blob_box.width () : blob_box.
587
+ height();
588
+ if (blob_size < textord_noise_sizelimit * row->x_height ())
589
+ dot_count++; //count smal outlines
590
+ if (!outline->child ()->empty ()
591
+ && blob_box.height () <
592
+ (1 + textord_noise_syfract) * row->x_height ()
593
+ && blob_box.height () >
594
+ (1 - textord_noise_syfract) * row->x_height ()
595
+ && blob_box.width () <
596
+ (1 + textord_noise_sxfract) * row->x_height ()
597
+ && blob_box.width () >
598
+ (1 - textord_noise_sxfract) * row->x_height ())
599
+ super_norm_count++; //count smal outlines
600
+ }
601
+ }
602
+ else
603
+ super_norm_count++;
604
+ blob_box = blob->bounding_box ();
605
+ blob_size =
606
+ blob_box.width () >
607
+ blob_box.height ()? blob_box.width () : blob_box.height ();
608
+ if (blob_size >= textord_noise_sizelimit * row->x_height ()
609
+ && blob_size < row->x_height () * 2) {
610
+ trans_threshold = blob_size / textord_noise_sizefraction;
611
+ trans_count = blob->count_transitions (trans_threshold);
612
+ if (trans_count < textord_noise_translimit)
613
+ norm_count++;
614
+ }
615
+ else if (blob_box.height () > row->x_height () * 2
616
+ && (!word_it.at_first () || !blob_it.at_first ()))
617
+ dot_count += 2;
618
+ #ifndef SECURE_NAMES
619
+ if (testing_on) {
620
+ tprintf
621
+ ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n",
622
+ blob_box.left (), blob_box.bottom (), blob_box.right (),
623
+ blob_box.top (), blob->out_list ()->length (), trans_count,
624
+ blob_box.bottom () - row->base_line (blob_box.left ()));
625
+ }
626
+ #endif
627
+ }
628
+ }
629
+ #ifndef SECURE_NAMES
630
+ if (textord_noise_debug) {
631
+ tprintf ("Row ending at (%d,%g):",
632
+ blob_box.right (), row->base_line (blob_box.right ()));
633
+ tprintf (" R=%g, dc=%d, nc=%d, %s\n",
634
+ norm_count > 0 ? (float) dot_count / norm_count : 9999,
635
+ dot_count, norm_count,
636
+ dot_count > norm_count * textord_noise_normratio
637
+ && dot_count > 2 ? "REJECTED" : "ACCEPTED");
638
+ }
639
+ #endif
640
+ return super_norm_count < textord_noise_sncount
641
+ && dot_count > norm_count * textord_noise_rowratio && dot_count > 2;
642
+ }
643
+
644
+
645
+ /**********************************************************************
646
+ * clean_noise_from_words
647
+ *
648
+ * Move blobs of words from rows of garbage into the reject blobs list.
649
+ **********************************************************************/
650
+
651
+ void clean_noise_from_words( //remove empties
652
+ ROW *row //row to clean
653
+ ) {
654
+ TBOX blob_box; //bounding box
655
+ inT8 *word_dud; //was it chucked
656
+ C_BLOB *blob; //current blob
657
+ C_OUTLINE *outline; //current outline
658
+ WERD *word; //current word
659
+ inT32 blob_size; //biggest size
660
+ inT32 trans_count; //no of transitions
661
+ inT32 trans_threshold; //noise tolerance
662
+ inT32 dot_count; //small objects
663
+ inT32 norm_count; //normal objects
664
+ inT32 dud_words; //number discarded
665
+ inT32 ok_words; //number remaining
666
+ inT32 word_index; //current word
667
+ //words of row
668
+ WERD_IT word_it = row->word_list ();
669
+ C_BLOB_IT blob_it; //blob iterator
670
+ C_OUTLINE_IT out_it; //outline iterator
671
+
672
+ ok_words = word_it.length ();
673
+ if (ok_words == 0 || textord_no_rejects)
674
+ return;
675
+ word_dud = (inT8 *) alloc_mem (ok_words * sizeof (inT8));
676
+ dud_words = 0;
677
+ ok_words = 0;
678
+ word_index = 0;
679
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
680
+ word = word_it.data (); //current word
681
+ dot_count = 0;
682
+ norm_count = 0;
683
+ //blobs in word
684
+ blob_it.set_to_list (word->cblob_list ());
685
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
686
+ blob_it.forward ()) {
687
+ blob = blob_it.data ();
688
+ if (!word->flag (W_DONT_CHOP)) {
689
+ //get outlines
690
+ out_it.set_to_list (blob->out_list ());
691
+ for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
692
+ out_it.forward ()) {
693
+ outline = out_it.data ();
694
+ blob_box = outline->bounding_box ();
695
+ blob_size =
696
+ blob_box.width () >
697
+ blob_box.height ()? blob_box.width () : blob_box.
698
+ height();
699
+ if (blob_size < textord_noise_sizelimit * row->x_height ())
700
+ dot_count++; //count smal outlines
701
+ if (!outline->child ()->empty ()
702
+ && blob_box.height () <
703
+ (1 + textord_noise_syfract) * row->x_height ()
704
+ && blob_box.height () >
705
+ (1 - textord_noise_syfract) * row->x_height ()
706
+ && blob_box.width () <
707
+ (1 + textord_noise_sxfract) * row->x_height ()
708
+ && blob_box.width () >
709
+ (1 - textord_noise_sxfract) * row->x_height ())
710
+ norm_count++; //count smal outlines
711
+ }
712
+ }
713
+ else
714
+ norm_count++;
715
+ blob_box = blob->bounding_box ();
716
+ blob_size =
717
+ blob_box.width () >
718
+ blob_box.height ()? blob_box.width () : blob_box.height ();
719
+ if (blob_size >= textord_noise_sizelimit * row->x_height ()
720
+ && blob_size < row->x_height () * 2) {
721
+ trans_threshold = blob_size / textord_noise_sizefraction;
722
+ trans_count = blob->count_transitions (trans_threshold);
723
+ if (trans_count < textord_noise_translimit)
724
+ norm_count++;
725
+ }
726
+ else if (blob_box.height () > row->x_height () * 2
727
+ && (!word_it.at_first () || !blob_it.at_first ()))
728
+ dot_count += 2;
729
+ }
730
+ if (dot_count > 2) {
731
+ if (dot_count > norm_count * textord_noise_normratio * 2)
732
+ word_dud[word_index] = 2;
733
+ else if (dot_count > norm_count * textord_noise_normratio)
734
+ word_dud[word_index] = 1;
735
+ else
736
+ word_dud[word_index] = 0;
737
+ }
738
+ else
739
+ word_dud[word_index] = 0;
740
+ if (word_dud[word_index] == 2)
741
+ dud_words++;
742
+ else
743
+ ok_words++;
744
+ word_index++;
745
+ }
746
+
747
+ word_index = 0;
748
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
749
+ if (word_dud[word_index] == 2
750
+ || (word_dud[word_index] == 1 && dud_words > ok_words)) {
751
+ word = word_it.data (); //current word
752
+ //rejected blobs
753
+ blob_it.set_to_list (word->rej_cblob_list ());
754
+ //move from blobs
755
+ blob_it.add_list_after (word->cblob_list ());
756
+ }
757
+ word_index++;
758
+ }
759
+ free_mem(word_dud);
760
+ }
761
+
762
+
763
+ /**********************************************************************
764
+ * tweak_row_baseline
765
+ *
766
+ * Shift baseline to fit the blobs more accurately where they are
767
+ * close enough.
768
+ **********************************************************************/
769
+
770
+ void tweak_row_baseline( //remove empties
771
+ ROW *row //row to clean
772
+ ) {
773
+ TBOX blob_box; //bounding box
774
+ C_BLOB *blob; //current blob
775
+ WERD *word; //current word
776
+ inT32 blob_count; //no of blobs
777
+ inT32 src_index; //source segment
778
+ inT32 dest_index; //destination segment
779
+ inT32 *xstarts; //spline segments
780
+ double *coeffs; //spline coeffs
781
+ float ydiff; //baseline error
782
+ float x_centre; //centre of blob
783
+ //words of row
784
+ WERD_IT word_it = row->word_list ();
785
+ C_BLOB_IT blob_it; //blob iterator
786
+
787
+ blob_count = 0;
788
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
789
+ word = word_it.data (); //current word
790
+ //get total blobs
791
+ blob_count += word->cblob_list ()->length ();
792
+ }
793
+ if (blob_count == 0)
794
+ return;
795
+ xstarts =
796
+ (inT32 *) alloc_mem ((blob_count + row->baseline.segments + 1) *
797
+ sizeof (inT32));
798
+ coeffs =
799
+ (double *) alloc_mem ((blob_count + row->baseline.segments) * 3 *
800
+ sizeof (double));
801
+
802
+ src_index = 0;
803
+ dest_index = 0;
804
+ xstarts[0] = row->baseline.xcoords[0];
805
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
806
+ word = word_it.data (); //current word
807
+ //blobs in word
808
+ blob_it.set_to_list (word->cblob_list ());
809
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
810
+ blob_it.forward ()) {
811
+ blob = blob_it.data ();
812
+ blob_box = blob->bounding_box ();
813
+ x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
814
+ ydiff = blob_box.bottom () - row->base_line (x_centre);
815
+ if (ydiff < 0)
816
+ ydiff = -ydiff / row->x_height ();
817
+ else
818
+ ydiff = ydiff / row->x_height ();
819
+ if (ydiff < textord_blshift_maxshift
820
+ && blob_box.height () / row->x_height () >
821
+ textord_blshift_xfraction) {
822
+ if (xstarts[dest_index] >= x_centre)
823
+ xstarts[dest_index] = blob_box.left ();
824
+ coeffs[dest_index * 3] = 0;
825
+ coeffs[dest_index * 3 + 1] = 0;
826
+ coeffs[dest_index * 3 + 2] = blob_box.bottom ();
827
+ //shift it
828
+ dest_index++;
829
+ xstarts[dest_index] = blob_box.right () + 1;
830
+ }
831
+ else {
832
+ if (xstarts[dest_index] <= x_centre) {
833
+ while (row->baseline.xcoords[src_index + 1] <= x_centre
834
+ && src_index < row->baseline.segments - 1) {
835
+ if (row->baseline.xcoords[src_index + 1] >
836
+ xstarts[dest_index]) {
837
+ coeffs[dest_index * 3] =
838
+ row->baseline.quadratics[src_index].a;
839
+ coeffs[dest_index * 3 + 1] =
840
+ row->baseline.quadratics[src_index].b;
841
+ coeffs[dest_index * 3 + 2] =
842
+ row->baseline.quadratics[src_index].c;
843
+ dest_index++;
844
+ xstarts[dest_index] =
845
+ row->baseline.xcoords[src_index + 1];
846
+ }
847
+ src_index++;
848
+ }
849
+ coeffs[dest_index * 3] =
850
+ row->baseline.quadratics[src_index].a;
851
+ coeffs[dest_index * 3 + 1] =
852
+ row->baseline.quadratics[src_index].b;
853
+ coeffs[dest_index * 3 + 2] =
854
+ row->baseline.quadratics[src_index].c;
855
+ dest_index++;
856
+ xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
857
+ }
858
+ }
859
+ }
860
+ }
861
+ while (src_index < row->baseline.segments
862
+ && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index])
863
+ src_index++;
864
+ while (src_index < row->baseline.segments) {
865
+ coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
866
+ coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
867
+ coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
868
+ dest_index++;
869
+ src_index++;
870
+ xstarts[dest_index] = row->baseline.xcoords[src_index];
871
+ }
872
+ //turn to spline
873
+ row->baseline = QSPLINE (dest_index, xstarts, coeffs);
874
+ free_mem(xstarts);
875
+ free_mem(coeffs);
876
+ }
877
+
878
+
879
+ /**********************************************************************
880
+ * blob_y_order
881
+ *
882
+ * Sort function to sort blobs in y from page top.
883
+ **********************************************************************/
884
+
885
+ inT32 blob_y_order( //sort function
886
+ void *item1, //items to compare
887
+ void *item2) {
888
+ //converted ptr
889
+ BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
890
+ //converted ptr
891
+ BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
892
+
893
+ if (blob1->bounding_box ().bottom () > blob2->bounding_box ().bottom ())
894
+ return -1;
895
+ else if (blob1->bounding_box ().bottom () <
896
+ blob2->bounding_box ().bottom ())
897
+ return 1;
898
+ else {
899
+ if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
900
+ return -1;
901
+ else if (blob1->bounding_box ().left () >
902
+ blob2->bounding_box ().left ())
903
+ return 1;
904
+ else
905
+ return 0;
906
+ }
907
+ }