tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612):
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,907 @@
1
+ /**********************************************************************
2
+ * File: tordmain.cpp (Formerly textordp.c)
3
+ * Description: C++ top level textord code.
4
+ * Author: Ray Smith
5
+ * Created: Tue Jul 28 17:12:33 BST 1992
6
+ *
7
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #ifdef __UNIX__
22
+ #include <assert.h>
23
+ #endif
24
+ #include "stderr.h"
25
+ #include "globaloc.h"
26
+ #include "tessout.h"
27
+ #include "blread.h"
28
+ #include "blobbox.h"
29
+ //#include "lmedsq.h"
30
+ #include "edgblob.h"
31
+ //#include "adthsh.h"
32
+ #include "drawtord.h"
33
+ #include "makerow.h"
34
+ #include "wordseg.h"
35
+ #include "ocrclass.h"
36
+ #include "genblob.h"
37
+ #include "imgs.h"
38
+ //#include "bairdskw.h"
39
+ #include "tordmain.h"
40
+ #include "secname.h"
41
+ #include "pageseg.h"
42
+
43
+ const ERRCODE BLOCKLESS_BLOBS = "Warning:some blobs assigned to no block";
44
+
45
+ #define EXTERN
46
+
47
+ EXTERN BOOL_VAR (textord_no_rejects, FALSE, "Don't remove noise blobs");
48
+ EXTERN BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs");
49
+ EXTERN BOOL_VAR (textord_show_boxes, FALSE, "Display unsorted blobs");
50
+ EXTERN BOOL_VAR (textord_new_initial_xheight, TRUE,
51
+ "Use test xheight mechanism");
52
+ EXTERN BOOL_VAR (textord_exit_after, FALSE, "Exit after completing textord");
53
+ EXTERN INT_VAR (textord_max_noise_size, 7, "Pixel size of noise");
54
+ EXTERN double_VAR (textord_blob_size_bigile, 95,
55
+ "Percentile for large blobs");
56
+ EXTERN double_VAR (textord_noise_area_ratio, 0.7,
57
+ "Fraction of bounding box for noise");
58
+ EXTERN double_VAR (textord_blob_size_smallile, 20,
59
+ "Percentile for small blobs");
60
+ EXTERN double_VAR (textord_initialx_ile, 0.75,
61
+ "Ile of sizes for xheight guess");
62
+ EXTERN double_VAR (textord_initialasc_ile, 0.90,
63
+ "Ile of sizes for xheight guess");
64
+ EXTERN INT_VAR (textord_noise_sizefraction, 10,
65
+ "Fraction of size for maxima");
66
+ EXTERN double_VAR (textord_noise_sizelimit, 0.5,
67
+ "Fraction of x for big t count");
68
+ EXTERN INT_VAR (textord_noise_translimit, 16, "Transitions for normal blob");
69
+ EXTERN double_VAR (textord_noise_normratio, 2.0,
70
+ "Dot to norm ratio for deletion");
71
+ EXTERN BOOL_VAR (textord_noise_rejwords, TRUE, "Reject noise-like words");
72
+ EXTERN BOOL_VAR (textord_noise_rejrows, TRUE, "Reject noise-like rows");
73
+ EXTERN double_VAR (textord_noise_syfract, 0.2,
74
+ "xh fract error for norm blobs");
75
+ EXTERN double_VAR (textord_noise_sxfract, 0.4,
76
+ "xh fract width error for norm blobs");
77
+ EXTERN INT_VAR (textord_noise_sncount, 1, "super norm blobs to save row");
78
+ EXTERN double_VAR (textord_noise_rowratio, 6.0,
79
+ "Dot to norm ratio for deletion");
80
+
81
+ EXTERN BOOL_VAR (textord_noise_debug, FALSE, "Debug row garbage detector");
82
+ EXTERN double_VAR (textord_blshift_maxshift, 0.00, "Max baseline shift");
83
+ EXTERN double_VAR (textord_blshift_xfraction, 9.99,
84
+ "Min size of baseline shift");
85
+ EXTERN STRING_EVAR (tessedit_image_ext, ".tif", "Externsion for image file");
86
+
87
+ #ifndef EMBEDDED
88
+ EXTERN clock_t previous_cpu;
89
+ #endif
90
+
91
+ extern BOOL_VAR_H (polygon_tess_approximation, TRUE,
92
+ "Do tess poly instead of grey scale");
93
+
94
+ #define MAX_NEAREST_DIST 600 //for block skew stats
95
+ #define MAX_BLOB_TRANSITIONS100 //for nois stats
96
+
97
+ extern IMAGE page_image; //must be defined somewhere
98
+ extern BOOL_VAR_H (interactive_mode, TRUE, "Run interactively?");
99
+ extern /*"C" */ ETEXT_DESC *global_monitor; //progress monitor
100
+
101
+ /**********************************************************************
102
+ * read_and_textord
103
+ *
104
+ * Read a file of blocks n blobs and textord them.
105
+ **********************************************************************/
106
+
107
+ void read_and_textord( //read .pb file
108
+ const char *filename, //.pb file
109
+ BLOCK_LIST *blocks) {
110
+ int c; //input character
111
+ FILE *infp; //input file
112
+ BLOCK *block; //current block
113
+ TBOX page_box; //bounding_box
114
+ BLOCK_IT block_it = blocks; //iterator
115
+ //different orientations
116
+ TO_BLOCK_LIST land_blocks, port_blocks;
117
+
118
+ infp = fopen (filename, "r");
119
+ if (infp == NULL)
120
+ CANTOPENFILE.error ("read_and_textord", EXIT, filename);
121
+
122
+ while (((c = fgetc (infp)) != EOF) && (ungetc (c, infp) != EOF)) {
123
+ //get one
124
+ block = BLOCK::de_serialise (infp);
125
+ //add to list
126
+ block_it.add_after_then_move (block);
127
+ //find page size
128
+ page_box += block->bounding_box ();
129
+ }
130
+ fclose(infp);
131
+
132
+ assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);
133
+ filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);
134
+ filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);
135
+ textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);
136
+ }
137
+
138
+
139
+ /**********************************************************************
140
+ * edges_and_textord
141
+ *
142
+ * Read a file of blocks n blobs and textord them.
143
+ **********************************************************************/
144
+
145
+ void edges_and_textord( //read .pb file
146
+ const char *filename, //.pb file
147
+ BLOCK_LIST *blocks) {
148
+ BLOCK *block; //current block
149
+ const char *lastdot; //of name
150
+ STRING name = filename; //truncated name
151
+ ICOORD page_tr;
152
+ TBOX page_box; //bounding_box
153
+ PDBLK_CLIST pd_blocks; //copy of list
154
+ BLOCK_IT block_it = blocks; //iterator
155
+ PDBLK_C_IT pd_it = &pd_blocks; //iterator
156
+ //different orientations
157
+ TO_BLOCK_LIST land_blocks, port_blocks;
158
+ IMAGE thresh_image; //thresholded
159
+
160
+ lastdot = strrchr (name.string (), '.');
161
+ if (lastdot != NULL)
162
+ name[lastdot-name.string()] = '\0';
163
+ if (page_image.get_bpp () == 0) {
164
+ name += tessedit_image_ext;
165
+ if (page_image.read_header (name.string ()))
166
+ CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ());
167
+ if (page_image.read (0))
168
+ READFAILED.error ("edges_and_textord", EXIT, name.string ());
169
+ name = filename;
170
+ lastdot = strrchr (name.string (), '.');
171
+ if (lastdot != NULL)
172
+ name[lastdot-name.string()] = '\0';
173
+ }
174
+ page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ());
175
+ if (!read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (),
176
+ blocks))
177
+ segment_page(blocks);
178
+ block_it.set_to_list (blocks);
179
+ if (global_monitor != NULL)
180
+ global_monitor->ocr_alive = TRUE;
181
+
182
+ if (page_image.get_bpp () > 1) {
183
+ set_global_loc_code(LOC_ADAPTIVE);
184
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
185
+ block_it.forward ()) {
186
+ block = block_it.data ();
187
+ pd_it.add_after_then_move (block);
188
+ }
189
+ // adaptive_threshold(&page_image,&pd_blocks,&thresh_image);
190
+ set_global_loc_code(LOC_EDGE_PROG);
191
+ #ifndef EMBEDDED
192
+ previous_cpu = clock ();
193
+ #endif
194
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
195
+ block_it.forward ()) {
196
+ block = block_it.data ();
197
+ if (!polygon_tess_approximation)
198
+ invert_image(&page_image);
199
+ #ifndef GRAPHICS_DISABLED
200
+ extract_edges(NULL, &page_image, &thresh_image, page_tr, block);
201
+ #else
202
+ extract_edges(&page_image, &thresh_image, page_tr, block);
203
+ #endif
204
+ page_box += block->bounding_box ();
205
+ }
206
+ page_image = thresh_image; //everyone else gets it
207
+ }
208
+ else {
209
+ set_global_loc_code(LOC_EDGE_PROG);
210
+ if (!page_image.white_high ())
211
+ invert_image(&page_image);
212
+
213
+ #ifndef EMBEDDED
214
+ previous_cpu = clock ();
215
+ #endif
216
+
217
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
218
+ block_it.forward ()) {
219
+ block = block_it.data ();
220
+ #ifndef GRAPHICS_DISABLED
221
+ extract_edges(NULL, &page_image, &page_image, page_tr, block);
222
+ #else
223
+ extract_edges(&page_image, &page_image, page_tr, block);
224
+ #endif
225
+ page_box += block->bounding_box ();
226
+ }
227
+ }
228
+ if (global_monitor != NULL) {
229
+ global_monitor->ocr_alive = TRUE;
230
+ global_monitor->progress = 10;
231
+ }
232
+
233
+ assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);
234
+ if (global_monitor != NULL)
235
+ global_monitor->ocr_alive = TRUE;
236
+ filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);
237
+ #ifndef EMBEDDED
238
+ previous_cpu = clock ();
239
+ #endif
240
+ filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);
241
+ if (global_monitor != NULL)
242
+ global_monitor->ocr_alive = TRUE;
243
+ textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);
244
+ }
245
+
246
+ /**********************************************************************
247
+ * assign_blobs_to_blocks2
248
+ *
249
+ * Make a list of TO_BLOCKs for portrait and landscape orientation.
250
+ **********************************************************************/
251
+
252
+ void assign_blobs_to_blocks2( //split into groups
253
+ BLOCK_LIST *blocks, //blocks to process
254
+ TO_BLOCK_LIST *land_blocks, //rotated for landscape
255
+ TO_BLOCK_LIST *port_blocks //output list
256
+ ) {
257
+ BLOCK *block; //current block
258
+ BLOBNBOX *newblob; //created blob
259
+ C_BLOB *blob; //current blob
260
+ BLOCK_IT block_it = blocks;
261
+ C_BLOB_IT blob_it; //iterator
262
+ BLOBNBOX_IT port_box_it; //iterator
263
+ //destination iterator
264
+ TO_BLOCK_IT port_block_it = port_blocks;
265
+ TO_BLOCK *port_block; //created block
266
+
267
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
268
+ block_it.forward ()) {
269
+ block = block_it.data ();
270
+ blob_it.set_to_list (block->blob_list ());
271
+ //make one
272
+ port_block = new TO_BLOCK (block);
273
+ //make one
274
+ port_box_it.set_to_list (&port_block->blobs);
275
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
276
+ blob_it.forward ()) {
277
+ blob = blob_it.extract ();
278
+ //convert blob
279
+ newblob = new BLOBNBOX (blob);
280
+ //add to list
281
+ port_box_it.add_after_then_move (newblob);
282
+ //convert blob
283
+ }
284
+ port_block_it.add_after_then_move (port_block);
285
+ }
286
+ }
287
+
288
+
289
+ /**********************************************************************
290
+ * filter_blobs
291
+ *
292
+ * Sort the blobs into sizes in all the blocks for later work.
293
+ **********************************************************************/
294
+
295
+ void filter_blobs( //split into groups
296
+ ICOORD page_tr, //top right
297
+ TO_BLOCK_LIST *blocks, //output list
298
+ BOOL8 testing_on //for plotting
299
+ ) {
300
+ TO_BLOCK_IT block_it = blocks; //destination iterator
301
+ TO_BLOCK *block; //created block
302
+
303
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
304
+ block_it.forward ()) {
305
+ block = block_it.data ();
306
+ block->line_size = filter_noise_blobs (&block->blobs,
307
+ &block->noise_blobs,
308
+ &block->small_blobs,
309
+ &block->large_blobs);
310
+ block->line_spacing =
311
+ block->line_size * (textord_merge_desc + textord_merge_x +
312
+ textord_merge_asc +
313
+ textord_merge_asc) / textord_merge_x;
314
+ block->line_size *= textord_min_linesize;
315
+ block->max_blob_size = block->line_size * textord_excess_blobsize;
316
+ #ifndef GRAPHICS_DISABLED
317
+ if (textord_show_blobs && testing_on) {
318
+ if (to_win == NULL)
319
+ create_to_win(page_tr);
320
+ plot_blob_list (to_win, &block->noise_blobs,
321
+ ScrollView::CORAL, ScrollView::BLUE);
322
+ plot_blob_list (to_win, &block->small_blobs,
323
+ ScrollView::GOLDENROD, ScrollView::YELLOW);
324
+ plot_blob_list (to_win, &block->large_blobs,
325
+ ScrollView::DARK_GREEN, ScrollView::YELLOW);
326
+ plot_blob_list (to_win, &block->blobs,
327
+ ScrollView::WHITE, ScrollView::BROWN);
328
+ }
329
+ if (textord_show_boxes && testing_on) {
330
+ if (to_win == NULL)
331
+ create_to_win(page_tr);
332
+ plot_box_list (to_win, &block->noise_blobs, ScrollView::WHITE);
333
+ plot_box_list (to_win, &block->small_blobs, ScrollView::WHITE);
334
+ plot_box_list (to_win, &block->large_blobs, ScrollView::WHITE);
335
+ plot_box_list (to_win, &block->blobs, ScrollView::WHITE);
336
+ }
337
+ #endif
338
+ }
339
+ }
340
+
341
+
342
+ /**********************************************************************
343
+ * filter_noise_blobs
344
+ *
345
+ * Move small blobs to a separate list.
346
+ **********************************************************************/
347
+
348
+ float filter_noise_blobs( //separate noise
349
+ BLOBNBOX_LIST *src_list, //origonal list
350
+ BLOBNBOX_LIST *noise_list, //noise list
351
+ BLOBNBOX_LIST *small_list, //small blobs
352
+ BLOBNBOX_LIST *large_list //large blobs
353
+ ) {
354
+ inT16 height; //height of blob
355
+ inT16 width; //of blob
356
+ BLOBNBOX_IT src_it = src_list; //iterators
357
+ BLOBNBOX_IT noise_it = noise_list;
358
+ BLOBNBOX_IT small_it = small_list;
359
+ BLOBNBOX_IT large_it = large_list;
360
+ STATS size_stats (0, MAX_NEAREST_DIST);
361
+ //blob heights
362
+ if (textord_new_initial_xheight)
363
+ return filter_noise_blobs2 (src_list, noise_list, small_list, large_list);
364
+ float min_y; //size limits
365
+ float max_y;
366
+ float max_x;
367
+
368
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
369
+ if (src_it.data ()->bounding_box ().height () < textord_max_noise_size)
370
+ noise_it.add_after_then_move (src_it.extract ());
371
+ }
372
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
373
+ size_stats.add (src_it.data ()->bounding_box ().height (), 1);
374
+ }
375
+ min_y = floor (size_stats.ile (textord_blob_size_smallile / 100.0));
376
+ max_y = ceil (size_stats.ile (textord_blob_size_bigile / 100.0));
377
+ max_x = ceil (size_stats.ile (0.5) * textord_width_limit);
378
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
379
+ height = src_it.data ()->bounding_box ().height ();
380
+ width = src_it.data ()->bounding_box ().width ();
381
+ if (height < min_y)
382
+ small_it.add_after_then_move (src_it.extract ());
383
+ else if (height > max_y || width > max_x)
384
+ large_it.add_after_then_move (src_it.extract ());
385
+ }
386
+ return size_stats.ile (textord_initialx_ile);
387
+ }
388
+
389
+
390
+ /**********************************************************************
391
+ * filter_noise_blobs2
392
+ *
393
+ * Move small blobs to a separate list.
394
+ **********************************************************************/
395
+
396
+ float filter_noise_blobs2( //separate noise
397
+ BLOBNBOX_LIST *src_list, //origonal list
398
+ BLOBNBOX_LIST *noise_list, //noise list
399
+ BLOBNBOX_LIST *small_list, //small blobs
400
+ BLOBNBOX_LIST *large_list //large blobs
401
+ ) {
402
+ inT16 height; //height of blob
403
+ inT16 width; //of blob
404
+ BLOBNBOX *blob; //current blob
405
+ float initial_x; //first guess
406
+ BLOBNBOX_IT src_it = src_list; //iterators
407
+ BLOBNBOX_IT noise_it = noise_list;
408
+ BLOBNBOX_IT small_it = small_list;
409
+ BLOBNBOX_IT large_it = large_list;
410
+ STATS size_stats (0, MAX_NEAREST_DIST);
411
+ //blob heights
412
+ float min_y; //size limits
413
+ float max_y;
414
+ float max_x;
415
+ float max_height; //of good blobs
416
+
417
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
418
+ blob = src_it.data ();
419
+ if (blob->bounding_box ().height () < textord_max_noise_size)
420
+ noise_it.add_after_then_move (src_it.extract ());
421
+ else if (blob->enclosed_area () >= blob->bounding_box ().height ()
422
+ * blob->bounding_box ().width () * textord_noise_area_ratio)
423
+ small_it.add_after_then_move (src_it.extract ());
424
+ }
425
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
426
+ size_stats.add (src_it.data ()->bounding_box ().height (), 1);
427
+ }
428
+ initial_x = size_stats.ile (textord_initialx_ile);
429
+ max_y =
430
+ ceil (initial_x *
431
+ (textord_merge_desc + textord_merge_x +
432
+ 2 * textord_merge_asc) / textord_merge_x);
433
+ min_y = floor (initial_x / 2);
434
+ max_x = ceil (initial_x * textord_width_limit);
435
+ small_it.move_to_first ();
436
+ for (small_it.mark_cycle_pt (); !small_it.cycled_list ();
437
+ small_it.forward ()) {
438
+ height = small_it.data()->bounding_box().height();
439
+ if (height > max_y)
440
+ large_it.add_after_then_move(small_it.extract ());
441
+ else if (height >= min_y)
442
+ src_it.add_after_then_move(small_it.extract ());
443
+ }
444
+ size_stats.clear ();
445
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
446
+ height = src_it.data ()->bounding_box ().height ();
447
+ width = src_it.data ()->bounding_box ().width ();
448
+ if (height < min_y)
449
+ small_it.add_after_then_move (src_it.extract ());
450
+ else if (height > max_y || width > max_x)
451
+ large_it.add_after_then_move (src_it.extract ());
452
+ else
453
+ size_stats.add (height, 1);
454
+ }
455
+ max_height = size_stats.ile (textord_initialasc_ile);
456
+ // printf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,",
457
+ // max_y,min_y,initial_x,max_height);
458
+ max_height *= textord_merge_x / (textord_merge_x + textord_merge_asc);
459
+ if (max_height > initial_x)
460
+ initial_x = max_height;
461
+ // printf(" ret=%g\n",initial_x);
462
+ return initial_x;
463
+ }
464
+
465
+
466
+ /**********************************************************************
467
+ * textord_page
468
+ *
469
+ * Textord the list of blobs and return a list of proper blocks.
470
+ **********************************************************************/
471
+
472
+ void textord_page( //make rows & words
473
+ ICOORD page_tr, //top right
474
+ BLOCK_LIST *blocks, //block list
475
+ TO_BLOCK_LIST *land_blocks, //rotated for landscape
476
+ TO_BLOCK_LIST *port_blocks //output list
477
+ ) {
478
+ float gradient; //global skew
479
+
480
+ set_global_loc_code(LOC_TEXT_ORD_ROWS);
481
+ gradient = make_rows (page_tr, blocks, land_blocks, port_blocks);
482
+ if (global_monitor != NULL) {
483
+ global_monitor->ocr_alive = TRUE;
484
+ global_monitor->progress = 20;
485
+ }
486
+ set_global_loc_code(LOC_TEXT_ORD_WORDS);
487
+ make_words(page_tr, gradient, blocks, land_blocks, port_blocks);
488
+ if (global_monitor != NULL) {
489
+ global_monitor->ocr_alive = TRUE;
490
+ global_monitor->progress = 30;
491
+ }
492
+ cleanup_blocks(blocks); //remove empties
493
+ #ifndef GRAPHICS_DISABLED
494
+ close_to_win();
495
+ #endif
496
+ if (textord_exit_after && !interactive_mode)
497
+ exit (0);
498
+ }
499
+
500
+
501
+ /**********************************************************************
502
+ * cleanup_blocks
503
+ *
504
+ * Delete empty blocks, rows from the page.
505
+ **********************************************************************/
506
+
507
+ void cleanup_blocks( //remove empties
508
+ BLOCK_LIST *blocks //list
509
+ ) {
510
+ BLOCK_IT block_it = blocks; //iterator
511
+ ROW_IT row_it; //row iterator
512
+
513
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
514
+ block_it.forward ()) {
515
+ row_it.set_to_list (block_it.data ()->row_list ());
516
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
517
+ if ((textord_noise_rejrows
518
+ && !row_it.data ()->word_list ()->empty ()
519
+ && clean_noise_from_row (row_it.data ()))
520
+ || row_it.data ()->word_list ()->empty ())
521
+ delete row_it.extract ();//lose empty row
522
+ else {
523
+ if (textord_noise_rejwords)
524
+ clean_noise_from_words (row_it.data ());
525
+ if (textord_blshift_maxshift >= 0)
526
+ tweak_row_baseline (row_it.data ());
527
+ }
528
+ }
529
+ if (block_it.data ()->row_list ()->empty ()) {
530
+ delete block_it.extract ();//lose empty block
531
+ }
532
+ }
533
+ }
534
+
535
+
536
+ /**********************************************************************
537
+ * clean_noise_from_row
538
+ *
539
+ * Move blobs of words from rows of garbage into the reject blobs list.
540
+ **********************************************************************/
541
+
542
+ BOOL8 clean_noise_from_row( //remove empties
543
+ ROW *row //row to clean
544
+ ) {
545
+ BOOL8 testing_on;
546
+ TBOX blob_box; //bounding box
547
+ C_BLOB *blob; //current blob
548
+ C_OUTLINE *outline; //current outline
549
+ WERD *word; //current word
550
+ inT32 blob_size; //biggest size
551
+ inT32 trans_count = 0; //no of transitions
552
+ inT32 trans_threshold; //noise tolerance
553
+ inT32 dot_count; //small objects
554
+ inT32 norm_count; //normal objects
555
+ inT32 super_norm_count; //real char-like
556
+ //words of row
557
+ WERD_IT word_it = row->word_list ();
558
+ C_BLOB_IT blob_it; //blob iterator
559
+ C_OUTLINE_IT out_it; //outline iterator
560
+
561
+ if (textord_test_y > row->base_line (textord_test_x)
562
+ && textord_show_blobs
563
+ && textord_test_y < row->base_line (textord_test_x) + row->x_height ())
564
+ testing_on = TRUE;
565
+ else
566
+ testing_on = FALSE;
567
+ dot_count = 0;
568
+ norm_count = 0;
569
+ super_norm_count = 0;
570
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
571
+ word = word_it.data (); //current word
572
+ //blobs in word
573
+ blob_it.set_to_list (word->cblob_list ());
574
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
575
+ blob_it.forward ()) {
576
+ blob = blob_it.data ();
577
+ if (!word->flag (W_DONT_CHOP)) {
578
+ //get outlines
579
+ out_it.set_to_list (blob->out_list ());
580
+ for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
581
+ out_it.forward ()) {
582
+ outline = out_it.data ();
583
+ blob_box = outline->bounding_box ();
584
+ blob_size =
585
+ blob_box.width () >
586
+ blob_box.height ()? blob_box.width () : blob_box.
587
+ height();
588
+ if (blob_size < textord_noise_sizelimit * row->x_height ())
589
+ dot_count++; //count smal outlines
590
+ if (!outline->child ()->empty ()
591
+ && blob_box.height () <
592
+ (1 + textord_noise_syfract) * row->x_height ()
593
+ && blob_box.height () >
594
+ (1 - textord_noise_syfract) * row->x_height ()
595
+ && blob_box.width () <
596
+ (1 + textord_noise_sxfract) * row->x_height ()
597
+ && blob_box.width () >
598
+ (1 - textord_noise_sxfract) * row->x_height ())
599
+ super_norm_count++; //count smal outlines
600
+ }
601
+ }
602
+ else
603
+ super_norm_count++;
604
+ blob_box = blob->bounding_box ();
605
+ blob_size =
606
+ blob_box.width () >
607
+ blob_box.height ()? blob_box.width () : blob_box.height ();
608
+ if (blob_size >= textord_noise_sizelimit * row->x_height ()
609
+ && blob_size < row->x_height () * 2) {
610
+ trans_threshold = blob_size / textord_noise_sizefraction;
611
+ trans_count = blob->count_transitions (trans_threshold);
612
+ if (trans_count < textord_noise_translimit)
613
+ norm_count++;
614
+ }
615
+ else if (blob_box.height () > row->x_height () * 2
616
+ && (!word_it.at_first () || !blob_it.at_first ()))
617
+ dot_count += 2;
618
+ #ifndef SECURE_NAMES
619
+ if (testing_on) {
620
+ tprintf
621
+ ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n",
622
+ blob_box.left (), blob_box.bottom (), blob_box.right (),
623
+ blob_box.top (), blob->out_list ()->length (), trans_count,
624
+ blob_box.bottom () - row->base_line (blob_box.left ()));
625
+ }
626
+ #endif
627
+ }
628
+ }
629
+ #ifndef SECURE_NAMES
630
+ if (textord_noise_debug) {
631
+ tprintf ("Row ending at (%d,%g):",
632
+ blob_box.right (), row->base_line (blob_box.right ()));
633
+ tprintf (" R=%g, dc=%d, nc=%d, %s\n",
634
+ norm_count > 0 ? (float) dot_count / norm_count : 9999,
635
+ dot_count, norm_count,
636
+ dot_count > norm_count * textord_noise_normratio
637
+ && dot_count > 2 ? "REJECTED" : "ACCEPTED");
638
+ }
639
+ #endif
640
+ return super_norm_count < textord_noise_sncount
641
+ && dot_count > norm_count * textord_noise_rowratio && dot_count > 2;
642
+ }
643
+
644
+
645
+ /**********************************************************************
646
+ * clean_noise_from_words
647
+ *
648
+ * Move blobs of words from rows of garbage into the reject blobs list.
649
+ **********************************************************************/
650
+
651
+ void clean_noise_from_words( //remove empties
652
+ ROW *row //row to clean
653
+ ) {
654
+ TBOX blob_box; //bounding box
655
+ inT8 *word_dud; //was it chucked
656
+ C_BLOB *blob; //current blob
657
+ C_OUTLINE *outline; //current outline
658
+ WERD *word; //current word
659
+ inT32 blob_size; //biggest size
660
+ inT32 trans_count; //no of transitions
661
+ inT32 trans_threshold; //noise tolerance
662
+ inT32 dot_count; //small objects
663
+ inT32 norm_count; //normal objects
664
+ inT32 dud_words; //number discarded
665
+ inT32 ok_words; //number remaining
666
+ inT32 word_index; //current word
667
+ //words of row
668
+ WERD_IT word_it = row->word_list ();
669
+ C_BLOB_IT blob_it; //blob iterator
670
+ C_OUTLINE_IT out_it; //outline iterator
671
+
672
+ ok_words = word_it.length ();
673
+ if (ok_words == 0 || textord_no_rejects)
674
+ return;
675
+ word_dud = (inT8 *) alloc_mem (ok_words * sizeof (inT8));
676
+ dud_words = 0;
677
+ ok_words = 0;
678
+ word_index = 0;
679
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
680
+ word = word_it.data (); //current word
681
+ dot_count = 0;
682
+ norm_count = 0;
683
+ //blobs in word
684
+ blob_it.set_to_list (word->cblob_list ());
685
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
686
+ blob_it.forward ()) {
687
+ blob = blob_it.data ();
688
+ if (!word->flag (W_DONT_CHOP)) {
689
+ //get outlines
690
+ out_it.set_to_list (blob->out_list ());
691
+ for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
692
+ out_it.forward ()) {
693
+ outline = out_it.data ();
694
+ blob_box = outline->bounding_box ();
695
+ blob_size =
696
+ blob_box.width () >
697
+ blob_box.height ()? blob_box.width () : blob_box.
698
+ height();
699
+ if (blob_size < textord_noise_sizelimit * row->x_height ())
700
+ dot_count++; //count smal outlines
701
+ if (!outline->child ()->empty ()
702
+ && blob_box.height () <
703
+ (1 + textord_noise_syfract) * row->x_height ()
704
+ && blob_box.height () >
705
+ (1 - textord_noise_syfract) * row->x_height ()
706
+ && blob_box.width () <
707
+ (1 + textord_noise_sxfract) * row->x_height ()
708
+ && blob_box.width () >
709
+ (1 - textord_noise_sxfract) * row->x_height ())
710
+ norm_count++; //count smal outlines
711
+ }
712
+ }
713
+ else
714
+ norm_count++;
715
+ blob_box = blob->bounding_box ();
716
+ blob_size =
717
+ blob_box.width () >
718
+ blob_box.height ()? blob_box.width () : blob_box.height ();
719
+ if (blob_size >= textord_noise_sizelimit * row->x_height ()
720
+ && blob_size < row->x_height () * 2) {
721
+ trans_threshold = blob_size / textord_noise_sizefraction;
722
+ trans_count = blob->count_transitions (trans_threshold);
723
+ if (trans_count < textord_noise_translimit)
724
+ norm_count++;
725
+ }
726
+ else if (blob_box.height () > row->x_height () * 2
727
+ && (!word_it.at_first () || !blob_it.at_first ()))
728
+ dot_count += 2;
729
+ }
730
+ if (dot_count > 2) {
731
+ if (dot_count > norm_count * textord_noise_normratio * 2)
732
+ word_dud[word_index] = 2;
733
+ else if (dot_count > norm_count * textord_noise_normratio)
734
+ word_dud[word_index] = 1;
735
+ else
736
+ word_dud[word_index] = 0;
737
+ }
738
+ else
739
+ word_dud[word_index] = 0;
740
+ if (word_dud[word_index] == 2)
741
+ dud_words++;
742
+ else
743
+ ok_words++;
744
+ word_index++;
745
+ }
746
+
747
+ word_index = 0;
748
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
749
+ if (word_dud[word_index] == 2
750
+ || (word_dud[word_index] == 1 && dud_words > ok_words)) {
751
+ word = word_it.data (); //current word
752
+ //rejected blobs
753
+ blob_it.set_to_list (word->rej_cblob_list ());
754
+ //move from blobs
755
+ blob_it.add_list_after (word->cblob_list ());
756
+ }
757
+ word_index++;
758
+ }
759
+ free_mem(word_dud);
760
+ }
761
+
762
+
763
+ /**********************************************************************
764
+ * tweak_row_baseline
765
+ *
766
+ * Shift baseline to fit the blobs more accurately where they are
767
+ * close enough.
768
+ **********************************************************************/
769
+
770
+ void tweak_row_baseline( //remove empties
771
+ ROW *row //row to clean
772
+ ) {
773
+ TBOX blob_box; //bounding box
774
+ C_BLOB *blob; //current blob
775
+ WERD *word; //current word
776
+ inT32 blob_count; //no of blobs
777
+ inT32 src_index; //source segment
778
+ inT32 dest_index; //destination segment
779
+ inT32 *xstarts; //spline segments
780
+ double *coeffs; //spline coeffs
781
+ float ydiff; //baseline error
782
+ float x_centre; //centre of blob
783
+ //words of row
784
+ WERD_IT word_it = row->word_list ();
785
+ C_BLOB_IT blob_it; //blob iterator
786
+
787
+ blob_count = 0;
788
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
789
+ word = word_it.data (); //current word
790
+ //get total blobs
791
+ blob_count += word->cblob_list ()->length ();
792
+ }
793
+ if (blob_count == 0)
794
+ return;
795
+ xstarts =
796
+ (inT32 *) alloc_mem ((blob_count + row->baseline.segments + 1) *
797
+ sizeof (inT32));
798
+ coeffs =
799
+ (double *) alloc_mem ((blob_count + row->baseline.segments) * 3 *
800
+ sizeof (double));
801
+
802
+ src_index = 0;
803
+ dest_index = 0;
804
+ xstarts[0] = row->baseline.xcoords[0];
805
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
806
+ word = word_it.data (); //current word
807
+ //blobs in word
808
+ blob_it.set_to_list (word->cblob_list ());
809
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
810
+ blob_it.forward ()) {
811
+ blob = blob_it.data ();
812
+ blob_box = blob->bounding_box ();
813
+ x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
814
+ ydiff = blob_box.bottom () - row->base_line (x_centre);
815
+ if (ydiff < 0)
816
+ ydiff = -ydiff / row->x_height ();
817
+ else
818
+ ydiff = ydiff / row->x_height ();
819
+ if (ydiff < textord_blshift_maxshift
820
+ && blob_box.height () / row->x_height () >
821
+ textord_blshift_xfraction) {
822
+ if (xstarts[dest_index] >= x_centre)
823
+ xstarts[dest_index] = blob_box.left ();
824
+ coeffs[dest_index * 3] = 0;
825
+ coeffs[dest_index * 3 + 1] = 0;
826
+ coeffs[dest_index * 3 + 2] = blob_box.bottom ();
827
+ //shift it
828
+ dest_index++;
829
+ xstarts[dest_index] = blob_box.right () + 1;
830
+ }
831
+ else {
832
+ if (xstarts[dest_index] <= x_centre) {
833
+ while (row->baseline.xcoords[src_index + 1] <= x_centre
834
+ && src_index < row->baseline.segments - 1) {
835
+ if (row->baseline.xcoords[src_index + 1] >
836
+ xstarts[dest_index]) {
837
+ coeffs[dest_index * 3] =
838
+ row->baseline.quadratics[src_index].a;
839
+ coeffs[dest_index * 3 + 1] =
840
+ row->baseline.quadratics[src_index].b;
841
+ coeffs[dest_index * 3 + 2] =
842
+ row->baseline.quadratics[src_index].c;
843
+ dest_index++;
844
+ xstarts[dest_index] =
845
+ row->baseline.xcoords[src_index + 1];
846
+ }
847
+ src_index++;
848
+ }
849
+ coeffs[dest_index * 3] =
850
+ row->baseline.quadratics[src_index].a;
851
+ coeffs[dest_index * 3 + 1] =
852
+ row->baseline.quadratics[src_index].b;
853
+ coeffs[dest_index * 3 + 2] =
854
+ row->baseline.quadratics[src_index].c;
855
+ dest_index++;
856
+ xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
857
+ }
858
+ }
859
+ }
860
+ }
861
+ while (src_index < row->baseline.segments
862
+ && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index])
863
+ src_index++;
864
+ while (src_index < row->baseline.segments) {
865
+ coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
866
+ coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
867
+ coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
868
+ dest_index++;
869
+ src_index++;
870
+ xstarts[dest_index] = row->baseline.xcoords[src_index];
871
+ }
872
+ //turn to spline
873
+ row->baseline = QSPLINE (dest_index, xstarts, coeffs);
874
+ free_mem(xstarts);
875
+ free_mem(coeffs);
876
+ }
877
+
878
+
879
+ /**********************************************************************
880
+ * blob_y_order
881
+ *
882
+ * Sort function to sort blobs in y from page top.
883
+ **********************************************************************/
884
+
885
+ inT32 blob_y_order( //sort function
886
+ void *item1, //items to compare
887
+ void *item2) {
888
+ //converted ptr
889
+ BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
890
+ //converted ptr
891
+ BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
892
+
893
+ if (blob1->bounding_box ().bottom () > blob2->bounding_box ().bottom ())
894
+ return -1;
895
+ else if (blob1->bounding_box ().bottom () <
896
+ blob2->bounding_box ().bottom ())
897
+ return 1;
898
+ else {
899
+ if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
900
+ return -1;
901
+ else if (blob1->bounding_box ().left () >
902
+ blob2->bounding_box ().left ())
903
+ return 1;
904
+ else
905
+ return 0;
906
+ }
907
+ }