tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612)
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,193 @@
1
+ /**********************************************************************
2
+ * to_spacing
3
+ *
4
+ * Compute fuzzy word spacing thresholds for each row.
5
+ **********************************************************************/
6
+
7
+ #ifndef TOSPACE_H
8
+ #define TOSPACE_H
9
+
10
+ #include "blobbox.h"
11
+ #include "gap_map.h"
12
+ #include "statistc.h"
13
+ #include "notdll.h"
14
+ extern BOOL_VAR_H (tosp_old_to_method, FALSE, "Space stats use prechopping?");
15
+ extern BOOL_VAR_H (tosp_only_use_prop_rows, TRUE,
16
+ "Block stats to use fixed pitch rows?");
17
+ extern BOOL_VAR_H (tosp_use_pre_chopping, FALSE,
18
+ "Space stats use prechopping?");
19
+ extern BOOL_VAR_H (tosp_old_to_bug_fix, FALSE,
20
+ "Fix suspected bug in old code");
21
+ extern BOOL_VAR_H (tosp_block_use_cert_spaces, TRUE,
22
+ "Only stat OBVIOUS spaces");
23
+ extern BOOL_VAR_H (tosp_row_use_cert_spaces, TRUE,
24
+ "Only stat OBVIOUS spaces");
25
+ extern BOOL_VAR_H (tosp_narrow_blobs_not_cert, TRUE,
26
+ "Only stat OBVIOUS spaces");
27
+ extern BOOL_VAR_H (tosp_row_use_cert_spaces1, TRUE,
28
+ "Only stat OBVIOUS spaces");
29
+ extern BOOL_VAR_H (tosp_recovery_isolated_row_stats, TRUE,
30
+ "Use row alone when inadequate cert spaces");
31
+ extern BOOL_VAR_H (tosp_only_small_gaps_for_kern, FALSE, "Better guess");
32
+ extern BOOL_VAR_H (tosp_all_flips_fuzzy, FALSE, "Pass ANY flip to context?");
33
+ extern BOOL_VAR_H (tosp_fuzzy_limit_all, TRUE,
34
+ "Dont restrict kn->sp fuzzy limit to tables");
35
+ extern BOOL_VAR_H (tosp_stats_use_xht_gaps, TRUE,
36
+ "Use within xht gap for wd breaks");
37
+ extern BOOL_VAR_H (tosp_use_xht_gaps, TRUE,
38
+ "Use within xht gap for wd breaks");
39
+ extern BOOL_VAR_H (tosp_only_use_xht_gaps, FALSE,
40
+ "Only use within xht gap for wd breaks");
41
+ extern BOOL_VAR_H (tosp_rule_9_test_punct, FALSE,
42
+ "Dont chng kn to space next to punct");
43
+ extern BOOL_VAR_H (tosp_flip_fuzz_kn_to_sp, TRUE, "Default flip");
44
+ extern BOOL_VAR_H (tosp_flip_fuzz_sp_to_kn, TRUE, "Default flip");
45
+ extern BOOL_VAR_H (tosp_improve_thresh, FALSE,
46
+ "Enable improvement heuristic");
47
+ extern INT_VAR_H (tosp_debug_level, 0, "Debug data");
48
+ extern INT_VAR_H (tosp_enough_space_samples_for_median, 3,
49
+ "or should we use mean");
50
+ extern INT_VAR_H (tosp_redo_kern_limit, 10,
51
+ "No.samples reqd to reestimate for row");
52
+ extern INT_VAR_H (tosp_few_samples, 40,
53
+ "No.gaps reqd with 1 large gap to treat as a table");
54
+ extern INT_VAR_H (tosp_short_row, 20,
55
+ "No.gaps reqd with few cert spaces to use certs");
56
+ extern INT_VAR_H (tosp_sanity_method, 1, "How to avoid being silly");
57
+ extern double_VAR_H (tosp_threshold_bias1, 0,
58
+ "how far between kern and space?");
59
+ extern double_VAR_H (tosp_threshold_bias2, 0,
60
+ "how far between kern and space?");
61
+ extern double_VAR_H (tosp_narrow_fraction, 0.3,
62
+ "Fract of xheight for narrow");
63
+ extern double_VAR_H (tosp_narrow_aspect_ratio, 0.48,
64
+ "narrow if w/h less than this");
65
+ extern double_VAR_H (tosp_wide_fraction, 0.52, "Fract of xheight for wide");
66
+ extern double_VAR_H (tosp_wide_aspect_ratio, 0.0,
67
+ "wide if w/h less than this");
68
+ extern double_VAR_H (tosp_fuzzy_space_factor, 0.6,
69
+ "Fract of xheight for fuzz sp");
70
+ extern double_VAR_H (tosp_fuzzy_space_factor1, 0.5,
71
+ "Fract of xheight for fuzz sp");
72
+ extern double_VAR_H (tosp_fuzzy_space_factor2, 0.72,
73
+ "Fract of xheight for fuzz sp");
74
+ extern double_VAR_H (tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
75
+ extern double_VAR_H (tosp_kern_gap_factor1, 2.0,
76
+ "gap ratio to flip kern->sp");
77
+ extern double_VAR_H (tosp_kern_gap_factor2, 1.3,
78
+ "gap ratio to flip kern->sp");
79
+ extern double_VAR_H (tosp_kern_gap_factor3, 2.5,
80
+ "gap ratio to flip kern->sp");
81
+ extern double_VAR_H (tosp_ignore_big_gaps, -1, "xht multiplier");
82
+ extern double_VAR_H (tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
83
+ extern double_VAR_H (tosp_rep_space, 1.6, "rep gap multiplier for space");
84
+ extern double_VAR_H (tosp_enough_small_gaps, 0.65,
85
+ "Fract of kerns reqd for isolated row stats");
86
+ extern double_VAR_H (tosp_table_kn_sp_ratio, 2.25,
87
+ "Min difference of kn & sp in table");
88
+ extern double_VAR_H (tosp_table_xht_sp_ratio, 0.33,
89
+ "Expect spaces bigger than this");
90
+ extern double_VAR_H (tosp_table_fuzzy_kn_sp_ratio, 3.0,
91
+ "Fuzzy if less than this");
92
+ extern double_VAR_H (tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
93
+ extern double_VAR_H (tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
94
+ extern double_VAR_H (tosp_min_sane_kn_sp, 1.5,
95
+ "Dont trust spaces less than this time kn");
96
+ extern double_VAR_H (tosp_init_guess_kn_mult, 2.2,
97
+ "Thresh guess - mult kn by this");
98
+ extern double_VAR_H (tosp_init_guess_xht_mult, 0.28,
99
+ "Thresh guess - mult xht by this");
100
+ extern double_VAR_H (tosp_max_sane_kn_thresh, 5.0,
101
+ "Multiplier on kn to limit thresh");
102
+ extern double_VAR_H (tosp_flip_caution, 0.0,
103
+ "Dont autoflip kn to sp when large separation");
104
+ extern double_VAR_H (tosp_large_kerning, 0.19,
105
+ "Limit use of xht gap with large kns");
106
+ extern double_VAR_H (tosp_dont_fool_with_small_kerns, -1,
107
+ "Limit use of xht gap with odd small kns");
108
+ extern double_VAR_H (tosp_near_lh_edge, 0,
109
+ "Dont reduce box if the top left is non blank");
110
+ extern double_VAR_H (tosp_silly_kn_sp_gap, 0.2,
111
+ "Dont let sp minus kn get too small");
112
+ extern double_VAR_H (tosp_pass_wide_fuzz_sp_to_context, 0.75,
113
+ "How wide fuzzies need context");
114
+ void to_spacing( //set spacing
115
+ ICOORD page_tr, //topright of page
116
+ TO_BLOCK_LIST *blocks //blocks on page
117
+ );
118
+ //DEBUG USE ONLY
119
+ void block_spacing_stats(TO_BLOCK *block,
120
+ GAPMAP *gapmap,
121
+ BOOL8 &old_text_ord_proportional,
122
+ inT16 &block_space_gap_width, //resulting estimate
123
+ inT16 &block_non_space_gap_width //resulting estimate
124
+ );
125
+ //estimate for block
126
+ void row_spacing_stats(TO_ROW *row,
127
+ GAPMAP *gapmap,
128
+ inT16 block_idx,
129
+ inT16 row_idx,
130
+ inT16 block_space_gap_width,
131
+ inT16 block_non_space_gap_width //estimate for block
132
+ );
133
+ //estimate for block
134
+ void old_to_method(TO_ROW *row,
135
+ STATS *all_gap_stats,
136
+ STATS *space_gap_stats,
137
+ STATS *small_gap_stats,
138
+ inT16 block_space_gap_width,
139
+ inT16 block_non_space_gap_width //estimate for block
140
+ );
141
+ BOOL8 isolated_row_stats(TO_ROW *row,
142
+ GAPMAP *gapmap,
143
+ STATS *all_gap_stats,
144
+ BOOL8 suspected_table,
145
+ inT16 block_idx,
146
+ inT16 row_idx);
147
+ inT16 stats_count_under(STATS *stats, inT16 threshold);
148
+ void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
149
+ ROW *make_prop_words( //find lines
150
+ TO_ROW *row, //row to make
151
+ FCOORD rotation //for drawing
152
+ );
153
+ BOOL8 make_a_word_break( //decide on word break
154
+ TO_ROW *row, //row being made
155
+ TBOX blob_box, //for next_blob //how many blanks?
156
+ inT16 prev_gap,
157
+ TBOX prev_blob_box,
158
+ inT16 real_current_gap,
159
+ inT16 within_xht_current_gap,
160
+ TBOX next_blob_box,
161
+ inT16 next_gap,
162
+ uinT8 &blanks,
163
+ BOOL8 &fuzzy_sp,
164
+ BOOL8 &fuzzy_non);
165
+ BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box);
166
+ BOOL8 wide_blob(TO_ROW *row, TBOX blob_box);
167
+ BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box);
168
+ //A COPY FOR PEEKING
169
+ void peek_at_next_gap(TO_ROW *row,
170
+ BLOBNBOX_IT box_it,
171
+ TBOX &next_blob_box,
172
+ inT16 &next_gap,
173
+ inT16 &next_within_xht_gap);
174
+ void mark_gap( //Debug stuff
175
+ TBOX blob, //blob following gap
176
+ inT16 rule, // heuristic id
177
+ inT16 prev_gap,
178
+ inT16 prev_blob_width,
179
+ inT16 current_gap,
180
+ inT16 next_blob_width,
181
+ inT16 next_gap);
182
+ float find_mean_blob_spacing(WERD *word);
183
+ BOOL8 ignore_big_gap(TO_ROW *row,
184
+ inT32 row_length,
185
+ GAPMAP *gapmap,
186
+ inT16 left,
187
+ inT16 right);
188
+ TBOX reduced_box_next( //get bounding box
189
+ TO_ROW *row, //current row
190
+ BLOBNBOX_IT *it //iterator to blobs
191
+ );
192
+ TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht);
193
+ #endif
@@ -0,0 +1,87 @@
1
+ /**********************************************************************
2
+ * File: tovars.cpp (Formerly to_vars.c)
3
+ * Description: Variables used by textord.
4
+ * Author: Ray Smith
5
+ * Created: Tue Aug 24 16:55:02 BST 1993
6
+ *
7
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #include "tovars.h"
22
+
23
+ #define EXTERN
24
+
25
+ EXTERN BOOL_VAR (textord_show_initial_words, FALSE, "Display separate words");
26
+ EXTERN BOOL_VAR (textord_show_new_words, FALSE, "Display separate words");
27
+ EXTERN BOOL_VAR (textord_show_fixed_words, FALSE,
28
+ "Display forced fixed pitch words");
29
+ EXTERN BOOL_VAR (textord_blocksall_fixed, FALSE, "Moan about prop blocks");
30
+ EXTERN BOOL_VAR (textord_blocksall_prop, FALSE,
31
+ "Moan about fixed pitch blocks");
32
+ EXTERN BOOL_VAR (textord_blocksall_testing, FALSE, "Dump stats when moaning");
33
+ EXTERN BOOL_VAR (textord_test_mode, FALSE, "Do current test");
34
+ EXTERN BOOL_VAR (textord_repeat_extraction, TRUE, "Extract repeated chars");
35
+ EXTERN INT_VAR (textord_dotmatrix_gap, 3,
36
+ "Max pixel gap for broken pixed pitch");
37
+ EXTERN INT_VAR (textord_repeat_threshold, 4,
38
+ "Min multiple for repeated char");
39
+ EXTERN INT_VAR (textord_debug_block, 0, "Block to do debug on");
40
+ EXTERN INT_VAR (textord_pitch_range, 2, "Max range test on pitch");
41
+ EXTERN double_VAR (textord_repeat_rating, 6, "Min rating for equal blobs");
42
+ EXTERN double_VAR (textord_wordstats_smooth_factor, 0.05,
43
+ "Smoothing gap stats");
44
+ EXTERN double_VAR (textord_width_smooth_factor, 0.10,
45
+ "Smoothing width stats");
46
+ EXTERN double_VAR (textord_words_width_ile, 0.4,
47
+ "Ile of blob widths for space est");
48
+ EXTERN double_VAR (textord_words_maxspace, 4.0, "Multiple of xheight");
49
+ EXTERN double_VAR (textord_words_default_maxspace, 3.5,
50
+ "Max believable third space");
51
+ EXTERN double_VAR (textord_words_default_minspace, 0.6,
52
+ "Fraction of xheight");
53
+ EXTERN double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight");
54
+ EXTERN double_VAR (textord_words_default_nonspace, 0.2,
55
+ "Fraction of xheight");
56
+ EXTERN double_VAR (textord_words_initial_lower, 0.25,
57
+ "Max inital cluster size");
58
+ EXTERN double_VAR (textord_words_initial_upper, 0.15,
59
+ "Min initial cluster spacing");
60
+ EXTERN double_VAR (textord_words_minlarge, 0.75,
61
+ "Fraction of valid gaps needed");
62
+ EXTERN double_VAR (textord_words_pitchsd_threshold, 0.040,
63
+ "Pitch sync threshold");
64
+ EXTERN double_VAR (textord_words_def_fixed, 0.016,
65
+ "Threshold for definite fixed");
66
+ EXTERN double_VAR (textord_words_def_prop, 0.090,
67
+ "Threshold for definite prop");
68
+ EXTERN INT_VAR (textord_words_veto_power, 5,
69
+ "Rows required to outvote a veto");
70
+ EXTERN double_VAR (textord_pitch_rowsimilarity, 0.08,
71
+ "Fraction of xheight for sameness");
72
+ EXTERN BOOL_VAR (textord_pitch_scalebigwords, FALSE,
73
+ "Scale scores on big words");
74
+ EXTERN double_VAR (words_initial_lower, 0.5, "Max inital cluster size");
75
+ EXTERN double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing");
76
+ EXTERN double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight");
77
+ EXTERN double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight");
78
+ EXTERN double_VAR (words_default_fixed_limit, 0.6, "Allowed size variance");
79
+ EXTERN double_VAR (textord_words_definite_spread, 0.30,
80
+ "Non-fuzzy spacing region");
81
+ EXTERN double_VAR (textord_spacesize_ratiofp, 2.8,
82
+ "Min ratio space/nonspace");
83
+ EXTERN double_VAR (textord_spacesize_ratioprop, 2.0,
84
+ "Min ratio space/nonspace");
85
+ EXTERN double_VAR (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold");
86
+ EXTERN double_VAR (textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch");
87
+ EXTERN double_VAR (textord_fp_min_width, 0.5, "Min width of decent blobs");
@@ -0,0 +1,94 @@
1
+ /**********************************************************************
2
+ * File: tovars.h (Formerly to_vars.h)
3
+ * Description: Variables used by textord.
4
+ * Author: Ray Smith
5
+ * Created: Tue Aug 24 16:55:02 BST 1993
6
+ *
7
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #ifndef TOVARS_H
21
+ #define TOVARS_H
22
+
23
+ #include "varable.h"
24
+ #include "notdll.h"
25
+
26
+ extern BOOL_VAR_H (textord_show_initial_words, FALSE,
27
+ "Display separate words");
28
+ extern BOOL_VAR_H (textord_show_new_words, FALSE, "Display separate words");
29
+ extern BOOL_VAR_H (textord_show_fixed_words, FALSE,
30
+ "Display forced fixed pitch words");
31
+ extern BOOL_VAR_H (textord_blocksall_fixed, FALSE, "Moan about prop blocks");
32
+ extern BOOL_VAR_H (textord_blocksall_prop, FALSE,
33
+ "Moan about fixed pitch blocks");
34
+ extern BOOL_VAR_H (textord_blocksall_testing, FALSE,
35
+ "Dump stats when moaning");
36
+ extern BOOL_VAR_H (textord_test_mode, FALSE, "Do current test");
37
+ extern BOOL_VAR_H (textord_repeat_extraction, TRUE, "Extract repeated chars");
38
+ extern INT_VAR_H (textord_dotmatrix_gap, 3,
39
+ "Max pixel gap for broken pixed pitch");
40
+ extern INT_VAR_H (textord_repeat_threshold, 4,
41
+ "Min multiple for repeated char");
42
+ extern INT_VAR_H (textord_debug_block, 0, "Block to do debug on");
43
+ extern INT_VAR_H (textord_pitch_range, 2, "Max range test on pitch");
44
+ extern double_VAR_H (textord_repeat_rating, 6, "Min rating for equal blobs");
45
+ extern double_VAR_H (textord_wordstats_smooth_factor, 0.05,
46
+ "Smoothing gap stats");
47
+ extern double_VAR_H (textord_width_smooth_factor, 0.10,
48
+ "Smoothing width stats");
49
+ extern double_VAR_H (textord_words_width_ile, 0.4,
50
+ "Ile of blob widths for space est");
51
+ extern double_VAR_H (textord_words_maxspace, 4.0, "Multiple of xheight");
52
+ extern double_VAR_H (textord_words_default_maxspace, 3.5,
53
+ "Max believable third space");
54
+ extern double_VAR_H (textord_words_default_minspace, 0.6,
55
+ "Fraction of xheight");
56
+ extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight");
57
+ extern double_VAR_H (textord_words_default_nonspace, 0.2,
58
+ "Fraction of xheight");
59
+ extern double_VAR_H (textord_words_initial_lower, 0.25,
60
+ "Max inital cluster size");
61
+ extern double_VAR_H (textord_words_initial_upper, 0.15,
62
+ "Min initial cluster spacing");
63
+ extern double_VAR_H (textord_words_minlarge, 0.75,
64
+ "Fraction of valid gaps needed");
65
+ extern double_VAR_H (textord_words_pitchsd_threshold, 0.025,
66
+ "Pitch sync threshold");
67
+ extern double_VAR_H (textord_words_def_fixed, 0.01,
68
+ "Threshold for definite fixed");
69
+ extern double_VAR_H (textord_words_def_prop, 0.06,
70
+ "Threshold for definite prop");
71
+ extern INT_VAR_H (textord_words_veto_power, 5,
72
+ "Rows required to outvote a veto");
73
+ extern double_VAR_H (textord_pitch_rowsimilarity, 0.08,
74
+ "Fraction of xheight for sameness");
75
+ extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE,
76
+ "Scale scores on big words");
77
+ extern double_VAR_H (words_initial_lower, 0.5, "Max inital cluster size");
78
+ extern double_VAR_H (words_initial_upper, 0.15,
79
+ "Min initial cluster spacing");
80
+ extern double_VAR_H (words_default_prop_nonspace, 0.25,
81
+ "Fraction of xheight");
82
+ extern double_VAR_H (words_default_fixed_space, 0.75, "Fraction of xheight");
83
+ extern double_VAR_H (words_default_fixed_limit, 0.6, "Allowed size variance");
84
+ extern double_VAR_H (textord_words_definite_spread, 0.30,
85
+ "Non-fuzzy spacing region");
86
+ extern double_VAR_H (textord_spacesize_ratiofp, 2.8,
87
+ "Min ratio space/nonspace");
88
+ extern double_VAR_H (textord_spacesize_ratioprop, 2.0,
89
+ "Min ratio space/nonspace");
90
+ extern double_VAR_H (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold");
91
+ extern double_VAR_H (textord_max_pitch_iqr, 0.20,
92
+ "Xh fraction noise in pitch");
93
+ extern double_VAR_H (textord_fp_min_width, 0.5, "Min width of decent blobs");
94
+ #endif
@@ -0,0 +1,312 @@
1
+ /**********************************************************************
2
+ * File: underlin.cpp (Formerly undrline.c)
3
+ * Description: Code to chop blobs apart from underlines.
4
+ * Author: Ray Smith
5
+ * Created: Mon Aug 8 11:14:00 BST 1994
6
+ *
7
+ * (C) Copyright 1994, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #ifdef __UNIX__
22
+ #include <assert.h>
23
+ #endif
24
+ #include "underlin.h"
25
+
26
+ #define PROJECTION_MARGIN 10 //arbitrary
27
+ #define EXTERN
28
+
29
+ EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");
30
+ EXTERN BOOL_VAR (textord_restore_underlines, TRUE,
31
+ "Chop underlines & put back");
32
+
33
+ /**********************************************************************
34
+ * restore_underlined_blobs
35
+ *
36
+ * Find underlined blobs and put them back in the row.
37
+ **********************************************************************/
38
+
39
+ void restore_underlined_blobs( //get chop points
40
+ TO_BLOCK *block //block to do
41
+ ) {
42
+ inT16 chop_coord; //chop boundary
43
+ TBOX blob_box; //of underline
44
+ BLOBNBOX *u_line; //underline bit
45
+ TO_ROW *row; //best row for blob
46
+ ICOORDELT_LIST chop_cells; //blobs to cut out
47
+ //real underlines
48
+ BLOBNBOX_LIST residual_underlines;
49
+ OUTLINE_LIST left_outlines; //in current blob
50
+ OUTLINE_LIST right_outlines; //for next blob
51
+ C_OUTLINE_LIST left_coutlines;
52
+ C_OUTLINE_LIST right_coutlines;
53
+ ICOORDELT_IT cell_it = &chop_cells;
54
+ //under lines
55
+ BLOBNBOX_IT under_it = &block->underlines;
56
+ BLOBNBOX_IT ru_it = &residual_underlines;
57
+
58
+ if (block->get_rows()->empty())
59
+ return; // Don't crash if there are no rows.
60
+ for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
61
+ under_it.forward ()) {
62
+ u_line = under_it.extract ();
63
+ blob_box = u_line->bounding_box ();
64
+ row = most_overlapping_row (block->get_rows (), u_line);
65
+ find_underlined_blobs (u_line, &row->baseline, row->xheight,
66
+ row->xheight * textord_underline_offset,
67
+ &chop_cells);
68
+ cell_it.set_to_list (&chop_cells);
69
+ for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
70
+ cell_it.forward ()) {
71
+ chop_coord = cell_it.data ()->x ();
72
+ if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
73
+ split_to_blob (u_line, chop_coord,
74
+ textord_fp_chop_error + 0.5,
75
+ &left_outlines, &left_coutlines,
76
+ &right_outlines, &right_coutlines);
77
+ if (!left_outlines.empty ())
78
+ ru_it.
79
+ add_after_then_move (new
80
+ BLOBNBOX (new PBLOB (&left_outlines)));
81
+ else if (!left_coutlines.empty ())
82
+ ru_it.
83
+ add_after_then_move (new
84
+ BLOBNBOX (new
85
+ C_BLOB (&left_coutlines)));
86
+ //right edge of lbob
87
+ chop_coord = cell_it.data ()->y ();
88
+ split_to_blob (NULL, chop_coord,
89
+ textord_fp_chop_error + 0.5,
90
+ &left_outlines, &left_coutlines,
91
+ &right_outlines, &right_coutlines);
92
+ if (!left_outlines.empty ())
93
+ row->insert_blob (new BLOBNBOX (new PBLOB (&left_outlines)));
94
+ else if (!left_coutlines.empty ())
95
+ row->
96
+ insert_blob (new BLOBNBOX (new C_BLOB (&left_coutlines)));
97
+ else {
98
+ ASSERT_HOST(FALSE);
99
+ fprintf (stderr,
100
+ "Error:no outlines after chopping from %d to %d from (%d,%d)->(%d,%d)\n",
101
+ cell_it.data ()->x (), cell_it.data ()->y (),
102
+ blob_box.left (), blob_box.bottom (),
103
+ blob_box.right (), blob_box.top ());
104
+ }
105
+ u_line = NULL; //no more blobs to add
106
+ }
107
+ delete cell_it.extract ();
108
+ }
109
+ if (!right_outlines.empty () || !right_coutlines.empty ()) {
110
+ split_to_blob (NULL, blob_box.right (),
111
+ textord_fp_chop_error + 0.5,
112
+ &left_outlines, &left_coutlines,
113
+ &right_outlines, &right_coutlines);
114
+ if (!left_outlines.empty ())
115
+ ru_it.
116
+ add_after_then_move (new BLOBNBOX (new PBLOB (&left_outlines)));
117
+ else if (!left_coutlines.empty ())
118
+ ru_it.
119
+ add_after_then_move (new
120
+ BLOBNBOX (new C_BLOB (&left_coutlines)));
121
+ }
122
+ if (u_line != NULL) {
123
+ if (u_line->blob() != NULL)
124
+ delete u_line->blob();
125
+ if (u_line->cblob() != NULL)
126
+ delete u_line->cblob();
127
+ delete u_line;
128
+ }
129
+ }
130
+ if (!ru_it.empty ()) {
131
+ ru_it.move_to_first ();
132
+ for (ru_it.mark_cycle_pt (); !ru_it.cycled_list (); ru_it.forward ()) {
133
+ under_it.add_after_then_move (ru_it.extract ());
134
+ }
135
+ }
136
+ }
137
+
138
+
139
+ /**********************************************************************
140
+ * most_overlapping_row
141
+ *
142
+ * Return the row which most overlaps the blob.
143
+ **********************************************************************/
144
+
145
+ TO_ROW *most_overlapping_row( //find best row
146
+ TO_ROW_LIST *rows, //list of rows
147
+ BLOBNBOX *blob //blob to place
148
+ ) {
149
+ inT16 x = (blob->bounding_box ().left ()
150
+ + blob->bounding_box ().right ()) / 2;
151
+ TO_ROW_IT row_it = rows; //row iterator
152
+ TO_ROW *row; //current row
153
+ TO_ROW *best_row; //output row
154
+ float overlap; //of blob & row
155
+ float bestover; //best overlap
156
+
157
+ best_row = NULL;
158
+ bestover = (float) -MAX_INT32;
159
+ if (row_it.empty ())
160
+ return NULL;
161
+ row = row_it.data ();
162
+ row_it.mark_cycle_pt ();
163
+ while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
164
+ && !row_it.cycled_list ()) {
165
+ best_row = row;
166
+ bestover =
167
+ blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
168
+ row_it.forward ();
169
+ row = row_it.data ();
170
+ }
171
+ while (row->baseline.y (x) + row->xheight + row->ascrise
172
+ >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
173
+ overlap = row->baseline.y (x) + row->xheight + row->ascrise;
174
+ if (blob->bounding_box ().top () < overlap)
175
+ overlap = blob->bounding_box ().top ();
176
+ if (blob->bounding_box ().bottom () >
177
+ row->baseline.y (x) + row->descdrop)
178
+ overlap -= blob->bounding_box ().bottom ();
179
+ else
180
+ overlap -= row->baseline.y (x) + row->descdrop;
181
+ if (overlap > bestover) {
182
+ bestover = overlap;
183
+ best_row = row;
184
+ }
185
+ row_it.forward ();
186
+ row = row_it.data ();
187
+ }
188
+ if (bestover < 0
189
+ && row->baseline.y (x) + row->xheight + row->ascrise
190
+ - blob->bounding_box ().bottom () > bestover)
191
+ best_row = row;
192
+ return best_row;
193
+ }
194
+
195
+
196
+ /**********************************************************************
197
+ * find_underlined_blobs
198
+ *
+ * Find the start and end x coords of the blobs contained in an underline.
+ *
+ * Every outline of u_line's cblob is projected with
+ * vertical_cunderline_projection into three STATS (lower, middle, upper).
+ * The columns from blob_box.left () up to blob_box.right () are then
+ * scanned; each maximal run of columns with middle_proj.pile_count () > 0
+ * is appended to chop_cells as an ICOORD (x, y), where x is the first
+ * column of the run and y is the first column after it (or the right edge
+ * of the box). Only middle_proj is read here; lower_proj and upper_proj
+ * are filled but not inspected by this function.
+ *
+ * u_line->cblob () must be non-NULL (checked with ASSERT_HOST).
199
+ * New ICOORDELTs are allocated with new and handed to chop_cells.
200
+ **********************************************************************/
201
+
202
+ void find_underlined_blobs( //get chop points
203
+ BLOBNBOX *u_line, //underlined unit
204
+ QSPLINE *baseline, //actual baseline
205
+ float xheight, //height of line
206
+ float baseline_offset, //amount to shrink it
207
+ ICOORDELT_LIST *chop_cells //places to chop
208
+ ) {
209
+ inT16 x, y; //start and end columns of a run
210
+ ICOORD blob_chop; //(start, end) pair for one run
211
+ TBOX blob_box = u_line->bounding_box ();
212
+ //cell iterator
213
+ ICOORDELT_IT cell_it = chop_cells;
214
+ STATS upper_proj (blob_box.left (), blob_box.right () + 1); //filled, not read here
215
+ STATS middle_proj (blob_box.left (), blob_box.right () + 1); //drives the chop search
216
+ STATS lower_proj (blob_box.left (), blob_box.right () + 1); //filled, not read here
217
+ C_OUTLINE_IT out_it; //outlines of blob
218
+
219
+ ASSERT_HOST (u_line->cblob () != NULL);
220
+
221
+ out_it.set_to_list (u_line->cblob ()->out_list ());
222
+ for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { //project each outline
223
+ vertical_cunderline_projection (out_it.data (),
224
+ baseline, xheight, baseline_offset,
225
+ &lower_proj, &middle_proj, &upper_proj);
226
+ }
227
+
228
+ for (x = blob_box.left (); x < blob_box.right (); x++) {
229
+ if (middle_proj.pile_count (x) > 0) { //start of an occupied run
230
+ for (y = x + 1;
231
+ y < blob_box.right () && middle_proj.pile_count (y) > 0; y++); //empty body: find end of run
232
+ blob_chop = ICOORD (x, y);
233
+ cell_it.add_after_then_move (new ICOORDELT (blob_chop));
234
+ x = y; //y is the right edge or an empty column, so the x++ may skip it
235
+ }
236
+ }
237
+ }
238
+
239
+
240
+ /**********************************************************************
241
+ * vertical_cunderline_projection
242
+ *
243
+ * Compute the vertical projection of an outline (and, recursively, of
244
+ * its child outlines) and add it to the given STATS.
+ *
+ * The outline is walked step by step from start_pos (). Only horizontal
+ * steps contribute; steps with step.x () == 0 add nothing.
+ * For each horizontal step two limits are computed at the step's column:
+ *   lower_y = floor (baseline->y (col) + baseline_offset + 0.5)
+ *   upper_y = floor (baseline->y (col) + baseline_offset + xheight + 0.5)
+ * A rightward step (step.x () > 0) uses column pos.x () and adds values
+ * that total -pos.y () across the three STATS; a leftward step
+ * (step.x () < 0) uses column pos.x () - 1 and adds the same split with
+ * the signs reversed, totalling +pos.y (). The split is: the part up to
+ * lower_y goes to lower_proj, the part between lower_y and upper_y goes
+ * to middle_proj, and the part above upper_y goes to upper_proj.
+ **********************************************************************/
245
+
246
+
247
+ void vertical_cunderline_projection( //project outlines
248
+ C_OUTLINE *outline, //outline to project
249
+ QSPLINE *baseline, //actual baseline
250
+ float xheight, //height of line
251
+ float baseline_offset, //amount to shrink it
252
+ STATS *lower_proj, //below baseline
253
+ STATS *middle_proj, //centre region
254
+ STATS *upper_proj //top region
255
+ ) {
256
+ ICOORD pos; //current point
257
+ ICOORD step; //edge step
258
+ inT16 lower_y, upper_y; //region limits
259
+ inT32 length; //of outline
260
+ inT16 stepindex; //current step
261
+ C_OUTLINE_IT out_it = outline->child (); //iterator over child outlines
262
+
263
+ pos = outline->start_pos ();
264
+ length = outline->pathlength ();
265
+ for (stepindex = 0; stepindex < length; stepindex++) {
266
+ step = outline->step (stepindex);
267
+ if (step.x () > 0) { //rightward step: column pos.x (), negative contributions
268
+ lower_y =
269
+ (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 0.5);
270
+ upper_y =
271
+ (inT16) floor (baseline->y (pos.x ()) + baseline_offset +
272
+ xheight + 0.5);
273
+ if (pos.y () >= lower_y) { //edge at or above the lower limit
274
+ lower_proj->add (pos.x (), -lower_y);
275
+ if (pos.y () >= upper_y) { //edge at or above the upper limit too
276
+ middle_proj->add (pos.x (), lower_y - upper_y);
277
+ upper_proj->add (pos.x (), upper_y - pos.y ());
278
+ }
279
+ else
280
+ middle_proj->add (pos.x (), lower_y - pos.y ());
281
+ }
282
+ else
283
+ lower_proj->add (pos.x (), -pos.y ()); //edge entirely below the lower limit
284
+ }
285
+ else if (step.x () < 0) { //leftward step: column pos.x () - 1, signs reversed
286
+ lower_y =
287
+ (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
288
+ 0.5);
289
+ upper_y =
290
+ (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
291
+ xheight + 0.5);
292
+ if (pos.y () >= lower_y) { //edge at or above the lower limit
293
+ lower_proj->add (pos.x () - 1, lower_y);
294
+ if (pos.y () >= upper_y) { //edge at or above the upper limit too
295
+ middle_proj->add (pos.x () - 1, upper_y - lower_y);
296
+ upper_proj->add (pos.x () - 1, pos.y () - upper_y);
297
+ }
298
+ else
299
+ middle_proj->add (pos.x () - 1, pos.y () - lower_y);
300
+ }
301
+ else
302
+ lower_proj->add (pos.x () - 1, pos.y ()); //edge entirely below the lower limit
303
+ }
304
+ pos += step; //advance to the next point on the outline
305
+ }
306
+
307
+ for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { //recurse into children
308
+ vertical_cunderline_projection (out_it.data (),
309
+ baseline, xheight, baseline_offset,
310
+ lower_proj, middle_proj, upper_proj);
311
+ }
312
+ }