tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1761 @@
1
+ /**********************************************************************
2
+ * File: oldbasel.cpp (Formerly oldbl.c)
3
+ * Description: A re-implementation of the old baseline algorithm.
4
+ * Author: Ray Smith
5
+ * Created: Wed Oct 6 09:41:48 BST 1993
6
+ *
7
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #include "statistc.h"
22
+ #include "quadlsq.h"
23
+ #include "lmedsq.h"
24
+ #include "makerow.h"
25
+ #include "drawtord.h"
26
+ #include "oldbasel.h"
27
+ #include "tprintf.h"
28
+
29
+ #define EXTERN
30
+
31
+ EXTERN BOOL_VAR (textord_really_old_xheight, FALSE,
32
+ "Use original wiseowl xheight");
33
+ EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation");
34
+ EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation");
35
+ EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism");
36
+ EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines");
37
+ EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions");
38
+ EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights");
39
+ EXTERN BOOL_VAR (oldbl_xhfix, FALSE,
40
+ "Fix bug in modes threshold for xheights");
41
+ EXTERN BOOL_VAR(textord_ocropus_mode, FALSE, "Make baselines for ocropus");
42
+ EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
43
+ EXTERN INT_VAR (oldbl_holed_losscount, 10,
44
+ "Max lost before fallback line used");
45
+ EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
46
+ EXTERN double_VAR (textord_oldbl_jumplimit, 0.15,
47
+ "X fraction for new partition");
48
+
49
+ #define TURNLIMIT 1 /*min size for turning point */
50
+ #define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */
51
+ #define DESCENDER_FRACTION 0.5 /*descender/x-height */
52
+ #define MIN_ASC_FRACTION 0.20 /*min size of ascenders */
53
+ #define MIN_DESC_FRACTION 0.25 /*min size of descenders */
54
+ #define MINASCRISE 2.0 /*min ascender/desc step */
55
+ #define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */
56
+ #define MAXHEIGHT 300 /*max blob height */
57
+ #define MAXOVERLAP 0.1 /*max 10% missed overlap */
58
+ #define MAXBADRUN 2 /*max non best for failed */
59
+ #define HEIGHTBUCKETS 200 /* Num of buckets */
60
+ #define DELTAHEIGHT 5.0 /* Small amount of diff */
61
+ #define GOODHEIGHT 5
62
+ #define MAXLOOPS 10
63
+ #define MODENUM 10
64
+ #define MAXPARTS 6
65
+ #define SPLINESIZE 23
66
+
67
+ #define ABS(x) ((x)<0 ? (-(x)) : (x))
68
+
69
+ /**********************************************************************
70
+ * make_old_baselines
71
+ *
72
+ * Top level function to make baselines the old way.
73
+ **********************************************************************/
74
+
75
+ void make_old_baselines( //make splines
76
+ TO_BLOCK *block, //block to do
77
+ BOOL8 testing_on //correct orientation
78
+ ) {
79
+ QSPLINE *prev_baseline; //baseline of previous row
80
+ TO_ROW *row; //current row
81
+ TO_ROW_IT row_it = block->get_rows ();
82
+ BLOBNBOX_IT blob_it;
83
+
84
+ prev_baseline = NULL; //nothing yet
85
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
86
+ row = row_it.data ();
87
+ find_textlines (block, row, 2, NULL);
88
+ if (row->xheight <= 0 && prev_baseline != NULL)
89
+ find_textlines (block, row, 2, prev_baseline);
90
+ if (row->xheight > 0)
91
+ //was a good one
92
+ prev_baseline = &row->baseline;
93
+ else {
94
+ prev_baseline = NULL;
95
+ blob_it.set_to_list (row->blob_list ());
96
+ if (textord_debug_baselines)
97
+ tprintf ("Row baseline generation failed on row at (%d,%d)\n",
98
+ blob_it.data ()->bounding_box ().left (),
99
+ blob_it.data ()->bounding_box ().bottom ());
100
+ }
101
+ }
102
+ correlate_lines(block);
103
+ }
104
+
105
+
106
+ /**********************************************************************
107
+ * correlate_lines
108
+ *
109
+ * Correlate the x-heights and ascender heights of a block to fill-in
110
+ * the ascender height and descender height for rows without one.
111
+ * Also fix baselines of rows without a decent fit.
112
+ **********************************************************************/
113
+
114
+ void correlate_lines( //cleanup lines
115
+ TO_BLOCK *block //block to do
116
+ ) {
117
+ TO_ROW **rows; //array of ptrs
118
+ int rowcount; /*no of rows to do */
119
+ register int rowindex; /*no of row */
120
+ //iterator
121
+ TO_ROW_IT row_it = block->get_rows ();
122
+
123
+ rowcount = row_it.length ();
124
+ if (rowcount == 0) {
125
+ //default value
126
+ block->xheight = block->line_size;
127
+ return; /*none to do */
128
+ }
129
+ rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
130
+ rowindex = 0;
131
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
132
+ //make array
133
+ rows[rowindex++] = row_it.data ();
134
+
135
+ /*try to fix bad lines */
136
+ correlate_neighbours(block, rows, rowcount);
137
+
138
+ block->xheight = (float) correlate_with_stats (rows, rowcount);
139
+ /*use stats */
140
+ if (block->xheight <= 0)
141
+ //desperate
142
+ block->xheight = block->line_size * textord_merge_x;
143
+ if (block->xheight < textord_min_xheight)
144
+ block->xheight = (float) textord_min_xheight;
145
+
146
+ free_mem(rows);
147
+ }
148
+
149
+
150
+ /**********************************************************************
151
+ * correlate_neighbours
152
+ *
153
+ * Try to fix rows that had a bad spline fit by using neighbours.
154
+ **********************************************************************/
155
+
156
+ void correlate_neighbours( //fix bad rows
157
+ TO_BLOCK *block, /*block rows are in */
158
+ TO_ROW **rows, /*rows of block */
159
+ int rowcount /*no of rows to do */
160
+ ) {
161
+ TO_ROW *row; /*current row */
162
+ register int rowindex; /*no of row */
163
+ register int otherrow; /*second row */
164
+ int upperrow; /*row above to use */
165
+ int lowerrow; /*row below to use */
166
+ float biggest;
167
+
168
+ for (rowindex = 0; rowindex < rowcount; rowindex++) {
169
+ row = rows[rowindex]; /*current row */
170
+ if (row->xheight < 0) {
171
+ /*quadratic failed */
172
+ for (otherrow = rowindex - 2;
173
+ otherrow >= 0
174
+ && (rows[otherrow]->xheight < 0.0
175
+ || !row->baseline.overlap (&rows[otherrow]->baseline,
176
+ MAXOVERLAP)); otherrow--);
177
+ upperrow = otherrow; /*decent row above */
178
+ for (otherrow = rowindex + 1;
179
+ otherrow < rowcount
180
+ && (rows[otherrow]->xheight < 0.0
181
+ || !row->baseline.overlap (&rows[otherrow]->baseline,
182
+ MAXOVERLAP)); otherrow++);
183
+ lowerrow = otherrow; /*decent row below */
184
+ if (upperrow >= 0)
185
+ find_textlines (block, row, 2, &rows[upperrow]->baseline);
186
+ if (row->xheight < 0 && lowerrow < rowcount)
187
+ find_textlines (block, row, 2, &rows[lowerrow]->baseline);
188
+ if (row->xheight < 0) {
189
+ if (upperrow >= 0)
190
+ find_textlines (block, row, 1, &rows[upperrow]->baseline);
191
+ else if (lowerrow < rowcount)
192
+ find_textlines (block, row, 1, &rows[lowerrow]->baseline);
193
+ }
194
+ }
195
+ }
196
+
197
+ for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
198
+ row = rows[rowindex]; /*current row */
199
+ if (row->xheight < 0) /*linear failed */
200
+ /*make do */
201
+ row->xheight = -row->xheight;
202
+ biggest = MAX (biggest, row->xheight);
203
+ }
204
+ }
205
+
206
+
207
+ /**********************************************************************
208
+ * correlate_with_stats
209
+ *
210
+ * correlate the x-heights and ascender heights of a block to fill-in
211
+ * the ascender height and descender height for rows without one.
212
+ **********************************************************************/
213
+
214
+ int correlate_with_stats( //fix xheights
215
+ TO_ROW **rows, /*rows of block */
216
+ int rowcount /*no of rows to do */
217
+ ) {
218
+ TO_ROW *row; /*current row */
219
+ register int rowindex; /*no of row */
220
+ float lineheight; /*mean x-height */
221
+ float ascheight; /*average ascenders */
222
+ float minascheight; /*min allowed ascheight */
223
+ int xcount; /*no of samples for xheight */
224
+ float fullheight; /*mean top height */
225
+ int fullcount; /*no of samples */
226
+ float descheight; /*mean descender drop */
227
+ float mindescheight; /*min allowed descheight */
228
+ int desccount; /*no of samples */
229
+ float xshift; /*shift in xheight */
230
+
231
+ /*no samples */
232
+ xcount = fullcount = desccount = 0;
233
+ lineheight = ascheight = fullheight = descheight = 0.0;
234
+ for (rowindex = 0; rowindex < rowcount; rowindex++) {
235
+ row = rows[rowindex]; /*current row */
236
+ if (row->ascrise > 0.0) { /*got ascenders? */
237
+ lineheight += row->xheight;/*average x-heights */
238
+ ascheight += row->ascrise; /*average ascenders */
239
+ xcount++;
240
+ }
241
+ else {
242
+ fullheight += row->xheight;/*assume full height */
243
+ fullcount++;
244
+ }
245
+ if (row->descdrop < 0.0) { /*got descenders? */
246
+ /*average descenders */
247
+ descheight += row->descdrop;
248
+ desccount++;
249
+ }
250
+ }
251
+
252
+ if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
253
+ lineheight /= xcount; /*average x-height */
254
+ /*average caps height */
255
+ fullheight = lineheight + ascheight / xcount;
256
+ /*must be decent size */
257
+ if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
258
+ fullheight = lineheight * (1 + MIN_ASC_FRACTION);
259
+ }
260
+ else {
261
+ fullheight /= fullcount; /*average max height */
262
+ /*guess x-height */
263
+ lineheight = fullheight * X_HEIGHT_FRACTION;
264
+ }
265
+ if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
266
+ descheight /= desccount; /*average descenders */
267
+ else
268
+ /*guess descenders */
269
+ descheight = -lineheight * DESCENDER_FRACTION;
270
+
271
+ minascheight = lineheight * MIN_ASC_FRACTION;
272
+ mindescheight = -lineheight * MIN_DESC_FRACTION;
273
+ for (rowindex = 0; rowindex < rowcount; rowindex++) {
274
+ row = rows[rowindex]; /*do each row */
275
+ row->all_caps = FALSE;
276
+ if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
277
+ /*no ascenders */
278
+ if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
279
+ && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
280
+ row->ascrise = fullheight - lineheight;
281
+ /*shift in x */
282
+ xshift = lineheight - row->xheight;
283
+ /*set to average */
284
+ row->xheight = lineheight;
285
+
286
+ }
287
+ else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
288
+ && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
289
+ row->ascrise = row->xheight - lineheight;
290
+ xshift = -row->ascrise; /*shift in x */
291
+ /*set to average */
292
+ row->xheight = lineheight;
293
+ row->all_caps = TRUE;
294
+ }
295
+ else {
296
+ row->ascrise = (fullheight - lineheight) * row->xheight
297
+ / fullheight;
298
+ xshift = -row->ascrise; /*shift in x */
299
+ /*scale it */
300
+ row->xheight -= row->ascrise;
301
+ row->all_caps = TRUE;
302
+ }
303
+ if (row->ascrise < minascheight)
304
+ row->ascrise =
305
+ row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
306
+ }
307
+ if (row->descdrop > mindescheight) {
308
+ if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
309
+ && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
310
+ /*set to average */
311
+ row->descdrop = descheight;
312
+ else
313
+ row->descdrop = -row->xheight * DESCENDER_FRACTION;
314
+ }
315
+ }
316
+ return (int) lineheight; //block xheight
317
+ }
318
+
319
+
320
+ /**********************************************************************
321
+ * find_textlines
322
+ *
323
+ * Compute the baseline for the given row.
324
+ **********************************************************************/
325
+
326
+ void find_textlines( //get baseline
327
+ TO_BLOCK *block, //block row is in
328
+ TO_ROW *row, //row to do
329
+ int degree, //required approximation
330
+ QSPLINE *spline //starting spline
331
+ ) {
332
+ int partcount; /*no of partitions of */
333
+ BOOL8 holed_line; //lost too many blobs
334
+ int bestpart; /*biggest partition */
335
+ char *partids; /*partition no of each blob */
336
+ int partsizes[MAXPARTS]; /*no in each partition */
337
+ int lineheight; /*guessed x-height */
338
+ float jumplimit; /*allowed delta change */
339
+ int *xcoords; /*useful sample points */
340
+ int *ycoords; /*useful sample points */
341
+ TBOX *blobcoords; /*edges of blob rectangles */
342
+ int blobcount; /*no of blobs on line */
343
+ float *ydiffs; /*diffs from 1st approx */
344
+ int pointcount; /*no of coords */
345
+ int xstarts[SPLINESIZE + 1]; //segment boundaries
346
+ int segments; //no of segments
347
+
348
+ //no of blobs in row
349
+ blobcount = row->blob_list ()->length ();
350
+ partids = (char *) alloc_mem (blobcount * sizeof (char));
351
+ xcoords = (int *) alloc_mem (blobcount * sizeof (int));
352
+ ycoords = (int *) alloc_mem (blobcount * sizeof (int));
353
+ blobcoords = (TBOX *) alloc_mem (blobcount * sizeof (TBOX));
354
+ ydiffs = (float *) alloc_mem (blobcount * sizeof (float));
355
+
356
+ lineheight = get_blob_coords (row, (int) block->line_size, blobcoords,
357
+ holed_line, blobcount);
358
+ /*limit for line change */
359
+ jumplimit = lineheight * textord_oldbl_jumplimit;
360
+ if (jumplimit < MINASCRISE)
361
+ jumplimit = MINASCRISE;
362
+
363
+ if (textord_oldbl_debug) {
364
+ tprintf
365
+ ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
366
+ block->line_size, lineheight, jumplimit);
367
+ }
368
+ if (holed_line)
369
+ make_holed_baseline (blobcoords, blobcount, spline, &row->baseline,
370
+ row->line_m ());
371
+ else
372
+ make_first_baseline (blobcoords, blobcount,
373
+ xcoords, ycoords, spline, &row->baseline, jumplimit);
374
+ #ifndef GRAPHICS_DISABLED
375
+ if (textord_show_final_rows)
376
+ row->baseline.plot (to_win, ScrollView::GOLDENROD);
377
+ #endif
378
+ if (blobcount > 1) {
379
+ bestpart = partition_line (blobcoords, blobcount,
380
+ &partcount, partids, partsizes,
381
+ &row->baseline, jumplimit, ydiffs);
382
+ pointcount = partition_coords (blobcoords, blobcount,
383
+ partids, bestpart, xcoords, ycoords);
384
+ segments = segment_spline (blobcoords, blobcount,
385
+ xcoords, ycoords,
386
+ degree, pointcount, xstarts);
387
+ if (!holed_line) {
388
+ do {
389
+ row->baseline = QSPLINE (xstarts, segments,
390
+ xcoords, ycoords, pointcount, degree);
391
+ }
392
+ while (textord_oldbl_split_splines
393
+ && split_stepped_spline (&row->baseline, jumplimit / 2,
394
+ xcoords, xstarts, segments));
395
+ }
396
+ find_lesser_parts(row,
397
+ blobcoords,
398
+ blobcount,
399
+ partids,
400
+ partsizes,
401
+ partcount,
402
+ bestpart);
403
+
404
+ }
405
+ else {
406
+ row->xheight = -1.0f; /*failed */
407
+ row->descdrop = 0.0f;
408
+ row->ascrise = 0.0f;
409
+ }
410
+ row->baseline.extrapolate (row->line_m (),
411
+ block->block->bounding_box ().left (),
412
+ block->block->bounding_box ().right ());
413
+ if (textord_really_old_xheight)
414
+ old_first_xheight (row, blobcoords, lineheight,
415
+ blobcount, &row->baseline, jumplimit);
416
+ else
417
+ make_first_xheight (row, blobcoords, lineheight, (int) block->line_size,
418
+ blobcount, &row->baseline, jumplimit);
419
+ free_mem(partids);
420
+ free_mem(xcoords);
421
+ free_mem(ycoords);
422
+ free_mem(blobcoords);
423
+ free_mem(ydiffs);
424
+ }
425
+
426
+
427
+ /**********************************************************************
428
+ * get_blob_coords
429
+ *
430
+ * Fill the blobcoords array with the coordinates of the blobs
431
+ * in the row. The return value is the first guess at the line height.
432
+ **********************************************************************/
433
+
434
+ int get_blob_coords( //get boxes
435
+ TO_ROW *row, //row to use
436
+ inT32 lineheight, //block level
437
+ TBOX *blobcoords, //ouput boxes
438
+ BOOL8 &holed_line, //lost a lot of blobs
439
+ int &outcount //no of real blobs
440
+ ) {
441
+ //blobs
442
+ BLOBNBOX_IT blob_it = row->blob_list ();
443
+ register int blobindex; /*no along text line */
444
+ int losscount; //lost blobs
445
+ int maxlosscount; //greatest lost blobs
446
+ /*height stat collection */
447
+ STATS heightstat (0, MAXHEIGHT);
448
+
449
+ if (blob_it.empty ())
450
+ return 0; //none
451
+ maxlosscount = 0;
452
+ losscount = 0;
453
+ blob_it.mark_cycle_pt ();
454
+ blobindex = 0;
455
+ do {
456
+ blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
457
+ if (blobcoords[blobindex].height () > lineheight * 0.25)
458
+ heightstat.add (blobcoords[blobindex].height (), 1);
459
+ if (blobindex == 0
460
+ || blobcoords[blobindex].height () > lineheight * 0.25
461
+ || blob_it.cycled_list ()) {
462
+ blobindex++; /*no of merged blobs */
463
+ losscount = 0;
464
+ }
465
+ else {
466
+ if (blobcoords[blobindex].height ()
467
+ < blobcoords[blobindex].width () * oldbl_dot_error_size
468
+ && blobcoords[blobindex].width ()
469
+ < blobcoords[blobindex].height () * oldbl_dot_error_size) {
470
+ //counts as dot
471
+ blobindex++;
472
+ losscount = 0;
473
+ }
474
+ else {
475
+ losscount++; //lost it
476
+ if (losscount > maxlosscount)
477
+ //remember max
478
+ maxlosscount = losscount;
479
+ }
480
+ }
481
+ }
482
+ while (!blob_it.cycled_list ());
483
+
484
+ holed_line = maxlosscount > oldbl_holed_losscount;
485
+ outcount = blobindex; /*total blobs */
486
+
487
+ if (heightstat.get_total () > 1)
488
+ /*guess x-height */
489
+ return (int) heightstat.ile (0.25);
490
+ else
491
+ return blobcoords[0].height ();
492
+ }
493
+
494
+
495
+ /**********************************************************************
496
+ * make_first_baseline
497
+ *
498
+ * Make the first estimate at a baseline, either by shifting
499
+ * a supplied previous spline, or by doing a piecewise linear
500
+ * approximation using all the blobs.
501
+ **********************************************************************/
502
+
503
+ void
504
+ make_first_baseline ( //initial approximation
505
+ TBOX blobcoords[], /*blob bounding boxes */
506
+ int blobcount, /*no of blobcoords */
507
+ int xcoords[], /*coords for spline */
508
+ int ycoords[], /*approximator */
509
+ QSPLINE * spline, /*initial spline */
510
+ QSPLINE * baseline, /*output spline */
511
+ float jumplimit /*guess half descenders */
512
+ ) {
513
+ int leftedge; /*left edge of line */
514
+ int rightedge; /*right edge of line */
515
+ int blobindex; /*current blob */
516
+ int segment; /*current segment */
517
+ float prevy, thisy, nexty; /*3 y coords */
518
+ float y1, y2, y3; /*3 smooth blobs */
519
+ float maxmax, minmin; /*absolute limits */
520
+ int x2 = 0; /*right edge of old y3 */
521
+ int ycount; /*no of ycoords in use */
522
+ float yturns[SPLINESIZE]; /*y coords of turn pts */
523
+ int xturns[SPLINESIZE]; /*xcoords of turn pts */
524
+ int xstarts[SPLINESIZE + 1];
525
+ int segments; //no of segments
526
+ ICOORD shift; //shift of spline
527
+
528
+ prevy = 0;
529
+ /*left edge of row */
530
+ leftedge = blobcoords[0].left ();
531
+ /*right edge of line */
532
+ rightedge = blobcoords[blobcount - 1].right ();
533
+ if (spline == NULL /*no given spline */
534
+ || spline->segments < 3 /*or trivial */
535
+ /*or too non-overlap */
536
+ || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
537
+ || spline->xcoords[spline->segments - 1] < rightedge
538
+ - MAXOVERLAP * (rightedge - leftedge)) {
539
+ if (textord_oldbl_paradef)
540
+ return; //use default
541
+ xstarts[0] = blobcoords[0].left () - 1;
542
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
543
+ xcoords[blobindex] = (blobcoords[blobindex].left ()
544
+ + blobcoords[blobindex].right ()) / 2;
545
+ ycoords[blobindex] = blobcoords[blobindex].bottom ();
546
+ }
547
+ xstarts[1] = blobcoords[blobcount - 1].right () + 1;
548
+ segments = 1; /*no of segments */
549
+
550
+ /*linear */
551
+ *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
552
+
553
+ if (blobcount >= 3) {
554
+ y1 = y2 = y3 = 0.0f;
555
+ ycount = 0;
556
+ segment = 0; /*no of segments */
557
+ maxmax = minmin = 0.0f;
558
+ thisy = ycoords[0] - baseline->y (xcoords[0]);
559
+ nexty = ycoords[1] - baseline->y (xcoords[1]);
560
+ for (blobindex = 2; blobindex < blobcount; blobindex++) {
561
+ prevy = thisy; /*shift ycoords */
562
+ thisy = nexty;
563
+ nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
564
+ /*middle of smooth y */
565
+ if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
566
+ y1 = y2; /*shift window */
567
+ y2 = y3;
568
+ y3 = thisy; /*middle point */
569
+ ycount++;
570
+ /*local max */
571
+ if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
572
+ /*local min */
573
+ || (y1 > y2 && y2 <= y3))) {
574
+ if (segment < SPLINESIZE - 2) {
575
+ /*turning pt */
576
+ xturns[segment] = x2;
577
+ yturns[segment] = y2;
578
+ segment++; /*no of spline segs */
579
+ }
580
+ }
581
+ if (ycount == 1) {
582
+ maxmax = minmin = y3;/*initialise limits */
583
+ }
584
+ else {
585
+ if (y3 > maxmax)
586
+ maxmax = y3; /*biggest max */
587
+ if (y3 < minmin)
588
+ minmin = y3; /*smallest min */
589
+ }
590
+ /*possible turning pt */
591
+ x2 = blobcoords[blobindex - 1].right ();
592
+ }
593
+ }
594
+
595
+ jumplimit *= 1.2;
596
+ /*must be wavy */
597
+ if (maxmax - minmin > jumplimit) {
598
+ ycount = segment; /*no of segments */
599
+ for (blobindex = 0, segment = 1; blobindex < ycount;
600
+ blobindex++) {
601
+ if (yturns[blobindex] > minmin + jumplimit
602
+ || yturns[blobindex] < maxmax - jumplimit) {
603
+ /*significant peak */
604
+ if (segment == 1
605
+ || yturns[blobindex] > prevy + jumplimit
606
+ || yturns[blobindex] < prevy - jumplimit) {
607
+ /*different to previous */
608
+ xstarts[segment] = xturns[blobindex];
609
+ segment++;
610
+ prevy = yturns[blobindex];
611
+ }
612
+ /*bigger max */
613
+ else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
614
+ /*smaller min */
615
+ || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
616
+ xstarts[segment - 1] = xturns[blobindex];
617
+ /*improved previous */
618
+ prevy = yturns[blobindex];
619
+ }
620
+ }
621
+ }
622
+ xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
623
+ segments = segment; /*no of segments */
624
+ /*linear */
625
+ *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
626
+ }
627
+ }
628
+ }
629
+ else {
630
+ *baseline = *spline; /*copy it */
631
+ shift = ICOORD (0, (inT16) (blobcoords[0].bottom ()
632
+ - spline->y (blobcoords[0].right ())));
633
+ baseline->move (shift);
634
+ }
635
+ }
636
+
637
+
638
+ /**********************************************************************
639
+ * make_holed_baseline
640
+ *
641
+ * Make the first estimate at a baseline for a row that lost many blobs:
642
+ * fit a straight line of the given gradient through the blob bottoms, and
643
+ * replace it by a supplied previous spline, shifted onto that line, if the spline covers the row.
644
+ **********************************************************************/
645
+
646
+ void
647
+ make_holed_baseline ( //initial approximation
648
+ TBOX blobcoords[], /*blob bounding boxes */
649
+ int blobcount, /*no of blobcoords */
650
+ QSPLINE * spline, /*initial spline */
651
+ QSPLINE * baseline, /*output spline */
652
+ float gradient //of line
653
+ ) {
654
+ int leftedge; /*left edge of line */
655
+ int rightedge; /*right edge of line */
656
+ int blobindex; /*current blob */
657
+ float x; //centre of row
658
+ ICOORD shift; //shift of spline
659
+
660
+ LMS lms(blobcount); //straight baseline
661
+ inT32 xstarts[2]; //straight line
662
+ double coeffs[3];
663
+ float c; //line parameter
664
+
665
+ /*left edge of row */
666
+ leftedge = blobcoords[0].left ();
667
+ /*right edge of line */
668
+ rightedge = blobcoords[blobcount - 1].right ();
669
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
670
+ lms.add (FCOORD ((blobcoords[blobindex].left () +
671
+ blobcoords[blobindex].right ()) / 2.0,
672
+ blobcoords[blobindex].bottom ()));
673
+ }
674
+ lms.constrained_fit (gradient, c);
675
+ xstarts[0] = leftedge;
676
+ xstarts[1] = rightedge;
677
+ coeffs[0] = 0;
678
+ coeffs[1] = gradient;
679
+ coeffs[2] = c;
680
+ *baseline = QSPLINE (1, xstarts, coeffs);
681
+ if (spline != NULL /*no given spline */
682
+ && spline->segments >= 3 /*or trivial */
683
+ /*or too non-overlap */
684
+ && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
685
+ && spline->xcoords[spline->segments - 1] >= rightedge
686
+ - MAXOVERLAP * (rightedge - leftedge)) {
687
+ *baseline = *spline; /*copy it */
688
+ x = (leftedge + rightedge) / 2.0;
689
+ shift = ICOORD (0, (inT16) (gradient * x + c - spline->y (x)));
690
+ baseline->move (shift);
691
+ }
692
+ }
693
+
694
+
695
+ /**********************************************************************
696
+ * partition_line
697
+ *
698
+ * Partition a row of blobs into different groups of continuous
699
+ * y position. jumplimit specifies the max allowable limit on a jump
700
+ * before a new partition is started.
701
+ * The return value is the biggest partition
702
+ **********************************************************************/
703
+
704
+ int
705
+ partition_line ( //partition blobs
706
+ TBOX blobcoords[], //bounding boxes
707
+ int blobcount, /*no of blobs on row */
708
+ int *numparts, /*number of partitions */
709
+ char partids[], /*partition no of each blob */
710
+ int partsizes[], /*no in each partition */
711
+ QSPLINE * spline, /*curve to fit to */
712
+ float jumplimit, /*allowed delta change */
713
+ float ydiffs[] /*diff from spline */
714
+ ) {
715
+ register int blobindex; /*no along text line */
716
+ int bestpart; /*best new partition */
717
+ int biggestpart; /*part with most members */
718
+ float diff; /*difference from line */
719
+ int startx; /*index of start blob */
720
+ float partdiffs[MAXPARTS]; /*step between parts */
721
+
722
+ for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
723
+ partsizes[bestpart] = 0; /*zero them all */
724
+
725
+ startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
726
+ *numparts = 1; /*1 partition */
727
+ bestpart = -1; /*first point */
728
+ for (blobindex = startx; blobindex < blobcount; blobindex++) {
729
+ /*do each blob in row */
730
+ diff = ydiffs[blobindex]; /*diff from line */
731
+ if (textord_oldbl_debug) {
732
+ tprintf ("%d(%d,%d), ", blobindex,
733
+ blobcoords[blobindex].left (),
734
+ blobcoords[blobindex].bottom ());
735
+ }
736
+ bestpart =
737
+ choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
738
+ /*record partition */
739
+ partids[blobindex] = bestpart;
740
+ partsizes[bestpart]++; /*another in it */
741
+ }
742
+
743
+ bestpart = -1; /*first point */
744
+ partsizes[0]--; /*doing 1st pt again */
745
+ /*do each blob in row */
746
+ for (blobindex = startx; blobindex >= 0; blobindex--) {
747
+ diff = ydiffs[blobindex]; /*diff from line */
748
+ if (textord_oldbl_debug) {
749
+ tprintf ("%d(%d,%d), ", blobindex,
750
+ blobcoords[blobindex].left (),
751
+ blobcoords[blobindex].bottom ());
752
+ }
753
+ bestpart =
754
+ choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
755
+ /*record partition */
756
+ partids[blobindex] = bestpart;
757
+ partsizes[bestpart]++; /*another in it */
758
+ }
759
+
760
+ for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
761
+ if (partsizes[bestpart] >= partsizes[biggestpart])
762
+ biggestpart = bestpart; /*new biggest */
763
+ if (textord_oldbl_merge_parts)
764
+ merge_oldbl_parts(blobcoords,
765
+ blobcount,
766
+ partids,
767
+ partsizes,
768
+ biggestpart,
769
+ jumplimit);
770
+ return biggestpart; /*biggest partition */
771
+ }
772
+
773
+
774
+ /**********************************************************************
775
+ * merge_oldbl_parts
776
+ *
777
+ * For any adjacent group of blobs in a different part, put them in the
778
+ * main part if they fit closely to neighbours in the main part.
779
+ **********************************************************************/
780
+
781
+ void
782
+ merge_oldbl_parts ( //partition blobs
783
+ TBOX blobcoords[], //bounding boxes
784
+ int blobcount, /*no of blobs on row */
785
+ char partids[], /*partition no of each blob */
786
+ int partsizes[], /*no in each partition */
787
+ int biggestpart, //major partition
788
+ float jumplimit /*allowed delta change */
789
+ ) {
790
+ BOOL8 found_one; //found a bestpart blob
791
+ BOOL8 close_one; //found was close enough
792
+ register int blobindex; /*no along text line */
793
+ int prevpart; //previous iteration
794
+ int runlength; //no in this part
795
+ float diff; /*difference from line */
796
+ int startx; /*index of start blob */
797
+ int test_blob; //another index
798
+ FCOORD coord; //blob coordinate
799
+ float m, c; //fitted line
800
+ QLSQ stats; //line stuff
801
+
802
+ prevpart = biggestpart;
803
+ runlength = 0;
804
+ startx = 0;
805
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
806
+ if (partids[blobindex] != prevpart) {
807
+ // tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
808
+ // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
809
+ // prevpart,partids[blobindex],runlength);
810
+ if (prevpart != biggestpart && runlength > MAXBADRUN) {
811
+ stats.clear ();
812
+ for (test_blob = startx; test_blob < blobindex; test_blob++) {
813
+ coord = FCOORD ((blobcoords[test_blob].left ()
814
+ + blobcoords[test_blob].right ()) / 2.0,
815
+ blobcoords[test_blob].bottom ());
816
+ stats.add (coord.x (), coord.y ());
817
+ }
818
+ stats.fit (1);
819
+ m = stats.get_b ();
820
+ c = stats.get_c ();
821
+ if (textord_oldbl_debug)
822
+ tprintf ("Fitted line y=%g x + %g\n", m, c);
823
+ found_one = FALSE;
824
+ close_one = FALSE;
825
+ for (test_blob = 1; !found_one
826
+ && (startx - test_blob >= 0
827
+ || blobindex + test_blob <= blobcount); test_blob++) {
828
+ if (startx - test_blob >= 0
829
+ && partids[startx - test_blob] == biggestpart) {
830
+ found_one = TRUE;
831
+ coord = FCOORD ((blobcoords[startx - test_blob].left ()
832
+ + blobcoords[startx -
833
+ test_blob].right ()) /
834
+ 2.0,
835
+ blobcoords[startx -
836
+ test_blob].bottom ());
837
+ diff = m * coord.x () + c - coord.y ();
838
+ if (textord_oldbl_debug)
839
+ tprintf
840
+ ("Diff of common blob to suspect part=%g at (%g,%g)\n",
841
+ diff, coord.x (), coord.y ());
842
+ if (diff < jumplimit && -diff < jumplimit)
843
+ close_one = TRUE;
844
+ }
845
+ if (blobindex + test_blob <= blobcount
846
+ && partids[blobindex + test_blob - 1] == biggestpart) {
847
+ found_one = TRUE;
848
+ coord =
849
+ FCOORD ((blobcoords[blobindex + test_blob - 1].
850
+ left () + blobcoords[blobindex + test_blob -
851
+ 1].right ()) / 2.0,
852
+ blobcoords[blobindex + test_blob -
853
+ 1].bottom ());
854
+ diff = m * coord.x () + c - coord.y ();
855
+ if (textord_oldbl_debug)
856
+ tprintf
857
+ ("Diff of common blob to suspect part=%g at (%g,%g)\n",
858
+ diff, coord.x (), coord.y ());
859
+ if (diff < jumplimit && -diff < jumplimit)
860
+ close_one = TRUE;
861
+ }
862
+ }
863
+ if (close_one) {
864
+ if (textord_oldbl_debug)
865
+ tprintf
866
+ ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
867
+ runlength, biggestpart, prevpart,
868
+ blobcoords[startx].left (),
869
+ blobcoords[startx].bottom ());
870
+ //switch sides
871
+ partsizes[prevpart] -= runlength;
872
+ for (test_blob = startx; test_blob < blobindex; test_blob++)
873
+ partids[test_blob] = biggestpart;
874
+ }
875
+ }
876
+ prevpart = partids[blobindex];
877
+ runlength = 1;
878
+ startx = blobindex;
879
+ }
880
+ else
881
+ runlength++;
882
+ }
883
+ }
884
+
885
+
886
+ /**********************************************************************
887
+ * get_ydiffs
888
+ *
889
+ * Get the differences between the blobs and the spline,
890
+ * putting them in ydiffs. The return value is the index
891
+ * of the blob in the middle of the "best behaved" region
892
+ **********************************************************************/
893
+
894
+ int
895
+ get_ydiffs ( //evaluate differences
896
+ TBOX blobcoords[], //bounding boxes
897
+ int blobcount, /*no of blobs */
898
+ QSPLINE * spline, /*approximating spline */
899
+ float ydiffs[] /*output */
900
+ ) {
901
+ register int blobindex; /*current blob */
902
+ int xcentre; /*xcoord */
903
+ int lastx; /*last xcentre */
904
+ float diffsum; /*sum of diffs */
905
+ float diff; /*current difference */
906
+ float drift; /*sum of spline steps */
907
+ float bestsum; /*smallest diffsum */
908
+ int bestindex; /*index of bestsum */
909
+
910
+ diffsum = 0.0f;
911
+ bestindex = 0;
912
+ bestsum = (float) MAX_INT32;
913
+ drift = 0.0f;
914
+ lastx = blobcoords[0].left ();
915
+ /*do each blob in row */
916
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
917
+ /*centre of blob */
918
+ xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
919
+ //step functions in spline
920
+ drift += spline->step (lastx, xcentre);
921
+ lastx = xcentre;
922
+ diff = blobcoords[blobindex].bottom ();
923
+ diff -= spline->y (xcentre);
924
+ diff += drift;
925
+ ydiffs[blobindex] = diff; /*store difference */
926
+ if (blobindex > 2)
927
+ /*remove old one */
928
+ diffsum -= ABS (ydiffs[blobindex - 3]);
929
+ diffsum += ABS (diff); /*add new one */
930
+ if (blobindex >= 2 && diffsum < bestsum) {
931
+ bestsum = diffsum; /*find min sum */
932
+ bestindex = blobindex - 1; /*middle of set */
933
+ }
934
+ }
935
+ return bestindex;
936
+ }
937
+
938
+
939
+ /**********************************************************************
940
+ * choose_partition
941
+ *
942
+ * Choose a partition for the point and return the index.
943
+ **********************************************************************/
944
+
945
+ int
946
+ choose_partition ( //select partition
947
+ register float diff, /*diff from spline */
948
+ float partdiffs[], /*diff on all parts */
949
+ int lastpart, /*last assigned partition */
950
+ float jumplimit, /*new part threshold */
951
+ int *partcount /*no of partitions */
952
+ ) {
953
+ register int partition; /*partition no */
954
+ int bestpart; /*best new partition */
955
+ float bestdelta; /*best gap from a part */
956
+ static float drift; /*drift from spline */
957
+ float delta; /*diff from part */
958
+ static float lastdelta; /*previous delta */
959
+
960
+ if (lastpart < 0) {
961
+ partdiffs[0] = diff;
962
+ lastpart = 0; /*first point */
963
+ drift = 0.0f;
964
+ lastdelta = 0.0f;
965
+ }
966
+ /*adjusted diff from part */
967
+ delta = diff - partdiffs[lastpart] - drift;
968
+ if (textord_oldbl_debug) {
969
+ tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift);
970
+ }
971
+ if (ABS (delta) > jumplimit / 2) {
972
+ /*delta on part 0 */
973
+ bestdelta = diff - partdiffs[0] - drift;
974
+ bestpart = 0; /*0 best so far */
975
+ for (partition = 1; partition < *partcount; partition++) {
976
+ delta = diff - partdiffs[partition] - drift;
977
+ if (ABS (delta) < ABS (bestdelta)) {
978
+ bestdelta = delta;
979
+ bestpart = partition; /*part with nearest jump */
980
+ }
981
+ }
982
+ delta = bestdelta;
983
+ /*too far away */
984
+ if (ABS (bestdelta) > jumplimit
985
+ && *partcount < MAXPARTS) { /*and spare part left */
986
+ bestpart = (*partcount)++; /*best was new one */
987
+ /*start new one */
988
+ partdiffs[bestpart] = diff - drift;
989
+ delta = 0.0f;
990
+ }
991
+ }
992
+ else {
993
+ bestpart = lastpart; /*best was last one */
994
+ }
995
+
996
+ if (bestpart == lastpart
997
+ && (ABS (delta - lastdelta) < jumplimit / 2
998
+ || ABS (delta) < jumplimit / 2))
999
+ /*smooth the drift */
1000
+ drift = (3 * drift + delta) / 3;
1001
+ lastdelta = delta;
1002
+
1003
+ if (textord_oldbl_debug) {
1004
+ tprintf ("P=%d\n", bestpart);
1005
+ }
1006
+
1007
+ return bestpart;
1008
+ }
1009
+
1010
+
1011
+ ///*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking
1012
+ //partitions and gives all the rest partid 0*/
1013
+ //
1014
+ //merge_partitions(partids,partcount,blobcount,bestpart)
1015
+ //register char *partids; /*partition numbers*/
1016
+ //int partcount; /*no of partitions*/
1017
+ //int blobcount; /*no of blobs*/
1018
+ //int bestpart; /*best partition*/
1019
+ //{
1020
+ // register int blobindex; /*no along text line*/
1021
+ // int runlength; /*run of same partition*/
1022
+ // int bestrun; /*biggest runlength*/
1023
+ //
1024
+ // bestrun=0; /*no runs yet*/
1025
+ // runlength=1;
1026
+ // for (blobindex=1;blobindex<blobcount;blobindex++)
1027
+ // { if (partids[blobindex]!=partids[blobindex-1])
1028
+ // { if (runlength>bestrun)
1029
+ // bestrun=runlength; /*find biggest run*/
1030
+ // runlength=1; /*new run*/
1031
+ // }
1032
+ // else
1033
+ // { runlength++;
1034
+ // }
1035
+ // }
1036
+ // if (runlength>bestrun)
1037
+ // bestrun=runlength;
1038
+ //
1039
+ // for (blobindex=0;blobindex<blobcount;blobindex++)
1040
+ // { if (blobindex<1
1041
+ // || partids[blobindex]!=partids[blobindex-1])
1042
+ // { if ((blobindex+1>=blobcount
1043
+ // || partids[blobindex]!=partids[blobindex+1])
1044
+ // /*loner*/
1045
+ // && (bestrun>2 || partids[blobindex]!=bestpart))
1046
+ // { partids[blobindex]=partcount; /*discard loner*/
1047
+ // }
1048
+ // else if (blobindex+1<blobcount
1049
+ // && partids[blobindex]==partids[blobindex+1]
1050
+ // /*pair*/
1051
+ // && (blobindex+2>=blobcount
1052
+ // || partids[blobindex]!=partids[blobindex+2])
1053
+ // && (bestrun>3 || partids[blobindex]!=bestpart))
1054
+ // { partids[blobindex]=partcount; /*discard both*/
1055
+ // partids[blobindex+1]=partcount;
1056
+ // }
1057
+ // }
1058
+ // }
1059
+ // for (blobindex=0;blobindex<blobcount;blobindex++)
1060
+ // { if (partids[blobindex]<partcount)
1061
+ // partids[blobindex]=0; /*all others together*/
1062
+ // }
1063
+ //}
1064
+
1065
+ /**********************************************************************
1066
+ * partition_coords
1067
+ *
1068
+ * Get the x,y coordinates of all points in the bestpart and put them
1069
+ * in xcoords,ycoords. Return the number of points found.
1070
+ **********************************************************************/
1071
+
1072
+ int
1073
+ partition_coords ( //find relevant coords
1074
+ TBOX blobcoords[], //bounding boxes
1075
+ int blobcount, /*no of blobs in row */
1076
+ char partids[], /*partition no of each blob */
1077
+ int bestpart, /*best new partition */
1078
+ int xcoords[], /*points to work on */
1079
+ int ycoords[] /*points to work on */
1080
+ ) {
1081
+ register int blobindex; /*no along text line */
1082
+ int pointcount; /*no of points */
1083
+
1084
+ pointcount = 0;
1085
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
1086
+ if (partids[blobindex] == bestpart) {
1087
+ /*centre of blob */
1088
+ xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
1089
+ ycoords[pointcount++] = blobcoords[blobindex].bottom ();
1090
+ }
1091
+ }
1092
+ return pointcount; /*no of points found */
1093
+ }
1094
+
1095
+
1096
+ /**********************************************************************
1097
+ * segment_spline
1098
+ *
1099
+ * Segment the row at midpoints between maxima and minima of the x,y pairs.
1100
+ * The xstarts of the segments are returned and the number found.
1101
+ **********************************************************************/
1102
+
1103
+ int
1104
+ segment_spline ( //make xstarts
1105
+ TBOX blobcoords[], //boundign boxes
1106
+ int blobcount, /*no of blobs in row */
1107
+ int xcoords[], /*points to work on */
1108
+ int ycoords[], /*points to work on */
1109
+ int degree, int pointcount, /*no of points */
1110
+ int xstarts[] //result
1111
+ ) {
1112
+ register int ptindex; /*no along text line */
1113
+ register int segment; /*partition no */
1114
+ int lastmin, lastmax; /*possible turn points */
1115
+ int turnpoints[SPLINESIZE]; /*good turning points */
1116
+ int turncount; /*no of turning points */
1117
+ int max_x; //max specified coord
1118
+
1119
+ xstarts[0] = xcoords[0] - 1; //leftmost defined pt
1120
+ max_x = xcoords[pointcount - 1] + 1;
1121
+ if (degree < 2)
1122
+ pointcount = 0;
1123
+ turncount = 0; /*no turning points yet */
1124
+ if (pointcount > 3) {
1125
+ ptindex = 1;
1126
+ lastmax = lastmin = 0; /*start with first one */
1127
+ while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
1128
+ /*minimum */
1129
+ if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
1130
+ if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
1131
+ if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
1132
+ /*new max point */
1133
+ turnpoints[turncount++] = lastmax;
1134
+ lastmin = ptindex; /*latest minimum */
1135
+ }
1136
+ else if (ycoords[ptindex] < ycoords[lastmin]) {
1137
+ lastmin = ptindex; /*lower minimum */
1138
+ }
1139
+ }
1140
+
1141
+ /*maximum */
1142
+ if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
1143
+ if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
1144
+ if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
1145
+ /*new min point */
1146
+ turnpoints[turncount++] = lastmin;
1147
+ lastmax = ptindex; /*latest maximum */
1148
+ }
1149
+ else if (ycoords[ptindex] > ycoords[lastmax]) {
1150
+ lastmax = ptindex; /*higher maximum */
1151
+ }
1152
+ }
1153
+ ptindex++;
1154
+ }
1155
+ /*possible global min */
1156
+ if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
1157
+ && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
1158
+ if (turncount < SPLINESIZE - 1)
1159
+ /*2 more turns */
1160
+ turnpoints[turncount++] = lastmax;
1161
+ if (turncount < SPLINESIZE - 1)
1162
+ turnpoints[turncount++] = ptindex;
1163
+ }
1164
+ else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
1165
+ /*possible global max */
1166
+ && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
1167
+ if (turncount < SPLINESIZE - 1)
1168
+ /*2 more turns */
1169
+ turnpoints[turncount++] = lastmin;
1170
+ if (turncount < SPLINESIZE - 1)
1171
+ turnpoints[turncount++] = ptindex;
1172
+ }
1173
+ else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
1174
+ && turncount < SPLINESIZE - 1) {
1175
+ if (ycoords[ptindex] > ycoords[lastmax])
1176
+ turnpoints[turncount++] = ptindex;
1177
+ else
1178
+ turnpoints[turncount++] = lastmax;
1179
+ }
1180
+ else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
1181
+ && turncount < SPLINESIZE - 1) {
1182
+ if (ycoords[ptindex] < ycoords[lastmin])
1183
+ turnpoints[turncount++] = ptindex;
1184
+ else
1185
+ turnpoints[turncount++] = lastmin;
1186
+ }
1187
+ }
1188
+
1189
+ if (textord_oldbl_debug && turncount > 0)
1190
+ tprintf ("First turn is %d at (%d,%d)\n",
1191
+ turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
1192
+ for (segment = 1; segment < turncount; segment++) {
1193
+ /*centre y coord */
1194
+ lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
1195
+
1196
+ /* fix alg so that it works with both rising and falling sections */
1197
+ if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
1198
+ /*find rising y centre */
1199
+ for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
1200
+ else
1201
+ /*find falling y centre */
1202
+ for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
1203
+
1204
+ /*centre x */
1205
+ xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
1206
+ + xcoords[turnpoints[segment - 1]]
1207
+ + xcoords[turnpoints[segment]] + 2) / 4;
1208
+ /*halfway between turns */
1209
+ if (textord_oldbl_debug)
1210
+ tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
1211
+ segment, turnpoints[segment],
1212
+ xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
1213
+ ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
1214
+ }
1215
+
1216
+ xstarts[segment] = max_x;
1217
+ return segment; /*no of splines */
1218
+ }
1219
+
1220
+
1221
+ /**********************************************************************
1222
+ * split_stepped_spline
1223
+ *
1224
+ * Re-segment the spline in cases where there is a big step function.
1225
+ * Return TRUE if any were done.
1226
+ **********************************************************************/
1227
+
1228
+ BOOL8
1229
+ split_stepped_spline ( //make xstarts
1230
+ QSPLINE * baseline, //current shot
1231
+ float jumplimit, //max step fuction
1232
+ int xcoords[], /*points to work on */
1233
+ int xstarts[], //result
1234
+ int &segments //no of segments
1235
+ ) {
1236
+ BOOL8 doneany; //return value
1237
+ register int segment; /*partition no */
1238
+ int startindex, centreindex, endindex;
1239
+ float leftcoord, rightcoord;
1240
+ int leftindex, rightindex;
1241
+ float step; //spline step
1242
+
1243
+ doneany = FALSE;
1244
+ startindex = 0;
1245
+ for (segment = 1; segment < segments - 1; segment++) {
1246
+ step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1247
+ (xstarts[segment] + xstarts[segment + 1]) / 2.0);
1248
+ if (step < 0)
1249
+ step = -step;
1250
+ if (step > jumplimit) {
1251
+ while (xcoords[startindex] < xstarts[segment - 1])
1252
+ startindex++;
1253
+ centreindex = startindex;
1254
+ while (xcoords[centreindex] < xstarts[segment])
1255
+ centreindex++;
1256
+ endindex = centreindex;
1257
+ while (xcoords[endindex] < xstarts[segment + 1])
1258
+ endindex++;
1259
+ if (segments >= SPLINESIZE) {
1260
+ if (textord_debug_baselines)
1261
+ tprintf ("Too many segments to resegment spline!!\n");
1262
+ }
1263
+ else if (endindex - startindex >= textord_spline_medianwin * 3) {
1264
+ while (centreindex - startindex <
1265
+ textord_spline_medianwin * 3 / 2)
1266
+ centreindex++;
1267
+ while (endindex - centreindex <
1268
+ textord_spline_medianwin * 3 / 2)
1269
+ centreindex--;
1270
+ leftindex = (startindex + startindex + centreindex) / 3;
1271
+ rightindex = (centreindex + endindex + endindex) / 3;
1272
+ leftcoord =
1273
+ (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
1274
+ rightcoord =
1275
+ (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
1276
+ while (xcoords[leftindex] > leftcoord
1277
+ && leftindex - startindex > textord_spline_medianwin)
1278
+ leftindex--;
1279
+ while (xcoords[leftindex] < leftcoord
1280
+ && centreindex - leftindex >
1281
+ textord_spline_medianwin / 2)
1282
+ leftindex++;
1283
+ if (xcoords[leftindex] - leftcoord >
1284
+ leftcoord - xcoords[leftindex - 1])
1285
+ leftindex--;
1286
+ while (xcoords[rightindex] > rightcoord
1287
+ && rightindex - centreindex >
1288
+ textord_spline_medianwin / 2)
1289
+ rightindex--;
1290
+ while (xcoords[rightindex] < rightcoord
1291
+ && endindex - rightindex > textord_spline_medianwin)
1292
+ rightindex++;
1293
+ if (xcoords[rightindex] - rightcoord >
1294
+ rightcoord - xcoords[rightindex - 1])
1295
+ rightindex--;
1296
+ if (textord_debug_baselines)
1297
+ tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
1298
+ xstarts[segment],
1299
+ baseline->
1300
+ step ((xstarts[segment - 1] +
1301
+ xstarts[segment]) / 2.0,
1302
+ (xstarts[segment] +
1303
+ xstarts[segment + 1]) / 2.0),
1304
+ (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1305
+ (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
1306
+ insert_spline_point (xstarts, segment,
1307
+ (xcoords[leftindex - 1] +
1308
+ xcoords[leftindex]) / 2,
1309
+ (xcoords[rightindex - 1] +
1310
+ xcoords[rightindex]) / 2, segments);
1311
+ doneany = TRUE;
1312
+ }
1313
+ else if (textord_debug_baselines) {
1314
+ tprintf
1315
+ ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
1316
+ startindex, centreindex, endindex,
1317
+ (inT32) textord_spline_medianwin);
1318
+ }
1319
+ }
1320
+ // else tprintf("Spline step at %d is %g\n",
1321
+ // xstarts[segment],
1322
+ // baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
1323
+ // (xstarts[segment]+xstarts[segment+1])/2.0));
1324
+ }
1325
+ return doneany;
1326
+ }
1327
+
1328
+
1329
/**********************************************************************
 * insert_spline_point
 *
 * Replace the boundary xstarts[segment] by the two boundaries coord1
 * and coord2.  Entries segment + 1 .. segments are moved up one slot
 * to make room and segments is incremented, so xstarts must have space
 * for segments + 2 entries on entry.
 **********************************************************************/

void insert_spline_point(                // split one boundary in two
                         int xstarts[],  // starts to shuffle
                         int segment,    // insertion pt
                         int coord1,     // first new boundary
                         int coord2,     // second new boundary
                         int &segments   // total segments (updated)
                        ) {
  int slot = segments;                   // entry being moved up

  while (slot > segment) {
    xstarts[slot + 1] = xstarts[slot];
    slot--;
  }
  segments++;
  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}
1350
+
1351
+
1352
+ /**********************************************************************
1353
+ * find_lesser_parts
1354
+ *
1355
+ * Average the step from the spline for the other partitions
1356
+ * and find the commonest partition which has a descender.
1357
+ **********************************************************************/
1358
+
1359
+ void
1360
+ find_lesser_parts ( //get descenders
1361
+ TO_ROW * row, //row to process
1362
+ TBOX blobcoords[], //bounding boxes
1363
+ int blobcount, /*no of blobs */
1364
+ char partids[], /*partition of each blob */
1365
+ int partsizes[], /*size of each part */
1366
+ int partcount, /*no of partitions */
1367
+ int bestpart /*biggest partition */
1368
+ ) {
1369
+ register int blobindex; /*index of blob */
1370
+ register int partition; /*current partition */
1371
+ int xcentre; /*centre of blob */
1372
+ int poscount; /*count of best up step */
1373
+ int negcount; /*count of best down step */
1374
+ float partsteps[MAXPARTS]; /*average step to part */
1375
+ float bestpos; /*best up step */
1376
+ float bestneg; /*best down step */
1377
+ int runlength; /*length of bad run */
1378
+ int biggestrun; /*biggest bad run */
1379
+
1380
+ biggestrun = 0;
1381
+ for (partition = 0; partition < partcount; partition++)
1382
+ partsteps[partition] = 0.0; /*zero accumulators */
1383
+ for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1384
+ xcentre = (blobcoords[blobindex].left ()
1385
+ + blobcoords[blobindex].right ()) >> 1;
1386
+ /*in other parts */
1387
+ if (partids[blobindex] != bestpart) {
1388
+ runlength++; /*run of non bests */
1389
+ if (runlength > biggestrun)
1390
+ biggestrun = runlength;
1391
+ partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()
1392
+ - row->baseline.y (xcentre);
1393
+ }
1394
+ else
1395
+ runlength = 0;
1396
+ }
1397
+ if (biggestrun > MAXBADRUN)
1398
+ row->xheight = -1.0f; /*failed */
1399
+ else
1400
+ row->xheight = 1.0f; /*success */
1401
+ poscount = negcount = 0;
1402
+ bestpos = bestneg = 0.0; /*no step yet */
1403
+ for (partition = 0; partition < partcount; partition++) {
1404
+ if (partition != bestpart) {
1405
+
1406
+ //by jetsoft divide by zero possible
1407
+ if (partsizes[partition]==0)
1408
+ partsteps[partition]=0;
1409
+ else
1410
+ partsteps[partition] /= partsizes[partition];
1411
+ //
1412
+
1413
+
1414
+ if (partsteps[partition] >= MINASCRISE
1415
+ && partsizes[partition] > poscount) {
1416
+ /*ascender rise */
1417
+ bestpos = partsteps[partition];
1418
+ /*2nd most popular */
1419
+ poscount = partsizes[partition];
1420
+ }
1421
+ if (partsteps[partition] <= -MINASCRISE
1422
+ && partsizes[partition] > negcount) {
1423
+ /*ascender rise */
1424
+ bestneg = partsteps[partition];
1425
+ /*2nd most popular */
1426
+ negcount = partsizes[partition];
1427
+ }
1428
+ }
1429
+ }
1430
+ /*average x-height */
1431
+ partsteps[bestpart] /= blobcount;
1432
+ row->descdrop = bestneg;
1433
+ }
1434
+
1435
+
1436
+ /**********************************************************************
1437
+ * old_first_xheight
1438
+ *
1439
+ * Makes an x-height spline by copying the baseline and shifting it.
1440
+ * It estimates the x-height across the line to use as the shift.
1441
+ * It also finds the ascender height if it can.
1442
+ **********************************************************************/
1443
+
1444
+ void
1445
+ old_first_xheight ( //the wiseowl way
1446
+ TO_ROW * row, /*current row */
1447
+ TBOX blobcoords[], /*blob bounding boxes */
1448
+ int initialheight, //initial guess
1449
+ int blobcount, /*blobs in blobcoords */
1450
+ QSPLINE * baseline, /*established */
1451
+ float jumplimit /*min ascender height */
1452
+ ) {
1453
+ register int blobindex; /*current blob */
1454
+ /*height statistics */
1455
+ STATS heightstat (0, MAXHEIGHT);
1456
+ int height; /*height of blob */
1457
+ int xcentre; /*centre of blob */
1458
+ int lineheight; /*approx xheight */
1459
+ float ascenders; /*ascender sum */
1460
+ int asccount; /*no of ascenders */
1461
+ float xsum; /*xheight sum */
1462
+ int xcount; /*xheight count */
1463
+ register float diff; /*height difference */
1464
+
1465
+ if (blobcount > 1) {
1466
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
1467
+ xcentre = (blobcoords[blobindex].left ()
1468
+ + blobcoords[blobindex].right ()) / 2;
1469
+ /*height of blob */
1470
+ height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5);
1471
+ if (height > initialheight * oldbl_xhfract
1472
+ && height > textord_min_xheight)
1473
+ heightstat.add (height, 1);
1474
+ }
1475
+ if (heightstat.get_total () > 3) {
1476
+ lineheight = (int) heightstat.ile (0.25);
1477
+ if (lineheight <= 0)
1478
+ lineheight = (int) heightstat.ile (0.5);
1479
+ }
1480
+ else
1481
+ lineheight = initialheight;
1482
+ }
1483
+ else {
1484
+ lineheight = (int) (blobcoords[0].top ()
1485
+ - baseline->y ((blobcoords[0].left ()
1486
+ + blobcoords[0].right ()) / 2) +
1487
+ 0.5);
1488
+ }
1489
+
1490
+ xsum = 0.0f;
1491
+ xcount = 0;
1492
+ for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
1493
+ blobindex++) {
1494
+ xcentre = (blobcoords[blobindex].left ()
1495
+ + blobcoords[blobindex].right ()) / 2;
1496
+ diff = blobcoords[blobindex].top () - baseline->y (xcentre);
1497
+ /*is it ascender */
1498
+ if (diff > lineheight + jumplimit) {
1499
+ ascenders += diff;
1500
+ asccount++; /*count ascenders */
1501
+ }
1502
+ else if (diff > lineheight - jumplimit) {
1503
+ xsum += diff; /*mean xheight */
1504
+ xcount++;
1505
+ }
1506
+ }
1507
+ if (xcount > 0)
1508
+ xsum /= xcount; /*average xheight */
1509
+ else
1510
+ xsum = (float) lineheight; /*guess it */
1511
+ row->xheight *= xsum;
1512
+ if (asccount > 0)
1513
+ row->ascrise = ascenders / asccount - xsum;
1514
+ else
1515
+ row->ascrise = 0.0f; /*had none */
1516
+ if (row->xheight == 0)
1517
+ row->xheight = -1.0f;
1518
+ }
1519
+
1520
+
1521
+ /**********************************************************************
1522
+ * make_first_xheight
1523
+ *
1524
+ * Makes an x-height spline by copying the baseline and shifting it.
1525
+ * It estimates the x-height across the line to use as the shift.
1526
+ * It also finds the ascender height if it can.
1527
+ **********************************************************************/
1528
+
1529
+ void
1530
+ make_first_xheight ( //find xheight
1531
+ TO_ROW * row, /*current row */
1532
+ TBOX blobcoords[], /*blob bounding boxes */
1533
+ int lineheight, //initial guess
1534
+ int init_lineheight, //block level guess
1535
+ int blobcount, /*blobs in blobcoords */
1536
+ QSPLINE * baseline, /*established */
1537
+ float jumplimit /*min ascender height */
1538
+ ) {
1539
+ STATS heightstat (0, HEIGHTBUCKETS);
1540
+ int lefts[HEIGHTBUCKETS];
1541
+ int rights[HEIGHTBUCKETS];
1542
+ int modelist[MODENUM];
1543
+ int blobindex;
1544
+ int mode_count; //blobs to count in thr
1545
+ int sign_bit;
1546
+ int mode_threshold;
1547
+ const int kBaselineTouch = 2; // This really should change with resolution.
1548
+ const int kGoodStrength = 8; // Strength of baseline-touching heights.
1549
+ const float kMinHeight = 0.25; // Min fraction of lineheight to use.
1550
+
1551
+ sign_bit = row->xheight > 0 ? 1 : -1;
1552
+
1553
+ memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
1554
+ memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
1555
+ mode_count = 0;
1556
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
1557
+ int xcenter = (blobcoords[blobindex].left () +
1558
+ blobcoords[blobindex].right ()) / 2;
1559
+ float base = baseline->y(xcenter);
1560
+ float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
1561
+ int strength = textord_ocropus_mode &&
1562
+ bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
1563
+ int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);
1564
+ if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
1565
+ if (height > lineheight * oldbl_xhfract
1566
+ && height > textord_min_xheight) {
1567
+ heightstat.add (height, strength);
1568
+ if (height < HEIGHTBUCKETS) {
1569
+ if (xcenter > rights[height])
1570
+ rights[height] = xcenter;
1571
+ if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
1572
+ lefts[height] = xcenter;
1573
+ }
1574
+ }
1575
+ mode_count += strength;
1576
+ }
1577
+ }
1578
+
1579
+ mode_threshold = (int) (blobcount * 0.1);
1580
+ if (oldbl_dot_error_size > 1 || oldbl_xhfix)
1581
+ mode_threshold = (int) (mode_count * 0.1);
1582
+
1583
+ if (textord_oldbl_debug) {
1584
+ tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
1585
+ blobcount, mode_count, mode_threshold);
1586
+ }
1587
+ find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
1588
+ if (textord_oldbl_debug) {
1589
+ for (blobindex = 0; blobindex < MODENUM; blobindex++)
1590
+ tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
1591
+ tprintf ("\n");
1592
+ }
1593
+ pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
1594
+
1595
+ if (textord_oldbl_debug)
1596
+ tprintf ("Output xheight=%g\n", row->xheight);
1597
+ if (row->xheight < 0 && textord_oldbl_debug)
1598
+ tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);
1599
+
1600
+ if (sign_bit < 0)
1601
+ row->xheight = -row->xheight;
1602
+ }
1603
+
1604
+ /**********************************************************************
1605
+ * find_top_modes
1606
+ *
1607
+ * Fill the input array with the indices of the top ten modes of the
1608
+ * input distribution.
1609
+ **********************************************************************/
1610
+
1611
+ const int kMinModeFactorOcropus = 32;
1612
+ const int kMinModeFactor = 12;
1613
+
1614
+ void
1615
+ find_top_modes ( //get modes
1616
+ STATS * stats, //stats to hack
1617
+ int statnum, //no of piles
1618
+ int modelist[], int modenum //no of modes to get
1619
+ ) {
1620
+ int mode_count;
1621
+ int last_i = 0;
1622
+ int last_max = MAX_INT32;
1623
+ int i;
1624
+ int mode;
1625
+ int total_max = 0;
1626
+ int mode_factor = textord_ocropus_mode ?
1627
+ kMinModeFactorOcropus : kMinModeFactor;
1628
+
1629
+ for (mode_count = 0; mode_count < modenum; mode_count++) {
1630
+ mode = 0;
1631
+ for (i = 0; i < statnum; i++) {
1632
+ if (stats->pile_count (i) > stats->pile_count (mode)) {
1633
+ if ((stats->pile_count (i) < last_max) ||
1634
+ ((stats->pile_count (i) == last_max) && (i > last_i))) {
1635
+ mode = i;
1636
+ }
1637
+ }
1638
+ }
1639
+ last_i = mode;
1640
+ last_max = stats->pile_count (last_i);
1641
+ total_max += last_max;
1642
+ if (last_max <= total_max / mode_factor)
1643
+ mode = 0;
1644
+ modelist[mode_count] = mode;
1645
+ }
1646
+ }
1647
+
1648
+
1649
+ /**********************************************************************
1650
+ * pick_x_height
1651
+ *
1652
+ * Choose based on the height modes the best x height value.
1653
+ **********************************************************************/
1654
+
1655
+ void pick_x_height(TO_ROW * row, //row to do
1656
+ int modelist[],
1657
+ int lefts[], int rights[],
1658
+ STATS * heightstat,
1659
+ int mode_threshold) {
1660
+ int x;
1661
+ int y;
1662
+ int z;
1663
+ float ratio;
1664
+ int found_one_bigger = FALSE;
1665
+ int best_x_height = 0;
1666
+ int best_asc = 0;
1667
+ int num_in_best;
1668
+
1669
+ for (x = 0; x < MODENUM; x++) {
1670
+ for (y = 0; y < MODENUM; y++) {
1671
+ /* Check for two modes */
1672
+ if (modelist[x] && modelist[y] &&
1673
+ heightstat->pile_count (modelist[x]) > mode_threshold &&
1674
+ (!textord_ocropus_mode ||
1675
+ MIN(rights[modelist[x]], rights[modelist[y]]) >
1676
+ MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
1677
+ ratio = (float) modelist[y] / (float) modelist[x];
1678
+ if (1.2 < ratio && ratio < 1.8) {
1679
+ /* Two modes found */
1680
+ best_x_height = modelist[x];
1681
+ num_in_best = heightstat->pile_count (modelist[x]);
1682
+
1683
+ /* Try to get one higher */
1684
+ do {
1685
+ found_one_bigger = FALSE;
1686
+ for (z = 0; z < MODENUM; z++) {
1687
+ if (modelist[z] == best_x_height + 1 &&
1688
+ (!textord_ocropus_mode ||
1689
+ MIN(rights[modelist[x]], rights[modelist[y]]) >
1690
+ MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
1691
+ ratio = (float) modelist[y] / (float) modelist[z];
1692
+ if ((1.2 < ratio && ratio < 1.8) &&
1693
+ /* Should be half of best */
1694
+ heightstat->pile_count (modelist[z]) >
1695
+ num_in_best * 0.5) {
1696
+ best_x_height++;
1697
+ found_one_bigger = TRUE;
1698
+ break;
1699
+ }
1700
+ }
1701
+ }
1702
+ }
1703
+ while (found_one_bigger);
1704
+
1705
+ /* try to get a higher ascender */
1706
+
1707
+ best_asc = modelist[y];
1708
+ num_in_best = heightstat->pile_count (modelist[y]);
1709
+
1710
+ /* Try to get one higher */
1711
+ do {
1712
+ found_one_bigger = FALSE;
1713
+ for (z = 0; z < MODENUM; z++) {
1714
+ if (modelist[z] > best_asc &&
1715
+ (!textord_ocropus_mode ||
1716
+ MIN(rights[modelist[x]], rights[modelist[y]]) >
1717
+ MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
1718
+ ratio = (float) modelist[z] / (float) best_x_height;
1719
+ if ((1.2 < ratio && ratio < 1.8) &&
1720
+ /* Should be half of best */
1721
+ heightstat->pile_count (modelist[z]) >
1722
+ num_in_best * 0.5) {
1723
+ best_asc = modelist[z];
1724
+ found_one_bigger = TRUE;
1725
+ break;
1726
+ }
1727
+ }
1728
+ }
1729
+ }
1730
+ while (found_one_bigger);
1731
+
1732
+ row->xheight = (float) best_x_height;
1733
+ row->ascrise = (float) best_asc - best_x_height;
1734
+ return;
1735
+ }
1736
+ }
1737
+ }
1738
+ }
1739
+
1740
+ best_x_height = modelist[0]; /* Single Mode found */
1741
+ num_in_best = heightstat->pile_count (best_x_height);
1742
+ do {
1743
+ /* Try to get one higher */
1744
+ found_one_bigger = FALSE;
1745
+ for (z = 1; z < MODENUM; z++) {
1746
+ /* Should be half of best */
1747
+ if ((modelist[z] == best_x_height + 1) &&
1748
+ (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
1749
+ best_x_height++;
1750
+ found_one_bigger = TRUE;
1751
+ break;
1752
+ }
1753
+ }
1754
+ }
1755
+ while (found_one_bigger);
1756
+
1757
+ row->ascrise = 0.0f;
1758
+ row->xheight = (float) best_x_height;
1759
+ if (row->xheight == 0)
1760
+ row->xheight = -1.0f;
1761
+ }