tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1761 @@
1
+ /**********************************************************************
2
+ * File: oldbasel.cpp (Formerly oldbl.c)
3
+ * Description: A re-implementation of the old baseline algorithm.
4
+ * Author: Ray Smith
5
+ * Created: Wed Oct 6 09:41:48 BST 1993
6
+ *
7
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #include "statistc.h"
22
+ #include "quadlsq.h"
23
+ #include "lmedsq.h"
24
+ #include "makerow.h"
25
+ #include "drawtord.h"
26
+ #include "oldbasel.h"
27
+ #include "tprintf.h"
28
+
29
+ #define EXTERN /* expands to nothing in this file; it prefixes the variable macros below */
30
+
31
+ EXTERN BOOL_VAR (textord_really_old_xheight, FALSE,
32
+ "Use original wiseowl xheight");
33
+ EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation");
34
+ EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation");
35
+ EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism");
36
+ EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines");
37
+ EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions");
38
+ EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights");
39
+ EXTERN BOOL_VAR (oldbl_xhfix, FALSE,
40
+ "Fix bug in modes threshold for xheights");
41
+ EXTERN BOOL_VAR(textord_ocropus_mode, FALSE, "Make baselines for ocropus");
42
+ EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
43
+ EXTERN INT_VAR (oldbl_holed_losscount, 10,
44
+ "Max lost before fallback line used");
45
+ EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
46
+ EXTERN double_VAR (textord_oldbl_jumplimit, 0.15,
47
+ "X fraction for new partition");
48
+
49
+ #define TURNLIMIT 1 /*min size for turning point */
50
+ #define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */
51
+ #define DESCENDER_FRACTION 0.5 /*descender/x-height */
52
+ #define MIN_ASC_FRACTION 0.20 /*min size of ascenders, as a fraction of x-height */
53
+ #define MIN_DESC_FRACTION 0.25 /*min size of descenders, as a fraction of x-height */
54
+ #define MINASCRISE 2.0 /*min ascender/desc step; also the floor applied to jumplimit */
55
+ #define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */
56
+ #define MAXHEIGHT 300 /*max blob height; upper bound of the height statistics */
57
+ #define MAXOVERLAP 0.1 /*max 10% missed overlap */
58
+ #define MAXBADRUN 2 /*max non best for failed */
59
+ #define HEIGHTBUCKETS 200 /* Num of buckets */
60
+ #define DELTAHEIGHT 5.0 /* Small amount of diff */
61
+ #define GOODHEIGHT 5
62
+ #define MAXLOOPS 10
63
+ #define MODENUM 10
64
+ #define MAXPARTS 6 /* capacity of the per-row partition arrays (partsizes, partdiffs) */
65
+ #define SPLINESIZE 23 /* capacity of the turning-point arrays; xstarts arrays hold SPLINESIZE + 1 entries */
66
+
67
+ #define ABS(x) ((x)<0 ? (-(x)) : (x)) /* absolute value; x is evaluated twice, so pass side-effect-free expressions */
68
+
69
+ /**********************************************************************
70
+ * make_old_baselines
71
+ *
72
+ * Top level function to make baselines the old way.
73
+ **********************************************************************/
74
+
75
+ void make_old_baselines( //make splines
76
+ TO_BLOCK *block, //block to do
77
+ BOOL8 testing_on //correct orientation
78
+ ) {
79
+ QSPLINE *prev_baseline; //baseline of previous row
80
+ TO_ROW *row; //current row
81
+ TO_ROW_IT row_it = block->get_rows ();
82
+ BLOBNBOX_IT blob_it;
83
+
84
+ prev_baseline = NULL; //nothing yet
85
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
86
+ row = row_it.data ();
87
+ find_textlines (block, row, 2, NULL);
88
+ if (row->xheight <= 0 && prev_baseline != NULL)
89
+ find_textlines (block, row, 2, prev_baseline);
90
+ if (row->xheight > 0)
91
+ //was a good one
92
+ prev_baseline = &row->baseline;
93
+ else {
94
+ prev_baseline = NULL;
95
+ blob_it.set_to_list (row->blob_list ());
96
+ if (textord_debug_baselines)
97
+ tprintf ("Row baseline generation failed on row at (%d,%d)\n",
98
+ blob_it.data ()->bounding_box ().left (),
99
+ blob_it.data ()->bounding_box ().bottom ());
100
+ }
101
+ }
102
+ correlate_lines(block);
103
+ }
104
+
105
+
106
+ /**********************************************************************
107
+ * correlate_lines
108
+ *
109
+ * Correlate the x-heights and ascender heights of a block to fill-in
110
+ * the ascender height and descender height for rows without one.
111
+ * Also fix baselines of rows without a decent fit.
112
+ **********************************************************************/
113
+
114
+ void correlate_lines( //cleanup lines
115
+ TO_BLOCK *block //block to do
116
+ ) {
117
+ TO_ROW **rows; //array of ptrs
118
+ int rowcount; /*no of rows to do */
119
+ register int rowindex; /*no of row */
120
+ //iterator
121
+ TO_ROW_IT row_it = block->get_rows ();
122
+
123
+ rowcount = row_it.length ();
124
+ if (rowcount == 0) {
125
+ //default value
126
+ block->xheight = block->line_size;
127
+ return; /*none to do */
128
+ }
129
+ rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
130
+ rowindex = 0;
131
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
132
+ //make array
133
+ rows[rowindex++] = row_it.data ();
134
+
135
+ /*try to fix bad lines */
136
+ correlate_neighbours(block, rows, rowcount);
137
+
138
+ block->xheight = (float) correlate_with_stats (rows, rowcount);
139
+ /*use stats */
140
+ if (block->xheight <= 0)
141
+ //desperate
142
+ block->xheight = block->line_size * textord_merge_x;
143
+ if (block->xheight < textord_min_xheight)
144
+ block->xheight = (float) textord_min_xheight;
145
+
146
+ free_mem(rows);
147
+ }
148
+
149
+
150
+ /**********************************************************************
151
+ * correlate_neighbours
152
+ *
153
+ * Try to fix rows that had a bad spline fit by using neighbours.
154
+ **********************************************************************/
155
+
156
+ void correlate_neighbours( //fix bad rows
157
+ TO_BLOCK *block, /*block rows are in */
158
+ TO_ROW **rows, /*rows of block */
159
+ int rowcount /*no of rows to do */
160
+ ) {
161
+ TO_ROW *row; /*current row */
162
+ register int rowindex; /*no of row */
163
+ register int otherrow; /*second row */
164
+ int upperrow; /*row above to use */
165
+ int lowerrow; /*row below to use */
166
+ float biggest;
167
+
168
+ for (rowindex = 0; rowindex < rowcount; rowindex++) {
169
+ row = rows[rowindex]; /*current row */
170
+ if (row->xheight < 0) {
171
+ /*quadratic failed */
172
+ for (otherrow = rowindex - 2;
173
+ otherrow >= 0
174
+ && (rows[otherrow]->xheight < 0.0
175
+ || !row->baseline.overlap (&rows[otherrow]->baseline,
176
+ MAXOVERLAP)); otherrow--);
177
+ upperrow = otherrow; /*decent row above */
178
+ for (otherrow = rowindex + 1;
179
+ otherrow < rowcount
180
+ && (rows[otherrow]->xheight < 0.0
181
+ || !row->baseline.overlap (&rows[otherrow]->baseline,
182
+ MAXOVERLAP)); otherrow++);
183
+ lowerrow = otherrow; /*decent row below */
184
+ if (upperrow >= 0)
185
+ find_textlines (block, row, 2, &rows[upperrow]->baseline);
186
+ if (row->xheight < 0 && lowerrow < rowcount)
187
+ find_textlines (block, row, 2, &rows[lowerrow]->baseline);
188
+ if (row->xheight < 0) {
189
+ if (upperrow >= 0)
190
+ find_textlines (block, row, 1, &rows[upperrow]->baseline);
191
+ else if (lowerrow < rowcount)
192
+ find_textlines (block, row, 1, &rows[lowerrow]->baseline);
193
+ }
194
+ }
195
+ }
196
+
197
+ for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
198
+ row = rows[rowindex]; /*current row */
199
+ if (row->xheight < 0) /*linear failed */
200
+ /*make do */
201
+ row->xheight = -row->xheight;
202
+ biggest = MAX (biggest, row->xheight);
203
+ }
204
+ }
205
+
206
+
207
+ /**********************************************************************
208
+ * correlate_with_stats
209
+ *
210
+ * correlate the x-heights and ascender heights of a block to fill-in
211
+ * the ascender height and descender height for rows without one.
212
+ **********************************************************************/
213
+
214
+ int correlate_with_stats( //fix xheights
215
+ TO_ROW **rows, /*rows of block */
216
+ int rowcount /*no of rows to do */
217
+ ) {
218
+ TO_ROW *row; /*current row */
219
+ register int rowindex; /*no of row */
220
+ float lineheight; /*mean x-height */
221
+ float ascheight; /*average ascenders */
222
+ float minascheight; /*min allowed ascheight */
223
+ int xcount; /*no of samples for xheight */
224
+ float fullheight; /*mean top height */
225
+ int fullcount; /*no of samples */
226
+ float descheight; /*mean descender drop */
227
+ float mindescheight; /*min allowed descheight */
228
+ int desccount; /*no of samples */
229
+ float xshift; /*shift in xheight */
230
+
231
+ /*no samples */
232
+ xcount = fullcount = desccount = 0;
233
+ lineheight = ascheight = fullheight = descheight = 0.0;
234
+ for (rowindex = 0; rowindex < rowcount; rowindex++) {
235
+ row = rows[rowindex]; /*current row */
236
+ if (row->ascrise > 0.0) { /*got ascenders? */
237
+ lineheight += row->xheight;/*average x-heights */
238
+ ascheight += row->ascrise; /*average ascenders */
239
+ xcount++;
240
+ }
241
+ else {
242
+ fullheight += row->xheight;/*assume full height */
243
+ fullcount++;
244
+ }
245
+ if (row->descdrop < 0.0) { /*got descenders? */
246
+ /*average descenders */
247
+ descheight += row->descdrop;
248
+ desccount++;
249
+ }
250
+ }
251
+
252
+ if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
253
+ lineheight /= xcount; /*average x-height */
254
+ /*average caps height */
255
+ fullheight = lineheight + ascheight / xcount;
256
+ /*must be decent size */
257
+ if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
258
+ fullheight = lineheight * (1 + MIN_ASC_FRACTION);
259
+ }
260
+ else {
261
+ fullheight /= fullcount; /*average max height */
262
+ /*guess x-height */
263
+ lineheight = fullheight * X_HEIGHT_FRACTION;
264
+ }
265
+ if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
266
+ descheight /= desccount; /*average descenders */
267
+ else
268
+ /*guess descenders */
269
+ descheight = -lineheight * DESCENDER_FRACTION;
270
+
271
+ minascheight = lineheight * MIN_ASC_FRACTION;
272
+ mindescheight = -lineheight * MIN_DESC_FRACTION;
273
+ for (rowindex = 0; rowindex < rowcount; rowindex++) {
274
+ row = rows[rowindex]; /*do each row */
275
+ row->all_caps = FALSE;
276
+ if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
277
+ /*no ascenders */
278
+ if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
279
+ && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
280
+ row->ascrise = fullheight - lineheight;
281
+ /*shift in x */
282
+ xshift = lineheight - row->xheight;
283
+ /*set to average */
284
+ row->xheight = lineheight;
285
+
286
+ }
287
+ else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
288
+ && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
289
+ row->ascrise = row->xheight - lineheight;
290
+ xshift = -row->ascrise; /*shift in x */
291
+ /*set to average */
292
+ row->xheight = lineheight;
293
+ row->all_caps = TRUE;
294
+ }
295
+ else {
296
+ row->ascrise = (fullheight - lineheight) * row->xheight
297
+ / fullheight;
298
+ xshift = -row->ascrise; /*shift in x */
299
+ /*scale it */
300
+ row->xheight -= row->ascrise;
301
+ row->all_caps = TRUE;
302
+ }
303
+ if (row->ascrise < minascheight)
304
+ row->ascrise =
305
+ row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
306
+ }
307
+ if (row->descdrop > mindescheight) {
308
+ if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
309
+ && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
310
+ /*set to average */
311
+ row->descdrop = descheight;
312
+ else
313
+ row->descdrop = -row->xheight * DESCENDER_FRACTION;
314
+ }
315
+ }
316
+ return (int) lineheight; //block xheight
317
+ }
318
+
319
+
320
+ /**********************************************************************
321
+ * find_textlines
322
+ *
323
+ * Compute the baseline for the given row.
324
+ **********************************************************************/
325
+
326
+ void find_textlines( //get baseline
327
+ TO_BLOCK *block, //block row is in
328
+ TO_ROW *row, //row to do
329
+ int degree, //required approximation
330
+ QSPLINE *spline //starting spline
331
+ ) {
332
+ int partcount; /*no of partitions of */
333
+ BOOL8 holed_line; //lost too many blobs
334
+ int bestpart; /*biggest partition */
335
+ char *partids; /*partition no of each blob */
336
+ int partsizes[MAXPARTS]; /*no in each partition */
337
+ int lineheight; /*guessed x-height */
338
+ float jumplimit; /*allowed delta change */
339
+ int *xcoords; /*useful sample points */
340
+ int *ycoords; /*useful sample points */
341
+ TBOX *blobcoords; /*edges of blob rectangles */
342
+ int blobcount; /*no of blobs on line */
343
+ float *ydiffs; /*diffs from 1st approx */
344
+ int pointcount; /*no of coords */
345
+ int xstarts[SPLINESIZE + 1]; //segment boundaries
346
+ int segments; //no of segments
347
+
348
+ //no of blobs in row
349
+ blobcount = row->blob_list ()->length ();
350
+ partids = (char *) alloc_mem (blobcount * sizeof (char));
351
+ xcoords = (int *) alloc_mem (blobcount * sizeof (int));
352
+ ycoords = (int *) alloc_mem (blobcount * sizeof (int));
353
+ blobcoords = (TBOX *) alloc_mem (blobcount * sizeof (TBOX));
354
+ ydiffs = (float *) alloc_mem (blobcount * sizeof (float));
355
+
356
+ lineheight = get_blob_coords (row, (int) block->line_size, blobcoords,
357
+ holed_line, blobcount);
358
+ /*limit for line change */
359
+ jumplimit = lineheight * textord_oldbl_jumplimit;
360
+ if (jumplimit < MINASCRISE)
361
+ jumplimit = MINASCRISE;
362
+
363
+ if (textord_oldbl_debug) {
364
+ tprintf
365
+ ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
366
+ block->line_size, lineheight, jumplimit);
367
+ }
368
+ if (holed_line)
369
+ make_holed_baseline (blobcoords, blobcount, spline, &row->baseline,
370
+ row->line_m ());
371
+ else
372
+ make_first_baseline (blobcoords, blobcount,
373
+ xcoords, ycoords, spline, &row->baseline, jumplimit);
374
+ #ifndef GRAPHICS_DISABLED
375
+ if (textord_show_final_rows)
376
+ row->baseline.plot (to_win, ScrollView::GOLDENROD);
377
+ #endif
378
+ if (blobcount > 1) {
379
+ bestpart = partition_line (blobcoords, blobcount,
380
+ &partcount, partids, partsizes,
381
+ &row->baseline, jumplimit, ydiffs);
382
+ pointcount = partition_coords (blobcoords, blobcount,
383
+ partids, bestpart, xcoords, ycoords);
384
+ segments = segment_spline (blobcoords, blobcount,
385
+ xcoords, ycoords,
386
+ degree, pointcount, xstarts);
387
+ if (!holed_line) {
388
+ do {
389
+ row->baseline = QSPLINE (xstarts, segments,
390
+ xcoords, ycoords, pointcount, degree);
391
+ }
392
+ while (textord_oldbl_split_splines
393
+ && split_stepped_spline (&row->baseline, jumplimit / 2,
394
+ xcoords, xstarts, segments));
395
+ }
396
+ find_lesser_parts(row,
397
+ blobcoords,
398
+ blobcount,
399
+ partids,
400
+ partsizes,
401
+ partcount,
402
+ bestpart);
403
+
404
+ }
405
+ else {
406
+ row->xheight = -1.0f; /*failed */
407
+ row->descdrop = 0.0f;
408
+ row->ascrise = 0.0f;
409
+ }
410
+ row->baseline.extrapolate (row->line_m (),
411
+ block->block->bounding_box ().left (),
412
+ block->block->bounding_box ().right ());
413
+ if (textord_really_old_xheight)
414
+ old_first_xheight (row, blobcoords, lineheight,
415
+ blobcount, &row->baseline, jumplimit);
416
+ else
417
+ make_first_xheight (row, blobcoords, lineheight, (int) block->line_size,
418
+ blobcount, &row->baseline, jumplimit);
419
+ free_mem(partids);
420
+ free_mem(xcoords);
421
+ free_mem(ycoords);
422
+ free_mem(blobcoords);
423
+ free_mem(ydiffs);
424
+ }
425
+
426
+
427
+ /**********************************************************************
428
+ * get_blob_coords
429
+ *
430
+ * Fill the blobcoords array with the coordinates of the blobs
431
+ * in the row. The return value is the first guess atthe line height.
432
+ **********************************************************************/
433
+
434
+ int get_blob_coords( //get boxes
435
+ TO_ROW *row, //row to use
436
+ inT32 lineheight, //block level
437
+ TBOX *blobcoords, //ouput boxes
438
+ BOOL8 &holed_line, //lost a lot of blobs
439
+ int &outcount //no of real blobs
440
+ ) {
441
+ //blobs
442
+ BLOBNBOX_IT blob_it = row->blob_list ();
443
+ register int blobindex; /*no along text line */
444
+ int losscount; //lost blobs
445
+ int maxlosscount; //greatest lost blobs
446
+ /*height stat collection */
447
+ STATS heightstat (0, MAXHEIGHT);
448
+
449
+ if (blob_it.empty ())
450
+ return 0; //none
451
+ maxlosscount = 0;
452
+ losscount = 0;
453
+ blob_it.mark_cycle_pt ();
454
+ blobindex = 0;
455
+ do {
456
+ blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
457
+ if (blobcoords[blobindex].height () > lineheight * 0.25)
458
+ heightstat.add (blobcoords[blobindex].height (), 1);
459
+ if (blobindex == 0
460
+ || blobcoords[blobindex].height () > lineheight * 0.25
461
+ || blob_it.cycled_list ()) {
462
+ blobindex++; /*no of merged blobs */
463
+ losscount = 0;
464
+ }
465
+ else {
466
+ if (blobcoords[blobindex].height ()
467
+ < blobcoords[blobindex].width () * oldbl_dot_error_size
468
+ && blobcoords[blobindex].width ()
469
+ < blobcoords[blobindex].height () * oldbl_dot_error_size) {
470
+ //counts as dot
471
+ blobindex++;
472
+ losscount = 0;
473
+ }
474
+ else {
475
+ losscount++; //lost it
476
+ if (losscount > maxlosscount)
477
+ //remember max
478
+ maxlosscount = losscount;
479
+ }
480
+ }
481
+ }
482
+ while (!blob_it.cycled_list ());
483
+
484
+ holed_line = maxlosscount > oldbl_holed_losscount;
485
+ outcount = blobindex; /*total blobs */
486
+
487
+ if (heightstat.get_total () > 1)
488
+ /*guess x-height */
489
+ return (int) heightstat.ile (0.25);
490
+ else
491
+ return blobcoords[0].height ();
492
+ }
493
+
494
+
495
+ /**********************************************************************
496
+ * make_first_baseline
497
+ *
498
+ * Make the first estimate at a baseline, either by shifting
499
+ * a supplied previous spline, or by doing a piecewise linear
500
+ * approximation using all the blobs.
501
+ **********************************************************************/
502
+
503
+ void
504
+ make_first_baseline ( //initial approximation
505
+ TBOX blobcoords[], /*blob bounding boxes */
506
+ int blobcount, /*no of blobcoords */
507
+ int xcoords[], /*coords for spline */
508
+ int ycoords[], /*approximator */
509
+ QSPLINE * spline, /*initial spline */
510
+ QSPLINE * baseline, /*output spline */
511
+ float jumplimit /*guess half descenders */
512
+ ) {
513
+ int leftedge; /*left edge of line */
514
+ int rightedge; /*right edge of line */
515
+ int blobindex; /*current blob */
516
+ int segment; /*current segment */
517
+ float prevy, thisy, nexty; /*3 y coords */
518
+ float y1, y2, y3; /*3 smooth blobs */
519
+ float maxmax, minmin; /*absolute limits */
520
+ int x2 = 0; /*right edge of old y3 */
521
+ int ycount; /*no of ycoords in use */
522
+ float yturns[SPLINESIZE]; /*y coords of turn pts */
523
+ int xturns[SPLINESIZE]; /*xcoords of turn pts */
524
+ int xstarts[SPLINESIZE + 1];
525
+ int segments; //no of segments
526
+ ICOORD shift; //shift of spline
527
+
528
+ prevy = 0;
529
+ /*left edge of row */
530
+ leftedge = blobcoords[0].left ();
531
+ /*right edge of line */
532
+ rightedge = blobcoords[blobcount - 1].right ();
533
+ if (spline == NULL /*no given spline */
534
+ || spline->segments < 3 /*or trivial */
535
+ /*or too non-overlap */
536
+ || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
537
+ || spline->xcoords[spline->segments - 1] < rightedge
538
+ - MAXOVERLAP * (rightedge - leftedge)) {
539
+ if (textord_oldbl_paradef)
540
+ return; //use default
541
+ xstarts[0] = blobcoords[0].left () - 1;
542
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
543
+ xcoords[blobindex] = (blobcoords[blobindex].left ()
544
+ + blobcoords[blobindex].right ()) / 2;
545
+ ycoords[blobindex] = blobcoords[blobindex].bottom ();
546
+ }
547
+ xstarts[1] = blobcoords[blobcount - 1].right () + 1;
548
+ segments = 1; /*no of segments */
549
+
550
+ /*linear */
551
+ *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
552
+
553
+ if (blobcount >= 3) {
554
+ y1 = y2 = y3 = 0.0f;
555
+ ycount = 0;
556
+ segment = 0; /*no of segments */
557
+ maxmax = minmin = 0.0f;
558
+ thisy = ycoords[0] - baseline->y (xcoords[0]);
559
+ nexty = ycoords[1] - baseline->y (xcoords[1]);
560
+ for (blobindex = 2; blobindex < blobcount; blobindex++) {
561
+ prevy = thisy; /*shift ycoords */
562
+ thisy = nexty;
563
+ nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
564
+ /*middle of smooth y */
565
+ if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
566
+ y1 = y2; /*shift window */
567
+ y2 = y3;
568
+ y3 = thisy; /*middle point */
569
+ ycount++;
570
+ /*local max */
571
+ if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
572
+ /*local min */
573
+ || (y1 > y2 && y2 <= y3))) {
574
+ if (segment < SPLINESIZE - 2) {
575
+ /*turning pt */
576
+ xturns[segment] = x2;
577
+ yturns[segment] = y2;
578
+ segment++; /*no of spline segs */
579
+ }
580
+ }
581
+ if (ycount == 1) {
582
+ maxmax = minmin = y3;/*initialise limits */
583
+ }
584
+ else {
585
+ if (y3 > maxmax)
586
+ maxmax = y3; /*biggest max */
587
+ if (y3 < minmin)
588
+ minmin = y3; /*smallest min */
589
+ }
590
+ /*possible turning pt */
591
+ x2 = blobcoords[blobindex - 1].right ();
592
+ }
593
+ }
594
+
595
+ jumplimit *= 1.2;
596
+ /*must be wavy */
597
+ if (maxmax - minmin > jumplimit) {
598
+ ycount = segment; /*no of segments */
599
+ for (blobindex = 0, segment = 1; blobindex < ycount;
600
+ blobindex++) {
601
+ if (yturns[blobindex] > minmin + jumplimit
602
+ || yturns[blobindex] < maxmax - jumplimit) {
603
+ /*significant peak */
604
+ if (segment == 1
605
+ || yturns[blobindex] > prevy + jumplimit
606
+ || yturns[blobindex] < prevy - jumplimit) {
607
+ /*different to previous */
608
+ xstarts[segment] = xturns[blobindex];
609
+ segment++;
610
+ prevy = yturns[blobindex];
611
+ }
612
+ /*bigger max */
613
+ else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
614
+ /*smaller min */
615
+ || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
616
+ xstarts[segment - 1] = xturns[blobindex];
617
+ /*improved previous */
618
+ prevy = yturns[blobindex];
619
+ }
620
+ }
621
+ }
622
+ xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
623
+ segments = segment; /*no of segments */
624
+ /*linear */
625
+ *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
626
+ }
627
+ }
628
+ }
629
+ else {
630
+ *baseline = *spline; /*copy it */
631
+ shift = ICOORD (0, (inT16) (blobcoords[0].bottom ()
632
+ - spline->y (blobcoords[0].right ())));
633
+ baseline->move (shift);
634
+ }
635
+ }
636
+
637
+
638
+ /**********************************************************************
639
+ * make_holed_baseline
640
+ *
641
+ * Make the first estimate at a baseline, either by shifting
642
+ * a supplied previous spline, or by doing a piecewise linear
643
+ * approximation using all the blobs.
644
+ **********************************************************************/
645
+
646
+ void
647
+ make_holed_baseline ( //initial approximation
648
+ TBOX blobcoords[], /*blob bounding boxes */
649
+ int blobcount, /*no of blobcoords */
650
+ QSPLINE * spline, /*initial spline */
651
+ QSPLINE * baseline, /*output spline */
652
+ float gradient //of line
653
+ ) {
654
+ int leftedge; /*left edge of line */
655
+ int rightedge; /*right edge of line */
656
+ int blobindex; /*current blob */
657
+ float x; //centre of row
658
+ ICOORD shift; //shift of spline
659
+
660
+ LMS lms(blobcount); //straight baseline
661
+ inT32 xstarts[2]; //straight line
662
+ double coeffs[3];
663
+ float c; //line parameter
664
+
665
+ /*left edge of row */
666
+ leftedge = blobcoords[0].left ();
667
+ /*right edge of line */
668
+ rightedge = blobcoords[blobcount - 1].right ();
669
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
670
+ lms.add (FCOORD ((blobcoords[blobindex].left () +
671
+ blobcoords[blobindex].right ()) / 2.0,
672
+ blobcoords[blobindex].bottom ()));
673
+ }
674
+ lms.constrained_fit (gradient, c);
675
+ xstarts[0] = leftedge;
676
+ xstarts[1] = rightedge;
677
+ coeffs[0] = 0;
678
+ coeffs[1] = gradient;
679
+ coeffs[2] = c;
680
+ *baseline = QSPLINE (1, xstarts, coeffs);
681
+ if (spline != NULL /*no given spline */
682
+ && spline->segments >= 3 /*or trivial */
683
+ /*or too non-overlap */
684
+ && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
685
+ && spline->xcoords[spline->segments - 1] >= rightedge
686
+ - MAXOVERLAP * (rightedge - leftedge)) {
687
+ *baseline = *spline; /*copy it */
688
+ x = (leftedge + rightedge) / 2.0;
689
+ shift = ICOORD (0, (inT16) (gradient * x + c - spline->y (x)));
690
+ baseline->move (shift);
691
+ }
692
+ }
693
+
694
+
695
+ /**********************************************************************
696
+ * partition_line
697
+ *
698
+ * Partition a row of blobs into different groups of continuous
699
+ * y position. jumplimit specifies the max allowable limit on a jump
700
+ * before a new partition is started.
701
+ * The return value is the biggest partition
702
+ **********************************************************************/
703
+
704
+ int
705
+ partition_line ( //partition blobs
706
+ TBOX blobcoords[], //bounding boxes
707
+ int blobcount, /*no of blobs on row */
708
+ int *numparts, /*number of partitions */
709
+ char partids[], /*partition no of each blob */
710
+ int partsizes[], /*no in each partition */
711
+ QSPLINE * spline, /*curve to fit to */
712
+ float jumplimit, /*allowed delta change */
713
+ float ydiffs[] /*diff from spline */
714
+ ) {
715
+ register int blobindex; /*no along text line */
716
+ int bestpart; /*best new partition */
717
+ int biggestpart; /*part with most members */
718
+ float diff; /*difference from line */
719
+ int startx; /*index of start blob */
720
+ float partdiffs[MAXPARTS]; /*step between parts */
721
+
722
+ for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
723
+ partsizes[bestpart] = 0; /*zero them all */
724
+
725
+ startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
726
+ *numparts = 1; /*1 partition */
727
+ bestpart = -1; /*first point */
728
+ for (blobindex = startx; blobindex < blobcount; blobindex++) {
729
+ /*do each blob in row */
730
+ diff = ydiffs[blobindex]; /*diff from line */
731
+ if (textord_oldbl_debug) {
732
+ tprintf ("%d(%d,%d), ", blobindex,
733
+ blobcoords[blobindex].left (),
734
+ blobcoords[blobindex].bottom ());
735
+ }
736
+ bestpart =
737
+ choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
738
+ /*record partition */
739
+ partids[blobindex] = bestpart;
740
+ partsizes[bestpart]++; /*another in it */
741
+ }
742
+
743
+ bestpart = -1; /*first point */
744
+ partsizes[0]--; /*doing 1st pt again */
745
+ /*do each blob in row */
746
+ for (blobindex = startx; blobindex >= 0; blobindex--) {
747
+ diff = ydiffs[blobindex]; /*diff from line */
748
+ if (textord_oldbl_debug) {
749
+ tprintf ("%d(%d,%d), ", blobindex,
750
+ blobcoords[blobindex].left (),
751
+ blobcoords[blobindex].bottom ());
752
+ }
753
+ bestpart =
754
+ choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
755
+ /*record partition */
756
+ partids[blobindex] = bestpart;
757
+ partsizes[bestpart]++; /*another in it */
758
+ }
759
+
760
+ for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
761
+ if (partsizes[bestpart] >= partsizes[biggestpart])
762
+ biggestpart = bestpart; /*new biggest */
763
+ if (textord_oldbl_merge_parts)
764
+ merge_oldbl_parts(blobcoords,
765
+ blobcount,
766
+ partids,
767
+ partsizes,
768
+ biggestpart,
769
+ jumplimit);
770
+ return biggestpart; /*biggest partition */
771
+ }
772
+
773
+
774
+ /**********************************************************************
775
+ * merge_oldbl_parts
776
+ *
777
+ * For any adjacent group of blobs in a different part, put them in the
778
+ * main part if they fit closely to neighbours in the main part.
779
+ **********************************************************************/
780
+
781
+ void
782
+ merge_oldbl_parts ( //partition blobs
783
+ TBOX blobcoords[], //bounding boxes
784
+ int blobcount, /*no of blobs on row */
785
+ char partids[], /*partition no of each blob */
786
+ int partsizes[], /*no in each partition */
787
+ int biggestpart, //major partition
788
+ float jumplimit /*allowed delta change */
789
+ ) {
790
+ BOOL8 found_one; //found a bestpart blob
791
+ BOOL8 close_one; //found was close enough
792
+ register int blobindex; /*no along text line */
793
+ int prevpart; //previous iteration
794
+ int runlength; //no in this part
795
+ float diff; /*difference from line */
796
+ int startx; /*index of start blob */
797
+ int test_blob; //another index
798
+ FCOORD coord; //blob coordinate
799
+ float m, c; //fitted line
800
+ QLSQ stats; //line stuff
801
+
802
+ prevpart = biggestpart;
803
+ runlength = 0;
804
+ startx = 0;
805
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
806
+ if (partids[blobindex] != prevpart) {
807
+ // tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
808
+ // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
809
+ // prevpart,partids[blobindex],runlength);
810
+ if (prevpart != biggestpart && runlength > MAXBADRUN) {
811
+ stats.clear ();
812
+ for (test_blob = startx; test_blob < blobindex; test_blob++) {
813
+ coord = FCOORD ((blobcoords[test_blob].left ()
814
+ + blobcoords[test_blob].right ()) / 2.0,
815
+ blobcoords[test_blob].bottom ());
816
+ stats.add (coord.x (), coord.y ());
817
+ }
818
+ stats.fit (1);
819
+ m = stats.get_b ();
820
+ c = stats.get_c ();
821
+ if (textord_oldbl_debug)
822
+ tprintf ("Fitted line y=%g x + %g\n", m, c);
823
+ found_one = FALSE;
824
+ close_one = FALSE;
825
+ for (test_blob = 1; !found_one
826
+ && (startx - test_blob >= 0
827
+ || blobindex + test_blob <= blobcount); test_blob++) {
828
+ if (startx - test_blob >= 0
829
+ && partids[startx - test_blob] == biggestpart) {
830
+ found_one = TRUE;
831
+ coord = FCOORD ((blobcoords[startx - test_blob].left ()
832
+ + blobcoords[startx -
833
+ test_blob].right ()) /
834
+ 2.0,
835
+ blobcoords[startx -
836
+ test_blob].bottom ());
837
+ diff = m * coord.x () + c - coord.y ();
838
+ if (textord_oldbl_debug)
839
+ tprintf
840
+ ("Diff of common blob to suspect part=%g at (%g,%g)\n",
841
+ diff, coord.x (), coord.y ());
842
+ if (diff < jumplimit && -diff < jumplimit)
843
+ close_one = TRUE;
844
+ }
845
+ if (blobindex + test_blob <= blobcount
846
+ && partids[blobindex + test_blob - 1] == biggestpart) {
847
+ found_one = TRUE;
848
+ coord =
849
+ FCOORD ((blobcoords[blobindex + test_blob - 1].
850
+ left () + blobcoords[blobindex + test_blob -
851
+ 1].right ()) / 2.0,
852
+ blobcoords[blobindex + test_blob -
853
+ 1].bottom ());
854
+ diff = m * coord.x () + c - coord.y ();
855
+ if (textord_oldbl_debug)
856
+ tprintf
857
+ ("Diff of common blob to suspect part=%g at (%g,%g)\n",
858
+ diff, coord.x (), coord.y ());
859
+ if (diff < jumplimit && -diff < jumplimit)
860
+ close_one = TRUE;
861
+ }
862
+ }
863
+ if (close_one) {
864
+ if (textord_oldbl_debug)
865
+ tprintf
866
+ ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
867
+ runlength, biggestpart, prevpart,
868
+ blobcoords[startx].left (),
869
+ blobcoords[startx].bottom ());
870
+ //switch sides
871
+ partsizes[prevpart] -= runlength;
872
+ for (test_blob = startx; test_blob < blobindex; test_blob++)
873
+ partids[test_blob] = biggestpart;
874
+ }
875
+ }
876
+ prevpart = partids[blobindex];
877
+ runlength = 1;
878
+ startx = blobindex;
879
+ }
880
+ else
881
+ runlength++;
882
+ }
883
+ }
884
+
885
+
886
+ /**********************************************************************
887
+ * get_ydiffs
888
+ *
889
+ * Get the differences between the blobs and the spline,
890
+ * putting them in ydiffs. The return value is the index
891
+ * of the blob in the middle of the "best behaved" region
892
+ **********************************************************************/
893
+
894
+ int
895
+ get_ydiffs ( //evaluate differences
896
+ TBOX blobcoords[], //bounding boxes
897
+ int blobcount, /*no of blobs */
898
+ QSPLINE * spline, /*approximating spline */
899
+ float ydiffs[] /*output */
900
+ ) {
901
+ register int blobindex; /*current blob */
902
+ int xcentre; /*xcoord */
903
+ int lastx; /*last xcentre */
904
+ float diffsum; /*sum of diffs */
905
+ float diff; /*current difference */
906
+ float drift; /*sum of spline steps */
907
+ float bestsum; /*smallest diffsum */
908
+ int bestindex; /*index of bestsum */
909
+
910
+ diffsum = 0.0f;
911
+ bestindex = 0;
912
+ bestsum = (float) MAX_INT32;
913
+ drift = 0.0f;
914
+ lastx = blobcoords[0].left ();
915
+ /*do each blob in row */
916
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
917
+ /*centre of blob */
918
+ xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
919
+ //step functions in spline
920
+ drift += spline->step (lastx, xcentre);
921
+ lastx = xcentre;
922
+ diff = blobcoords[blobindex].bottom ();
923
+ diff -= spline->y (xcentre);
924
+ diff += drift;
925
+ ydiffs[blobindex] = diff; /*store difference */
926
+ if (blobindex > 2)
927
+ /*remove old one */
928
+ diffsum -= ABS (ydiffs[blobindex - 3]);
929
+ diffsum += ABS (diff); /*add new one */
930
+ if (blobindex >= 2 && diffsum < bestsum) {
931
+ bestsum = diffsum; /*find min sum */
932
+ bestindex = blobindex - 1; /*middle of set */
933
+ }
934
+ }
935
+ return bestindex;
936
+ }
937
+
938
+
939
+ /**********************************************************************
940
+ * choose_partition
941
+ *
942
+ * Choose a partition for the point and return the index.
943
+ **********************************************************************/
944
+
945
+ int
946
+ choose_partition ( //select partition
947
+ register float diff, /*diff from spline */
948
+ float partdiffs[], /*diff on all parts */
949
+ int lastpart, /*last assigned partition */
950
+ float jumplimit, /*new part threshold */
951
+ int *partcount /*no of partitions */
952
+ ) {
953
+ register int partition; /*partition no */
954
+ int bestpart; /*best new partition */
955
+ float bestdelta; /*best gap from a part */
956
+ static float drift; /*drift from spline */
957
+ float delta; /*diff from part */
958
+ static float lastdelta; /*previous delta */
959
+
960
+ if (lastpart < 0) {
961
+ partdiffs[0] = diff;
962
+ lastpart = 0; /*first point */
963
+ drift = 0.0f;
964
+ lastdelta = 0.0f;
965
+ }
966
+ /*adjusted diff from part */
967
+ delta = diff - partdiffs[lastpart] - drift;
968
+ if (textord_oldbl_debug) {
969
+ tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift);
970
+ }
971
+ if (ABS (delta) > jumplimit / 2) {
972
+ /*delta on part 0 */
973
+ bestdelta = diff - partdiffs[0] - drift;
974
+ bestpart = 0; /*0 best so far */
975
+ for (partition = 1; partition < *partcount; partition++) {
976
+ delta = diff - partdiffs[partition] - drift;
977
+ if (ABS (delta) < ABS (bestdelta)) {
978
+ bestdelta = delta;
979
+ bestpart = partition; /*part with nearest jump */
980
+ }
981
+ }
982
+ delta = bestdelta;
983
+ /*too far away */
984
+ if (ABS (bestdelta) > jumplimit
985
+ && *partcount < MAXPARTS) { /*and spare part left */
986
+ bestpart = (*partcount)++; /*best was new one */
987
+ /*start new one */
988
+ partdiffs[bestpart] = diff - drift;
989
+ delta = 0.0f;
990
+ }
991
+ }
992
+ else {
993
+ bestpart = lastpart; /*best was last one */
994
+ }
995
+
996
+ if (bestpart == lastpart
997
+ && (ABS (delta - lastdelta) < jumplimit / 2
998
+ || ABS (delta) < jumplimit / 2))
999
+ /*smooth the drift */
1000
+ drift = (3 * drift + delta) / 3;
1001
+ lastdelta = delta;
1002
+
1003
+ if (textord_oldbl_debug) {
1004
+ tprintf ("P=%d\n", bestpart);
1005
+ }
1006
+
1007
+ return bestpart;
1008
+ }
1009
+
1010
+
1011
+ ///*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking
1012
+ //partitions and gives all the rest partid 0*/
1013
+ //
1014
+ //merge_partitions(partids,partcount,blobcount,bestpart)
1015
+ //register char *partids; /*partition numbers*/
1016
+ //int partcount; /*no of partitions*/
1017
+ //int blobcount; /*no of blobs*/
1018
+ //int bestpart; /*best partition*/
1019
+ //{
1020
+ // register int blobindex; /*no along text line*/
1021
+ // int runlength; /*run of same partition*/
1022
+ // int bestrun; /*biggest runlength*/
1023
+ //
1024
+ // bestrun=0; /*no runs yet*/
1025
+ // runlength=1;
1026
+ // for (blobindex=1;blobindex<blobcount;blobindex++)
1027
+ // { if (partids[blobindex]!=partids[blobindex-1])
1028
+ // { if (runlength>bestrun)
1029
+ // bestrun=runlength; /*find biggest run*/
1030
+ // runlength=1; /*new run*/
1031
+ // }
1032
+ // else
1033
+ // { runlength++;
1034
+ // }
1035
+ // }
1036
+ // if (runlength>bestrun)
1037
+ // bestrun=runlength;
1038
+ //
1039
+ // for (blobindex=0;blobindex<blobcount;blobindex++)
1040
+ // { if (blobindex<1
1041
+ // || partids[blobindex]!=partids[blobindex-1])
1042
+ // { if ((blobindex+1>=blobcount
1043
+ // || partids[blobindex]!=partids[blobindex+1])
1044
+ // /*loner*/
1045
+ // && (bestrun>2 || partids[blobindex]!=bestpart))
1046
+ // { partids[blobindex]=partcount; /*discard loner*/
1047
+ // }
1048
+ // else if (blobindex+1<blobcount
1049
+ // && partids[blobindex]==partids[blobindex+1]
1050
+ // /*pair*/
1051
+ // && (blobindex+2>=blobcount
1052
+ // || partids[blobindex]!=partids[blobindex+2])
1053
+ // && (bestrun>3 || partids[blobindex]!=bestpart))
1054
+ // { partids[blobindex]=partcount; /*discard both*/
1055
+ // partids[blobindex+1]=partcount;
1056
+ // }
1057
+ // }
1058
+ // }
1059
+ // for (blobindex=0;blobindex<blobcount;blobindex++)
1060
+ // { if (partids[blobindex]<partcount)
1061
+ // partids[blobindex]=0; /*all others together*/
1062
+ // }
1063
+ //}
1064
+
1065
+ /**********************************************************************
1066
+ * partition_coords
1067
+ *
1068
+ * Get the x,y coordinates of all points in the bestpart and put them
1069
+ * in xcoords,ycoords. Return the number of points found.
1070
+ **********************************************************************/
1071
+
1072
+ int
1073
+ partition_coords ( //find relevant coords
1074
+ TBOX blobcoords[], //bounding boxes
1075
+ int blobcount, /*no of blobs in row */
1076
+ char partids[], /*partition no of each blob */
1077
+ int bestpart, /*best new partition */
1078
+ int xcoords[], /*points to work on */
1079
+ int ycoords[] /*points to work on */
1080
+ ) {
1081
+ register int blobindex; /*no along text line */
1082
+ int pointcount; /*no of points */
1083
+
1084
+ pointcount = 0;
1085
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
1086
+ if (partids[blobindex] == bestpart) {
1087
+ /*centre of blob */
1088
+ xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
1089
+ ycoords[pointcount++] = blobcoords[blobindex].bottom ();
1090
+ }
1091
+ }
1092
+ return pointcount; /*no of points found */
1093
+ }
1094
+
1095
+
1096
+ /**********************************************************************
1097
+ * segment_spline
1098
+ *
1099
+ * Segment the row at midpoints between maxima and minima of the x,y pairs.
1100
+ * The xstarts of the segments are returned and the number found.
1101
+ **********************************************************************/
1102
+
1103
+ int
1104
+ segment_spline ( //make xstarts
1105
+ TBOX blobcoords[], //boundign boxes
1106
+ int blobcount, /*no of blobs in row */
1107
+ int xcoords[], /*points to work on */
1108
+ int ycoords[], /*points to work on */
1109
+ int degree, int pointcount, /*no of points */
1110
+ int xstarts[] //result
1111
+ ) {
1112
+ register int ptindex; /*no along text line */
1113
+ register int segment; /*partition no */
1114
+ int lastmin, lastmax; /*possible turn points */
1115
+ int turnpoints[SPLINESIZE]; /*good turning points */
1116
+ int turncount; /*no of turning points */
1117
+ int max_x; //max specified coord
1118
+
1119
+ xstarts[0] = xcoords[0] - 1; //leftmost defined pt
1120
+ max_x = xcoords[pointcount - 1] + 1;
1121
+ if (degree < 2)
1122
+ pointcount = 0;
1123
+ turncount = 0; /*no turning points yet */
1124
+ if (pointcount > 3) {
1125
+ ptindex = 1;
1126
+ lastmax = lastmin = 0; /*start with first one */
1127
+ while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
1128
+ /*minimum */
1129
+ if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
1130
+ if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
1131
+ if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
1132
+ /*new max point */
1133
+ turnpoints[turncount++] = lastmax;
1134
+ lastmin = ptindex; /*latest minimum */
1135
+ }
1136
+ else if (ycoords[ptindex] < ycoords[lastmin]) {
1137
+ lastmin = ptindex; /*lower minimum */
1138
+ }
1139
+ }
1140
+
1141
+ /*maximum */
1142
+ if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
1143
+ if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
1144
+ if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
1145
+ /*new min point */
1146
+ turnpoints[turncount++] = lastmin;
1147
+ lastmax = ptindex; /*latest maximum */
1148
+ }
1149
+ else if (ycoords[ptindex] > ycoords[lastmax]) {
1150
+ lastmax = ptindex; /*higher maximum */
1151
+ }
1152
+ }
1153
+ ptindex++;
1154
+ }
1155
+ /*possible global min */
1156
+ if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
1157
+ && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
1158
+ if (turncount < SPLINESIZE - 1)
1159
+ /*2 more turns */
1160
+ turnpoints[turncount++] = lastmax;
1161
+ if (turncount < SPLINESIZE - 1)
1162
+ turnpoints[turncount++] = ptindex;
1163
+ }
1164
+ else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
1165
+ /*possible global max */
1166
+ && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
1167
+ if (turncount < SPLINESIZE - 1)
1168
+ /*2 more turns */
1169
+ turnpoints[turncount++] = lastmin;
1170
+ if (turncount < SPLINESIZE - 1)
1171
+ turnpoints[turncount++] = ptindex;
1172
+ }
1173
+ else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
1174
+ && turncount < SPLINESIZE - 1) {
1175
+ if (ycoords[ptindex] > ycoords[lastmax])
1176
+ turnpoints[turncount++] = ptindex;
1177
+ else
1178
+ turnpoints[turncount++] = lastmax;
1179
+ }
1180
+ else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
1181
+ && turncount < SPLINESIZE - 1) {
1182
+ if (ycoords[ptindex] < ycoords[lastmin])
1183
+ turnpoints[turncount++] = ptindex;
1184
+ else
1185
+ turnpoints[turncount++] = lastmin;
1186
+ }
1187
+ }
1188
+
1189
+ if (textord_oldbl_debug && turncount > 0)
1190
+ tprintf ("First turn is %d at (%d,%d)\n",
1191
+ turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
1192
+ for (segment = 1; segment < turncount; segment++) {
1193
+ /*centre y coord */
1194
+ lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
1195
+
1196
+ /* fix alg so that it works with both rising and falling sections */
1197
+ if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
1198
+ /*find rising y centre */
1199
+ for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
1200
+ else
1201
+ /*find falling y centre */
1202
+ for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
1203
+
1204
+ /*centre x */
1205
+ xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
1206
+ + xcoords[turnpoints[segment - 1]]
1207
+ + xcoords[turnpoints[segment]] + 2) / 4;
1208
+ /*halfway between turns */
1209
+ if (textord_oldbl_debug)
1210
+ tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
1211
+ segment, turnpoints[segment],
1212
+ xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
1213
+ ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
1214
+ }
1215
+
1216
+ xstarts[segment] = max_x;
1217
+ return segment; /*no of splines */
1218
+ }
1219
+
1220
+
1221
+ /**********************************************************************
1222
+ * split_stepped_spline
1223
+ *
1224
+ * Re-segment the spline in cases where there is a big step function.
1225
+ * Return TRUE if any were done.
1226
+ **********************************************************************/
1227
+
1228
+ BOOL8
1229
+ split_stepped_spline ( //make xstarts
1230
+ QSPLINE * baseline, //current shot
1231
+ float jumplimit, //max step fuction
1232
+ int xcoords[], /*points to work on */
1233
+ int xstarts[], //result
1234
+ int &segments //no of segments
1235
+ ) {
1236
+ BOOL8 doneany; //return value
1237
+ register int segment; /*partition no */
1238
+ int startindex, centreindex, endindex;
1239
+ float leftcoord, rightcoord;
1240
+ int leftindex, rightindex;
1241
+ float step; //spline step
1242
+
1243
+ doneany = FALSE;
1244
+ startindex = 0;
1245
+ for (segment = 1; segment < segments - 1; segment++) {
1246
+ step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1247
+ (xstarts[segment] + xstarts[segment + 1]) / 2.0);
1248
+ if (step < 0)
1249
+ step = -step;
1250
+ if (step > jumplimit) {
1251
+ while (xcoords[startindex] < xstarts[segment - 1])
1252
+ startindex++;
1253
+ centreindex = startindex;
1254
+ while (xcoords[centreindex] < xstarts[segment])
1255
+ centreindex++;
1256
+ endindex = centreindex;
1257
+ while (xcoords[endindex] < xstarts[segment + 1])
1258
+ endindex++;
1259
+ if (segments >= SPLINESIZE) {
1260
+ if (textord_debug_baselines)
1261
+ tprintf ("Too many segments to resegment spline!!\n");
1262
+ }
1263
+ else if (endindex - startindex >= textord_spline_medianwin * 3) {
1264
+ while (centreindex - startindex <
1265
+ textord_spline_medianwin * 3 / 2)
1266
+ centreindex++;
1267
+ while (endindex - centreindex <
1268
+ textord_spline_medianwin * 3 / 2)
1269
+ centreindex--;
1270
+ leftindex = (startindex + startindex + centreindex) / 3;
1271
+ rightindex = (centreindex + endindex + endindex) / 3;
1272
+ leftcoord =
1273
+ (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
1274
+ rightcoord =
1275
+ (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
1276
+ while (xcoords[leftindex] > leftcoord
1277
+ && leftindex - startindex > textord_spline_medianwin)
1278
+ leftindex--;
1279
+ while (xcoords[leftindex] < leftcoord
1280
+ && centreindex - leftindex >
1281
+ textord_spline_medianwin / 2)
1282
+ leftindex++;
1283
+ if (xcoords[leftindex] - leftcoord >
1284
+ leftcoord - xcoords[leftindex - 1])
1285
+ leftindex--;
1286
+ while (xcoords[rightindex] > rightcoord
1287
+ && rightindex - centreindex >
1288
+ textord_spline_medianwin / 2)
1289
+ rightindex--;
1290
+ while (xcoords[rightindex] < rightcoord
1291
+ && endindex - rightindex > textord_spline_medianwin)
1292
+ rightindex++;
1293
+ if (xcoords[rightindex] - rightcoord >
1294
+ rightcoord - xcoords[rightindex - 1])
1295
+ rightindex--;
1296
+ if (textord_debug_baselines)
1297
+ tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
1298
+ xstarts[segment],
1299
+ baseline->
1300
+ step ((xstarts[segment - 1] +
1301
+ xstarts[segment]) / 2.0,
1302
+ (xstarts[segment] +
1303
+ xstarts[segment + 1]) / 2.0),
1304
+ (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1305
+ (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
1306
+ insert_spline_point (xstarts, segment,
1307
+ (xcoords[leftindex - 1] +
1308
+ xcoords[leftindex]) / 2,
1309
+ (xcoords[rightindex - 1] +
1310
+ xcoords[rightindex]) / 2, segments);
1311
+ doneany = TRUE;
1312
+ }
1313
+ else if (textord_debug_baselines) {
1314
+ tprintf
1315
+ ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
1316
+ startindex, centreindex, endindex,
1317
+ (inT32) textord_spline_medianwin);
1318
+ }
1319
+ }
1320
+ // else tprintf("Spline step at %d is %g\n",
1321
+ // xstarts[segment],
1322
+ // baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
1323
+ // (xstarts[segment]+xstarts[segment+1])/2.0));
1324
+ }
1325
+ return doneany;
1326
+ }
1327
+
1328
+
1329
/**********************************************************************
 * insert_spline_point
 *
 * Replace the boundary xstarts[segment] by the two boundaries coord1
 * and coord2.  The entries after it, up to and including
 * xstarts[segments], are moved up by one place to make room and
 * segments is incremented.  The caller must ensure that xstarts[] has
 * room for the extra entry.
 **********************************************************************/

void insert_spline_point(                   //split one boundary in two
                         int xstarts[],     //starts to shuffle
                         int segment,       //insertion pt
                         int coord1,        //coords to add
                         int coord2, int &segments  //total segments
                        ) {
  // shift from the top down so that nothing is overwritten early
  int dest = segments + 1;
  while (dest > segment + 1) {
    xstarts[dest] = xstarts[dest - 1];
    dest--;
  }
  segments++;
  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}
1350
+
1351
+
1352
+ /**********************************************************************
1353
+ * find_lesser_parts
1354
+ *
1355
+ * Average the step from the spline for the other partitions
1356
+ * and find the commonest partition which has a descender.
1357
+ **********************************************************************/
1358
+
1359
+ void
1360
+ find_lesser_parts ( //get descenders
1361
+ TO_ROW * row, //row to process
1362
+ TBOX blobcoords[], //bounding boxes
1363
+ int blobcount, /*no of blobs */
1364
+ char partids[], /*partition of each blob */
1365
+ int partsizes[], /*size of each part */
1366
+ int partcount, /*no of partitions */
1367
+ int bestpart /*biggest partition */
1368
+ ) {
1369
+ register int blobindex; /*index of blob */
1370
+ register int partition; /*current partition */
1371
+ int xcentre; /*centre of blob */
1372
+ int poscount; /*count of best up step */
1373
+ int negcount; /*count of best down step */
1374
+ float partsteps[MAXPARTS]; /*average step to part */
1375
+ float bestpos; /*best up step */
1376
+ float bestneg; /*best down step */
1377
+ int runlength; /*length of bad run */
1378
+ int biggestrun; /*biggest bad run */
1379
+
1380
+ biggestrun = 0;
1381
+ for (partition = 0; partition < partcount; partition++)
1382
+ partsteps[partition] = 0.0; /*zero accumulators */
1383
+ for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1384
+ xcentre = (blobcoords[blobindex].left ()
1385
+ + blobcoords[blobindex].right ()) >> 1;
1386
+ /*in other parts */
1387
+ if (partids[blobindex] != bestpart) {
1388
+ runlength++; /*run of non bests */
1389
+ if (runlength > biggestrun)
1390
+ biggestrun = runlength;
1391
+ partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()
1392
+ - row->baseline.y (xcentre);
1393
+ }
1394
+ else
1395
+ runlength = 0;
1396
+ }
1397
+ if (biggestrun > MAXBADRUN)
1398
+ row->xheight = -1.0f; /*failed */
1399
+ else
1400
+ row->xheight = 1.0f; /*success */
1401
+ poscount = negcount = 0;
1402
+ bestpos = bestneg = 0.0; /*no step yet */
1403
+ for (partition = 0; partition < partcount; partition++) {
1404
+ if (partition != bestpart) {
1405
+
1406
+ //by jetsoft divide by zero possible
1407
+ if (partsizes[partition]==0)
1408
+ partsteps[partition]=0;
1409
+ else
1410
+ partsteps[partition] /= partsizes[partition];
1411
+ //
1412
+
1413
+
1414
+ if (partsteps[partition] >= MINASCRISE
1415
+ && partsizes[partition] > poscount) {
1416
+ /*ascender rise */
1417
+ bestpos = partsteps[partition];
1418
+ /*2nd most popular */
1419
+ poscount = partsizes[partition];
1420
+ }
1421
+ if (partsteps[partition] <= -MINASCRISE
1422
+ && partsizes[partition] > negcount) {
1423
+ /*ascender rise */
1424
+ bestneg = partsteps[partition];
1425
+ /*2nd most popular */
1426
+ negcount = partsizes[partition];
1427
+ }
1428
+ }
1429
+ }
1430
+ /*average x-height */
1431
+ partsteps[bestpart] /= blobcount;
1432
+ row->descdrop = bestneg;
1433
+ }
1434
+
1435
+
1436
+ /**********************************************************************
1437
+ * old_first_xheight
1438
+ *
1439
+ * Makes an x-height spline by copying the baseline and shifting it.
1440
+ * It estimates the x-height across the line to use as the shift.
1441
+ * It also finds the ascender height if it can.
1442
+ **********************************************************************/
1443
+
1444
+ void
1445
+ old_first_xheight ( //the wiseowl way
1446
+ TO_ROW * row, /*current row */
1447
+ TBOX blobcoords[], /*blob bounding boxes */
1448
+ int initialheight, //initial guess
1449
+ int blobcount, /*blobs in blobcoords */
1450
+ QSPLINE * baseline, /*established */
1451
+ float jumplimit /*min ascender height */
1452
+ ) {
1453
+ register int blobindex; /*current blob */
1454
+ /*height statistics */
1455
+ STATS heightstat (0, MAXHEIGHT);
1456
+ int height; /*height of blob */
1457
+ int xcentre; /*centre of blob */
1458
+ int lineheight; /*approx xheight */
1459
+ float ascenders; /*ascender sum */
1460
+ int asccount; /*no of ascenders */
1461
+ float xsum; /*xheight sum */
1462
+ int xcount; /*xheight count */
1463
+ register float diff; /*height difference */
1464
+
1465
+ if (blobcount > 1) {
1466
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
1467
+ xcentre = (blobcoords[blobindex].left ()
1468
+ + blobcoords[blobindex].right ()) / 2;
1469
+ /*height of blob */
1470
+ height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5);
1471
+ if (height > initialheight * oldbl_xhfract
1472
+ && height > textord_min_xheight)
1473
+ heightstat.add (height, 1);
1474
+ }
1475
+ if (heightstat.get_total () > 3) {
1476
+ lineheight = (int) heightstat.ile (0.25);
1477
+ if (lineheight <= 0)
1478
+ lineheight = (int) heightstat.ile (0.5);
1479
+ }
1480
+ else
1481
+ lineheight = initialheight;
1482
+ }
1483
+ else {
1484
+ lineheight = (int) (blobcoords[0].top ()
1485
+ - baseline->y ((blobcoords[0].left ()
1486
+ + blobcoords[0].right ()) / 2) +
1487
+ 0.5);
1488
+ }
1489
+
1490
+ xsum = 0.0f;
1491
+ xcount = 0;
1492
+ for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
1493
+ blobindex++) {
1494
+ xcentre = (blobcoords[blobindex].left ()
1495
+ + blobcoords[blobindex].right ()) / 2;
1496
+ diff = blobcoords[blobindex].top () - baseline->y (xcentre);
1497
+ /*is it ascender */
1498
+ if (diff > lineheight + jumplimit) {
1499
+ ascenders += diff;
1500
+ asccount++; /*count ascenders */
1501
+ }
1502
+ else if (diff > lineheight - jumplimit) {
1503
+ xsum += diff; /*mean xheight */
1504
+ xcount++;
1505
+ }
1506
+ }
1507
+ if (xcount > 0)
1508
+ xsum /= xcount; /*average xheight */
1509
+ else
1510
+ xsum = (float) lineheight; /*guess it */
1511
+ row->xheight *= xsum;
1512
+ if (asccount > 0)
1513
+ row->ascrise = ascenders / asccount - xsum;
1514
+ else
1515
+ row->ascrise = 0.0f; /*had none */
1516
+ if (row->xheight == 0)
1517
+ row->xheight = -1.0f;
1518
+ }
1519
+
1520
+
1521
+ /**********************************************************************
1522
+ * make_first_xheight
1523
+ *
1524
+ * Makes an x-height spline by copying the baseline and shifting it.
1525
+ * It estimates the x-height across the line to use as the shift.
1526
+ * It also finds the ascender height if it can.
1527
+ **********************************************************************/
1528
+
1529
+ void
1530
+ make_first_xheight ( //find xheight
1531
+ TO_ROW * row, /*current row */
1532
+ TBOX blobcoords[], /*blob bounding boxes */
1533
+ int lineheight, //initial guess
1534
+ int init_lineheight, //block level guess
1535
+ int blobcount, /*blobs in blobcoords */
1536
+ QSPLINE * baseline, /*established */
1537
+ float jumplimit /*min ascender height */
1538
+ ) {
1539
+ STATS heightstat (0, HEIGHTBUCKETS);
1540
+ int lefts[HEIGHTBUCKETS];
1541
+ int rights[HEIGHTBUCKETS];
1542
+ int modelist[MODENUM];
1543
+ int blobindex;
1544
+ int mode_count; //blobs to count in thr
1545
+ int sign_bit;
1546
+ int mode_threshold;
1547
+ const int kBaselineTouch = 2; // This really should change with resolution.
1548
+ const int kGoodStrength = 8; // Strength of baseline-touching heights.
1549
+ const float kMinHeight = 0.25; // Min fraction of lineheight to use.
1550
+
1551
+ sign_bit = row->xheight > 0 ? 1 : -1;
1552
+
1553
+ memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
1554
+ memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
1555
+ mode_count = 0;
1556
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
1557
+ int xcenter = (blobcoords[blobindex].left () +
1558
+ blobcoords[blobindex].right ()) / 2;
1559
+ float base = baseline->y(xcenter);
1560
+ float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
1561
+ int strength = textord_ocropus_mode &&
1562
+ bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
1563
+ int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);
1564
+ if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
1565
+ if (height > lineheight * oldbl_xhfract
1566
+ && height > textord_min_xheight) {
1567
+ heightstat.add (height, strength);
1568
+ if (height < HEIGHTBUCKETS) {
1569
+ if (xcenter > rights[height])
1570
+ rights[height] = xcenter;
1571
+ if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
1572
+ lefts[height] = xcenter;
1573
+ }
1574
+ }
1575
+ mode_count += strength;
1576
+ }
1577
+ }
1578
+
1579
+ mode_threshold = (int) (blobcount * 0.1);
1580
+ if (oldbl_dot_error_size > 1 || oldbl_xhfix)
1581
+ mode_threshold = (int) (mode_count * 0.1);
1582
+
1583
+ if (textord_oldbl_debug) {
1584
+ tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
1585
+ blobcount, mode_count, mode_threshold);
1586
+ }
1587
+ find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
1588
+ if (textord_oldbl_debug) {
1589
+ for (blobindex = 0; blobindex < MODENUM; blobindex++)
1590
+ tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
1591
+ tprintf ("\n");
1592
+ }
1593
+ pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
1594
+
1595
+ if (textord_oldbl_debug)
1596
+ tprintf ("Output xheight=%g\n", row->xheight);
1597
+ if (row->xheight < 0 && textord_oldbl_debug)
1598
+ tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);
1599
+
1600
+ if (sign_bit < 0)
1601
+ row->xheight = -row->xheight;
1602
+ }
1603
+
1604
+ /**********************************************************************
1605
+ * find_top_modes
1606
+ *
1607
+ * Fill the input array with the indices of the top ten modes of the
1608
+ * input distribution.
1609
+ **********************************************************************/
1610
+
1611
+ const int kMinModeFactorOcropus = 32;
1612
+ const int kMinModeFactor = 12;
1613
+
1614
+ void
1615
+ find_top_modes ( //get modes
1616
+ STATS * stats, //stats to hack
1617
+ int statnum, //no of piles
1618
+ int modelist[], int modenum //no of modes to get
1619
+ ) {
1620
+ int mode_count;
1621
+ int last_i = 0;
1622
+ int last_max = MAX_INT32;
1623
+ int i;
1624
+ int mode;
1625
+ int total_max = 0;
1626
+ int mode_factor = textord_ocropus_mode ?
1627
+ kMinModeFactorOcropus : kMinModeFactor;
1628
+
1629
+ for (mode_count = 0; mode_count < modenum; mode_count++) {
1630
+ mode = 0;
1631
+ for (i = 0; i < statnum; i++) {
1632
+ if (stats->pile_count (i) > stats->pile_count (mode)) {
1633
+ if ((stats->pile_count (i) < last_max) ||
1634
+ ((stats->pile_count (i) == last_max) && (i > last_i))) {
1635
+ mode = i;
1636
+ }
1637
+ }
1638
+ }
1639
+ last_i = mode;
1640
+ last_max = stats->pile_count (last_i);
1641
+ total_max += last_max;
1642
+ if (last_max <= total_max / mode_factor)
1643
+ mode = 0;
1644
+ modelist[mode_count] = mode;
1645
+ }
1646
+ }
1647
+
1648
+
1649
+ /**********************************************************************
1650
+ * pick_x_height
1651
+ *
1652
+ * Choose based on the height modes the best x height value.
1653
+ **********************************************************************/
1654
+
1655
+ void pick_x_height(TO_ROW * row, //row to do
1656
+ int modelist[],
1657
+ int lefts[], int rights[],
1658
+ STATS * heightstat,
1659
+ int mode_threshold) {
1660
+ int x;
1661
+ int y;
1662
+ int z;
1663
+ float ratio;
1664
+ int found_one_bigger = FALSE;
1665
+ int best_x_height = 0;
1666
+ int best_asc = 0;
1667
+ int num_in_best;
1668
+
1669
+ for (x = 0; x < MODENUM; x++) {
1670
+ for (y = 0; y < MODENUM; y++) {
1671
+ /* Check for two modes */
1672
+ if (modelist[x] && modelist[y] &&
1673
+ heightstat->pile_count (modelist[x]) > mode_threshold &&
1674
+ (!textord_ocropus_mode ||
1675
+ MIN(rights[modelist[x]], rights[modelist[y]]) >
1676
+ MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
1677
+ ratio = (float) modelist[y] / (float) modelist[x];
1678
+ if (1.2 < ratio && ratio < 1.8) {
1679
+ /* Two modes found */
1680
+ best_x_height = modelist[x];
1681
+ num_in_best = heightstat->pile_count (modelist[x]);
1682
+
1683
+ /* Try to get one higher */
1684
+ do {
1685
+ found_one_bigger = FALSE;
1686
+ for (z = 0; z < MODENUM; z++) {
1687
+ if (modelist[z] == best_x_height + 1 &&
1688
+ (!textord_ocropus_mode ||
1689
+ MIN(rights[modelist[x]], rights[modelist[y]]) >
1690
+ MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
1691
+ ratio = (float) modelist[y] / (float) modelist[z];
1692
+ if ((1.2 < ratio && ratio < 1.8) &&
1693
+ /* Should be half of best */
1694
+ heightstat->pile_count (modelist[z]) >
1695
+ num_in_best * 0.5) {
1696
+ best_x_height++;
1697
+ found_one_bigger = TRUE;
1698
+ break;
1699
+ }
1700
+ }
1701
+ }
1702
+ }
1703
+ while (found_one_bigger);
1704
+
1705
+ /* try to get a higher ascender */
1706
+
1707
+ best_asc = modelist[y];
1708
+ num_in_best = heightstat->pile_count (modelist[y]);
1709
+
1710
+ /* Try to get one higher */
1711
+ do {
1712
+ found_one_bigger = FALSE;
1713
+ for (z = 0; z < MODENUM; z++) {
1714
+ if (modelist[z] > best_asc &&
1715
+ (!textord_ocropus_mode ||
1716
+ MIN(rights[modelist[x]], rights[modelist[y]]) >
1717
+ MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
1718
+ ratio = (float) modelist[z] / (float) best_x_height;
1719
+ if ((1.2 < ratio && ratio < 1.8) &&
1720
+ /* Should be half of best */
1721
+ heightstat->pile_count (modelist[z]) >
1722
+ num_in_best * 0.5) {
1723
+ best_asc = modelist[z];
1724
+ found_one_bigger = TRUE;
1725
+ break;
1726
+ }
1727
+ }
1728
+ }
1729
+ }
1730
+ while (found_one_bigger);
1731
+
1732
+ row->xheight = (float) best_x_height;
1733
+ row->ascrise = (float) best_asc - best_x_height;
1734
+ return;
1735
+ }
1736
+ }
1737
+ }
1738
+ }
1739
+
1740
+ best_x_height = modelist[0]; /* Single Mode found */
1741
+ num_in_best = heightstat->pile_count (best_x_height);
1742
+ do {
1743
+ /* Try to get one higher */
1744
+ found_one_bigger = FALSE;
1745
+ for (z = 1; z < MODENUM; z++) {
1746
+ /* Should be half of best */
1747
+ if ((modelist[z] == best_x_height + 1) &&
1748
+ (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
1749
+ best_x_height++;
1750
+ found_one_bigger = TRUE;
1751
+ break;
1752
+ }
1753
+ }
1754
+ }
1755
+ while (found_one_bigger);
1756
+
1757
+ row->ascrise = 0.0f;
1758
+ row->xheight = (float) best_x_height;
1759
+ if (row->xheight == 0)
1760
+ row->xheight = -1.0f;
1761
+ }