tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1939 @@
1
+ #include "mfcpch.h"
2
+ #include "tovars.h"
3
+ #include "drawtord.h"
4
+ #include "tospace.h"
5
+ #include "ndminx.h"
6
+ #include "statistc.h"
7
+
8
+ #define EXTERN
9
+ EXTERN BOOL_VAR (tosp_old_to_method, FALSE, "Space stats use prechopping?");
10
+ EXTERN BOOL_VAR (tosp_only_use_prop_rows, TRUE,
11
+ "Block stats to use fixed pitch rows?");
12
+ EXTERN BOOL_VAR (tosp_use_pre_chopping, FALSE,
13
+ "Space stats use prechopping?");
14
+ EXTERN BOOL_VAR (tosp_old_to_bug_fix, FALSE, "Fix suspected bug in old code");
15
+ EXTERN BOOL_VAR (tosp_block_use_cert_spaces, TRUE,
16
+ "Only stat OBVIOUS spaces");
17
+ EXTERN BOOL_VAR (tosp_row_use_cert_spaces, TRUE, "Only stat OBVIOUS spaces");
18
+ EXTERN BOOL_VAR (tosp_narrow_blobs_not_cert, TRUE,
19
+ "Only stat OBVIOUS spaces");
20
+ EXTERN BOOL_VAR (tosp_row_use_cert_spaces1, TRUE, "Only stat OBVIOUS spaces");
21
+ EXTERN BOOL_VAR (tosp_recovery_isolated_row_stats, TRUE,
22
+ "Use row alone when inadequate cert spaces");
23
+ EXTERN BOOL_VAR (tosp_only_small_gaps_for_kern, FALSE, "Better guess");
24
+ EXTERN BOOL_VAR (tosp_all_flips_fuzzy, FALSE, "Pass ANY flip to context?");
25
+ EXTERN BOOL_VAR (tosp_fuzzy_limit_all, TRUE,
26
+ "Dont restrict kn->sp fuzzy limit to tables");
27
+ EXTERN BOOL_VAR (tosp_stats_use_xht_gaps, TRUE,
28
+ "Use within xht gap for wd breaks");
29
+ EXTERN BOOL_VAR (tosp_use_xht_gaps, TRUE, "Use within xht gap for wd breaks");
30
+ EXTERN BOOL_VAR (tosp_only_use_xht_gaps, FALSE,
31
+ "Only use within xht gap for wd breaks");
32
+ EXTERN BOOL_VAR (tosp_rule_9_test_punct, FALSE,
33
+ "Dont chng kn to space next to punct");
34
+ EXTERN BOOL_VAR (tosp_flip_fuzz_kn_to_sp, TRUE, "Default flip");
35
+ EXTERN BOOL_VAR (tosp_flip_fuzz_sp_to_kn, TRUE, "Default flip");
36
+ EXTERN BOOL_VAR (tosp_improve_thresh, FALSE, "Enable improvement heuristic");
37
+ EXTERN INT_VAR (tosp_debug_level, 0, "Debug data");
38
+ EXTERN INT_VAR (tosp_enough_space_samples_for_median, 3,
39
+ "or should we use mean");
40
+ EXTERN INT_VAR (tosp_redo_kern_limit, 10,
41
+ "No.samples reqd to reestimate for row");
42
+ EXTERN INT_VAR (tosp_few_samples, 40,
43
+ "No.gaps reqd with 1 large gap to treat as a table");
44
+ EXTERN INT_VAR (tosp_short_row, 20,
45
+ "No.gaps reqd with few cert spaces to use certs");
46
+ EXTERN INT_VAR (tosp_sanity_method, 1, "How to avoid being silly");
47
+ EXTERN double_VAR (tosp_threshold_bias1, 0,
48
+ "how far between kern and space?");
49
+ EXTERN double_VAR (tosp_threshold_bias2, 0,
50
+ "how far between kern and space?");
51
+ EXTERN double_VAR (tosp_narrow_fraction, 0.3, "Fract of xheight for narrow");
52
+ EXTERN double_VAR (tosp_narrow_aspect_ratio, 0.48,
53
+ "narrow if w/h less than this");
54
+ EXTERN double_VAR (tosp_wide_fraction, 0.52, "Fract of xheight for wide");
55
+ EXTERN double_VAR (tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this");
56
+ EXTERN double_VAR (tosp_fuzzy_space_factor, 0.6,
57
+ "Fract of xheight for fuzz sp");
58
+ EXTERN double_VAR (tosp_fuzzy_space_factor1, 0.5,
59
+ "Fract of xheight for fuzz sp");
60
+ EXTERN double_VAR (tosp_fuzzy_space_factor2, 0.72,
61
+ "Fract of xheight for fuzz sp");
62
+ EXTERN double_VAR (tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
63
+ EXTERN double_VAR (tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp");
64
+ EXTERN double_VAR (tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp");
65
+ EXTERN double_VAR (tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp");
66
+ EXTERN double_VAR (tosp_ignore_big_gaps, -1, "xht multiplier");
67
+ EXTERN double_VAR (tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
68
+ EXTERN double_VAR (tosp_rep_space, 1.6, "rep gap multiplier for space");
69
+ EXTERN double_VAR (tosp_enough_small_gaps, 0.65,
70
+ "Fract of kerns reqd for isolated row stats");
71
+ EXTERN double_VAR (tosp_table_kn_sp_ratio, 2.25,
72
+ "Min difference of kn & sp in table");
73
+ EXTERN double_VAR (tosp_table_xht_sp_ratio, 0.33,
74
+ "Expect spaces bigger than this");
75
+ EXTERN double_VAR (tosp_table_fuzzy_kn_sp_ratio, 3.0,
76
+ "Fuzzy if less than this");
77
+ EXTERN double_VAR (tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
78
+ EXTERN double_VAR (tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
79
+ EXTERN double_VAR (tosp_min_sane_kn_sp, 1.5,
80
+ "Dont trust spaces less than this time kn");
81
+ EXTERN double_VAR (tosp_init_guess_kn_mult, 2.2,
82
+ "Thresh guess - mult kn by this");
83
+ EXTERN double_VAR (tosp_init_guess_xht_mult, 0.28,
84
+ "Thresh guess - mult xht by this");
85
+ EXTERN double_VAR (tosp_max_sane_kn_thresh, 5.0,
86
+ "Multiplier on kn to limit thresh");
87
+ EXTERN double_VAR (tosp_flip_caution, 0.0,
88
+ "Dont autoflip kn to sp when large separation");
89
+
90
+ EXTERN double_VAR (tosp_large_kerning, 0.19,
91
+ "Limit use of xht gap with large kns");
92
+ EXTERN double_VAR (tosp_dont_fool_with_small_kerns, -1,
93
+ "Limit use of xht gap with odd small kns");
94
+ EXTERN double_VAR (tosp_near_lh_edge, 0,
95
+ "Dont reduce box if the top left is non blank");
96
+ EXTERN double_VAR (tosp_silly_kn_sp_gap, 0.2,
97
+ "Dont let sp minus kn get too small");
98
+ EXTERN double_VAR (tosp_pass_wide_fuzz_sp_to_context, 0.75,
99
+ "How wide fuzzies need context");
100
+
101
+ #define MAXSPACING 128 /*max expected spacing in pix */
102
+ /**********************************************************************
103
+ * to_spacing
104
+ *
105
+ * Compute fuzzy word spacing thresholds for each row.
106
+ * I.e. set : max_nonspace
107
+ * space_threshold
108
+ * min_space
109
+ * kern_size
110
+ * space_size for each row.
111
+ * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE
112
+ **********************************************************************/
113
+
114
+ void to_spacing( //set spacing
115
+ ICOORD page_tr, //topright of page
116
+ TO_BLOCK_LIST *blocks //blocks on page
117
+ ) {
118
+ TO_BLOCK_IT block_it; //iterator
119
+ TO_BLOCK *block; //current block;
120
+ TO_ROW_IT row_it; //row iterator
121
+ TO_ROW *row; //current row
122
+ int block_index; //block number
123
+ int row_index; //row number
124
+ inT16 block_space_gap_width; //Estimated width of real spaces for whole block
125
+ //Estimate width ofnon space gaps for whole block
126
+ inT16 block_non_space_gap_width;
127
+ //Old fixed/prop result
128
+ BOOL8 old_text_ord_proportional;
129
+ GAPMAP *gapmap = NULL; //map of big vert gaps in blk
130
+
131
+ block_it.set_to_list (blocks);
132
+ block_index = 1;
133
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
134
+ block_it.forward ()) {
135
+ block = block_it.data ();
136
+ gapmap = new GAPMAP (block);
137
+ block_spacing_stats(block,
138
+ gapmap,
139
+ old_text_ord_proportional,
140
+ block_space_gap_width,
141
+ block_non_space_gap_width);
142
+ row_it.set_to_list (block->get_rows ());
143
+ row_index = 1;
144
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
145
+ row = row_it.data ();
146
+ if ((row->pitch_decision == PITCH_DEF_PROP) ||
147
+ (row->pitch_decision == PITCH_CORR_PROP)) {
148
+ if ((tosp_debug_level > 0) && !old_text_ord_proportional)
149
+ tprintf ("Block %d Row %d: Now Proportional\n",
150
+ block_index, row_index);
151
+ row_spacing_stats(row,
152
+ gapmap,
153
+ block_index,
154
+ row_index,
155
+ block_space_gap_width,
156
+ block_non_space_gap_width);
157
+ }
158
+ else {
159
+ if ((tosp_debug_level > 0) && old_text_ord_proportional)
160
+ tprintf
161
+ ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
162
+ block_index, row_index, row->pitch_decision,
163
+ row->fixed_pitch);
164
+ }
165
+ #ifndef GRAPHICS_DISABLED
166
+ if (textord_show_initial_words)
167
+ plot_word_decisions (to_win, (inT16) row->fixed_pitch, row);
168
+ #endif
169
+ row_index++;
170
+ }
171
+ delete gapmap;
172
+ block_index++;
173
+ }
174
+ }
175
+
176
+
177
+ /*************************************************************************
178
+ * block_spacing_stats()
179
+ *************************************************************************/
180
+
181
+ void block_spacing_stats( //DEBUG USE ONLY
182
+ TO_BLOCK *block,
183
+ GAPMAP *gapmap,
184
+ BOOL8 &old_text_ord_proportional,
185
+ inT16 &block_space_gap_width, //resulting estimate
186
+ inT16 &block_non_space_gap_width //resulting estimate
187
+ ) {
188
+ TO_ROW_IT row_it; //row iterator
189
+ TO_ROW *row; //current row
190
+ BLOBNBOX_IT blob_it; //iterator
191
+
192
+ STATS centre_to_centre_stats (0, MAXSPACING);
193
+ //DEBUG USE ONLY
194
+ STATS all_gap_stats (0, MAXSPACING);
195
+ STATS space_gap_stats (0, MAXSPACING);
196
+ inT16 minwidth = MAX_INT16; //narrowest blob
197
+ TBOX blob_box;
198
+ TBOX prev_blob_box;
199
+ inT16 centre_to_centre;
200
+ inT16 gap_width;
201
+ float real_space_threshold;
202
+ float iqr_centre_to_centre; //DEBUG USE ONLY
203
+ float iqr_all_gap_stats; //DEBUG USE ONLY
204
+ inT32 end_of_row;
205
+ inT32 row_length;
206
+
207
+ row_it.set_to_list (block->get_rows ());
208
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
209
+ row = row_it.data ();
210
+ if (!row->blob_list ()->empty () &&
211
+ (!tosp_only_use_prop_rows ||
212
+ (row->pitch_decision == PITCH_DEF_PROP) ||
213
+ (row->pitch_decision == PITCH_CORR_PROP))) {
214
+ blob_it.set_to_list (row->blob_list ());
215
+ blob_it.mark_cycle_pt ();
216
+ end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
217
+ if (tosp_use_pre_chopping)
218
+ blob_box = box_next_pre_chopped (&blob_it);
219
+ else if (tosp_stats_use_xht_gaps)
220
+ blob_box = reduced_box_next (row, &blob_it);
221
+ else
222
+ blob_box = box_next (&blob_it);
223
+ row_length = end_of_row - blob_box.left ();
224
+ if (blob_box.width () < minwidth)
225
+ minwidth = blob_box.width ();
226
+ prev_blob_box = blob_box;
227
+ while (!blob_it.cycled_list ()) {
228
+ if (tosp_use_pre_chopping)
229
+ blob_box = box_next_pre_chopped (&blob_it);
230
+ else if (tosp_stats_use_xht_gaps)
231
+ blob_box = reduced_box_next (row, &blob_it);
232
+ else
233
+ blob_box = box_next (&blob_it);
234
+ if (blob_box.width () < minwidth)
235
+ minwidth = blob_box.width ();
236
+ gap_width = blob_box.left () - prev_blob_box.right ();
237
+ if (!ignore_big_gap (row, row_length, gapmap,
238
+ prev_blob_box.right (), blob_box.left ())) {
239
+ all_gap_stats.add (gap_width, 1);
240
+
241
+ centre_to_centre = (blob_box.left () + blob_box.right () -
242
+ (prev_blob_box.left () +
243
+ prev_blob_box.right ())) / 2;
244
+ //DEBUG
245
+ centre_to_centre_stats.add (centre_to_centre, 1);
246
+ // DEBUG
247
+ }
248
+ prev_blob_box = blob_box;
249
+ }
250
+ }
251
+ }
252
+
253
+ //Inadequate samples
254
+ if (all_gap_stats.get_total () <= 1) {
255
+ block_non_space_gap_width = minwidth;
256
+ block_space_gap_width = -1; //No est. space width
257
+ //DEBUG
258
+ old_text_ord_proportional = TRUE;
259
+ }
260
+ else {
261
+ /* For debug only ..... */
262
+ iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -
263
+ centre_to_centre_stats.ile (0.25);
264
+ iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);
265
+ old_text_ord_proportional =
266
+ iqr_centre_to_centre * 2 > iqr_all_gap_stats;
267
+ /* .......For debug only */
268
+
269
+ /*
270
+ The median of the gaps is used as an estimate of the NON-SPACE gap width.
271
+ This RELIES on the assumption that there are more gaps WITHIN words than
272
+ BETWEEN words in a block
273
+
274
+ Now try to estimate the width of a real space for all real spaces in the
275
+ block. Do this by using a crude threshold to ignore "narrow" gaps, then
276
+ find the median of the "wide" gaps and use this.
277
+ */
278
+ block_non_space_gap_width = (inT16) floor (all_gap_stats.median ());
279
+ // median gap
280
+
281
+ row_it.set_to_list (block->get_rows ());
282
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
283
+ row = row_it.data ();
284
+ if (!row->blob_list ()->empty () &&
285
+ (!tosp_only_use_prop_rows ||
286
+ (row->pitch_decision == PITCH_DEF_PROP) ||
287
+ (row->pitch_decision == PITCH_CORR_PROP))) {
288
+ real_space_threshold =
289
+ MAX (tosp_init_guess_kn_mult * block_non_space_gap_width,
290
+ tosp_init_guess_xht_mult * row->xheight);
291
+ blob_it.set_to_list (row->blob_list ());
292
+ blob_it.mark_cycle_pt ();
293
+ end_of_row =
294
+ blob_it.data_relative (-1)->bounding_box ().right ();
295
+ if (tosp_use_pre_chopping)
296
+ blob_box = box_next_pre_chopped (&blob_it);
297
+ else if (tosp_stats_use_xht_gaps)
298
+ blob_box = reduced_box_next (row, &blob_it);
299
+ else
300
+ blob_box = box_next (&blob_it);
301
+ row_length = blob_box.left () - end_of_row;
302
+ prev_blob_box = blob_box;
303
+ while (!blob_it.cycled_list ()) {
304
+ if (tosp_use_pre_chopping)
305
+ blob_box = box_next_pre_chopped (&blob_it);
306
+ else if (tosp_stats_use_xht_gaps)
307
+ blob_box = reduced_box_next (row, &blob_it);
308
+ else
309
+ blob_box = box_next (&blob_it);
310
+ gap_width = blob_box.left () - prev_blob_box.right ();
311
+ if ((gap_width > real_space_threshold) &&
312
+ !ignore_big_gap (row, row_length, gapmap,
313
+ prev_blob_box.right (),
314
+ blob_box.left ())) {
315
+ /*
316
+ If tosp_use_cert_spaces is enabled, the estimate of the space gap is
317
+ restricted to obvious spaces - those wider than half the xht or those
318
+ with wide blobs on both sides - i.e not things that are suspect 1's or
319
+ punctiation that is sometimes widely spaced.
320
+ */
321
+ if (!tosp_block_use_cert_spaces ||
322
+ (gap_width >
323
+ tosp_fuzzy_space_factor2 * row->xheight)
324
+ ||
325
+ ((gap_width >
326
+ tosp_fuzzy_space_factor1 * row->xheight)
327
+ && (!tosp_narrow_blobs_not_cert
328
+ || (!narrow_blob (row, prev_blob_box)
329
+ && !narrow_blob (row, blob_box))))
330
+ || (wide_blob (row, prev_blob_box)
331
+ && wide_blob (row, blob_box)))
332
+ space_gap_stats.add (gap_width, 1);
333
+ }
334
+ prev_blob_box = blob_box;
335
+ }
336
+ }
337
+ }
338
+ //Inadequate samples
339
+ if (space_gap_stats.get_total () <= 2)
340
+ block_space_gap_width = -1;//No est. space width
341
+ else
342
+ block_space_gap_width =
343
+ MAX ((inT16) floor (space_gap_stats.median ()),
344
+ 3 * block_non_space_gap_width);
345
+ }
346
+ }
347
+
348
+
349
+ /*************************************************************************
350
+ * row_spacing_stats()
351
+ * Set values for min_space, max_non_space based on row stats only
352
+ * If failure - return 0 values.
353
+ *************************************************************************/
354
+
355
+ void row_spacing_stats( //estimate for block
356
+ TO_ROW *row,
357
+ GAPMAP *gapmap,
358
+ inT16 block_idx,
359
+ inT16 row_idx,
360
+ inT16 block_space_gap_width,
361
+ inT16 block_non_space_gap_width //estimate for block
362
+ ) {
363
+ //iterator
364
+ BLOBNBOX_IT blob_it = row->blob_list ();
365
+ STATS all_gap_stats (0, MAXSPACING);
366
+ STATS cert_space_gap_stats (0, MAXSPACING);
367
+ STATS all_space_gap_stats (0, MAXSPACING);
368
+ STATS small_gap_stats (0, MAXSPACING);
369
+ TBOX blob_box;
370
+ TBOX prev_blob_box;
371
+ inT16 gap_width;
372
+ inT16 real_space_threshold = 0;
373
+ inT16 max = 0;
374
+ inT16 index;
375
+ inT16 large_gap_count = 0;
376
+ BOOL8 suspected_table;
377
+ inT32 max_max_nonspace; //upper bound
378
+ BOOL8 good_block_space_estimate = block_space_gap_width > 0;
379
+ inT32 end_of_row;
380
+ inT32 row_length = 0;
381
+ float sane_space;
382
+ inT32 sane_threshold;
383
+
384
+ /* Collect first pass stats for row */
385
+
386
+ if (!good_block_space_estimate)
387
+ block_space_gap_width = inT16 (floor (row->xheight / 2));
388
+ if (!row->blob_list ()->empty ()) {
389
+ if (tosp_threshold_bias1 > 0)
390
+ real_space_threshold =
391
+ block_non_space_gap_width +
392
+ inT16 (floor (0.5 +
393
+ tosp_threshold_bias1 * (block_space_gap_width -
394
+ block_non_space_gap_width)));
395
+ else
396
+ real_space_threshold = //Old TO method
397
+ (block_space_gap_width + block_non_space_gap_width) / 2;
398
+ blob_it.set_to_list (row->blob_list ());
399
+ blob_it.mark_cycle_pt ();
400
+ end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
401
+ if (tosp_use_pre_chopping)
402
+ blob_box = box_next_pre_chopped (&blob_it);
403
+ else if (tosp_stats_use_xht_gaps)
404
+ blob_box = reduced_box_next (row, &blob_it);
405
+ else
406
+ blob_box = box_next (&blob_it);
407
+ row_length = end_of_row - blob_box.left ();
408
+ prev_blob_box = blob_box;
409
+ while (!blob_it.cycled_list ()) {
410
+ if (tosp_use_pre_chopping)
411
+ blob_box = box_next_pre_chopped (&blob_it);
412
+ else if (tosp_stats_use_xht_gaps)
413
+ blob_box = reduced_box_next (row, &blob_it);
414
+ else
415
+ blob_box = box_next (&blob_it);
416
+ gap_width = blob_box.left () - prev_blob_box.right ();
417
+ if (ignore_big_gap (row, row_length, gapmap,
418
+ prev_blob_box.right (), blob_box.left ()))
419
+ large_gap_count++;
420
+ else {
421
+ if (gap_width >= real_space_threshold) {
422
+ if (!tosp_row_use_cert_spaces ||
423
+ (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
424
+ ((gap_width > tosp_fuzzy_space_factor1 * row->xheight)
425
+ && (!tosp_narrow_blobs_not_cert
426
+ || (!narrow_blob (row, prev_blob_box)
427
+ && !narrow_blob (row, blob_box))))
428
+ || (wide_blob (row, prev_blob_box)
429
+ && wide_blob (row, blob_box)))
430
+ cert_space_gap_stats.add (gap_width, 1);
431
+ all_space_gap_stats.add (gap_width, 1);
432
+ }
433
+ else
434
+ small_gap_stats.add (gap_width, 1);
435
+ all_gap_stats.add (gap_width, 1);
436
+ }
437
+ prev_blob_box = blob_box;
438
+ }
439
+ }
440
+ suspected_table = (large_gap_count > 1) ||
441
+ ((large_gap_count > 0) &&
442
+ (all_gap_stats.get_total () <= tosp_few_samples));
443
+
444
+ /* Now determine row kern size, space size and threshold */
445
+
446
+ if ((cert_space_gap_stats.get_total () >=
447
+ tosp_enough_space_samples_for_median) ||
448
+ ((suspected_table ||
449
+ all_gap_stats.get_total () <= tosp_short_row) &&
450
+ cert_space_gap_stats.get_total () > 0))
451
+ old_to_method(row,
452
+ &all_gap_stats,
453
+ &cert_space_gap_stats,
454
+ &small_gap_stats,
455
+ block_space_gap_width,
456
+ block_non_space_gap_width);
457
+ else {
458
+ if (!tosp_recovery_isolated_row_stats ||
459
+ !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
460
+ block_idx, row_idx)) {
461
+ if (tosp_row_use_cert_spaces && (tosp_debug_level > 5))
462
+ tprintf ("B:%d R:%d -- Inadequate certain spaces.\n",
463
+ block_idx, row_idx);
464
+ if (tosp_row_use_cert_spaces1 && good_block_space_estimate) {
465
+ //Use block default
466
+ row->space_size = block_space_gap_width;
467
+ if (all_gap_stats.get_total () > tosp_redo_kern_limit)
468
+ row->kern_size = all_gap_stats.median ();
469
+ else
470
+ row->kern_size = block_non_space_gap_width;
471
+ row->space_threshold =
472
+ inT32 (floor ((row->space_size + row->kern_size) / 2));
473
+ }
474
+ else
475
+ old_to_method(row,
476
+ &all_gap_stats,
477
+ &all_space_gap_stats,
478
+ &small_gap_stats,
479
+ block_space_gap_width,
480
+ block_non_space_gap_width);
481
+ }
482
+ }
483
+
484
+ if (tosp_improve_thresh && !suspected_table)
485
+ improve_row_threshold(row, &all_gap_stats);
486
+
487
+ /* Now lets try to be careful not to do anything silly with tables when we
488
+ are ignoring big gaps*/
489
+ if (tosp_sanity_method == 0) {
490
+ if (suspected_table &&
491
+ (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
492
+ if (tosp_debug_level > 0)
493
+ tprintf ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f.\n",
494
+ block_idx, row_idx,
495
+ row->kern_size, row->space_threshold, row->space_size);
496
+ row->space_threshold =
497
+ (inT32) (tosp_table_kn_sp_ratio * row->kern_size);
498
+ row->space_size = MAX (row->space_threshold + 1, row->xheight);
499
+ }
500
+ }
501
+ else if (tosp_sanity_method == 1) {
502
+ sane_space = row->space_size;
503
+ /* NEVER let space size get too close to kern size */
504
+ if ((row->space_size < tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5))
505
+ || ((row->space_size - row->kern_size) <
506
+ (tosp_silly_kn_sp_gap * row->xheight))) {
507
+ if (good_block_space_estimate &&
508
+ (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size))
509
+ sane_space = block_space_gap_width;
510
+ else
511
+ sane_space =
512
+ MAX (tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5),
513
+ row->xheight / 2);
514
+ if (tosp_debug_level > 0)
515
+ tprintf
516
+ ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
517
+ block_idx, row_idx, row->kern_size, row->space_threshold,
518
+ row->space_size, sane_space);
519
+ row->space_size = sane_space;
520
+ row->space_threshold =
521
+ inT32 (floor ((row->space_size + row->kern_size) / 2));
522
+ }
523
+ /* NEVER let threshold get VERY far away from kern */
524
+ sane_threshold = inT32 (floor (tosp_max_sane_kn_thresh *
525
+ MAX (row->kern_size, 2.5)));
526
+ if (row->space_threshold > sane_threshold) {
527
+ if (tosp_debug_level > 0)
528
+ tprintf ("B:%d R:%d -- DONT BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
529
+ block_idx, row_idx,
530
+ row->kern_size,
531
+ row->space_threshold, row->space_size, sane_threshold);
532
+ row->space_threshold = sane_threshold;
533
+ if (row->space_size <= sane_threshold)
534
+ row->space_size = row->space_threshold + 1.0f;
535
+ }
536
+ /* Beware of tables - there may be NO spaces */
537
+ if (suspected_table) {
538
+ sane_space = MAX (tosp_table_kn_sp_ratio * row->kern_size,
539
+ tosp_table_xht_sp_ratio * row->xheight);
540
+ sane_threshold = inT32 (floor ((sane_space + row->kern_size) / 2));
541
+
542
+ if ((row->space_size < sane_space) ||
543
+ (row->space_threshold < sane_threshold)) {
544
+ if (tosp_debug_level > 0)
545
+ tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
546
+ block_idx, row_idx,
547
+ row->kern_size,
548
+ row->space_threshold, row->space_size);
549
+ //the minimum sane value
550
+ row->space_threshold = (inT32) sane_space;
551
+ row->space_size = MAX (row->space_threshold + 1, row->xheight);
552
+ }
553
+ }
554
+ }
555
+
556
+ /* Now lets try to put some error limits on the threshold */
557
+
558
+ if (tosp_old_to_method) {
559
+ /* Old textord made a space if gap >= threshold */
560
+ //NO FUZZY SPACES YET
561
+ row->max_nonspace = row->space_threshold;
562
+ //NO FUZZY SPACES YET
563
+ row->min_space = row->space_threshold + 1;
564
+ }
565
+ else {
566
+ /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it:-) */
567
+ row->min_space =
568
+ MIN (inT32 (ceil (tosp_fuzzy_space_factor * row->xheight)),
569
+ inT32 (row->space_size));
570
+ if (row->min_space <= row->space_threshold)
571
+ //Dont be silly
572
+ row->min_space = row->space_threshold + 1;
573
+ /*
574
+ Lets try to guess the max certain kern gap by looking at the cluster of
575
+ kerns for the row. The row is proportional so the kerns should cluster
576
+ tightly at the bottom of the distribution. We also expect most gaps to be
577
+ kerns. Find the maximum of the kern piles between 0 and twice the kern
578
+ estimate. Piles before the first one with less than 1/10 the maximum
579
+ number of samples can be taken as certain kerns.
580
+
581
+ Of course, there are some cases where the kern peak and space peaks merge,
582
+ so we will put an UPPER limit on the max certain kern gap of some fraction
583
+ below the threshold.
584
+ */
585
+
586
+ max_max_nonspace = inT32 ((row->space_threshold + row->kern_size) / 2);
587
+
588
+ //default
589
+ row->max_nonspace = max_max_nonspace;
590
+ for (index = 0; index <= max_max_nonspace; index++) {
591
+ if (all_gap_stats.pile_count (index) > max)
592
+ max = all_gap_stats.pile_count (index);
593
+ if ((index > row->kern_size) &&
594
+ (all_gap_stats.pile_count (index) < 0.1 * max)) {
595
+ row->max_nonspace = index;
596
+ break;
597
+ }
598
+ }
599
+ }
600
+
601
+ /* Yet another algorithm - simpler this time - just choose a fraction of the
602
+ threshold to space range */
603
+
604
+ if ((tosp_fuzzy_sp_fraction > 0) &&
605
+ (row->space_size > row->space_threshold))
606
+ row->min_space = MAX (row->min_space,
607
+ (inT32) ceil (row->space_threshold +
608
+ tosp_fuzzy_sp_fraction *
609
+ (row->space_size -
610
+ row->space_threshold)));
611
+
612
+ /* Ensure that ANY space less than some multiplier times the kern size is
613
+ fuzzy. In tables there is a risk of erroneously setting a small space size
614
+ when there are no real spaces. Sometimes tables have text squashed into
615
+ columns so that the kn->sp ratio is small anyway - this means that we cant
616
+ use this to force a wider separation - hence we rely on context to join any
617
+ dubious breaks. */
618
+
619
+ if ((tosp_table_fuzzy_kn_sp_ratio > 0) &&
620
+ (suspected_table || tosp_fuzzy_limit_all))
621
+ row->min_space = MAX (row->min_space,
622
+ (inT32) ceil (tosp_table_fuzzy_kn_sp_ratio *
623
+ row->kern_size));
624
+
625
+ if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold))
626
+ row->max_nonspace = (inT32) floor (0.5 + row->kern_size +
627
+ tosp_fuzzy_kn_fraction *
628
+ (row->space_threshold -
629
+ row->kern_size));
630
+
631
+ if (row->max_nonspace > row->space_threshold)
632
+ //Dont be silly
633
+ row->max_nonspace = row->space_threshold;
634
+
635
+ if (tosp_debug_level > 5)
636
+ tprintf
637
+ ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
638
+ block_idx, row_idx, row_length, block_non_space_gap_width,
639
+ block_space_gap_width, real_space_threshold, row->kern_size,
640
+ row->max_nonspace, row->space_threshold, row->min_space,
641
+ row->space_size);
642
+ }
643
+
644
+
645
+ void old_to_method( //estimate for block
646
+ TO_ROW *row,
647
+ STATS *all_gap_stats,
648
+ STATS *space_gap_stats,
649
+ STATS *small_gap_stats,
650
+ inT16 block_space_gap_width,
651
+ inT16 block_non_space_gap_width //estimate for block
652
+ ) {
653
+ /* Old to condition was > 2 */
654
+ if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) {
655
+ //Adequate samples
656
+ /* Set space size to median of spaces BUT limits it if it seems wildly out */
657
+ row->space_size = space_gap_stats->median ();
658
+ if (row->space_size > block_space_gap_width * 1.5) {
659
+ if (tosp_old_to_bug_fix)
660
+ row->space_size = block_space_gap_width * 1.5;
661
+ else
662
+ //BUG??? should be *1.5
663
+ row->space_size = block_space_gap_width;
664
+ }
665
+ if (row->space_size < (block_non_space_gap_width * 2) + 1)
666
+ row->space_size = (block_non_space_gap_width * 2) + 1;
667
+ }
668
+ //Only 1 or 2 samples
669
+ else if (space_gap_stats->get_total () >= 1) {
670
+ //hence mean not median
671
+ row->space_size = space_gap_stats->mean ();
672
+ if (row->space_size > block_space_gap_width * 1.5) {
673
+ if (tosp_old_to_bug_fix)
674
+ row->space_size = block_space_gap_width * 1.5;
675
+ else
676
+ //BUG??? should be *1.5
677
+ row->space_size = block_space_gap_width;
678
+ }
679
+ if (row->space_size < (block_non_space_gap_width * 3) + 1)
680
+ row->space_size = (block_non_space_gap_width * 3) + 1;
681
+ }
682
+ else
683
+ //Use block default
684
+ row->space_size = block_space_gap_width;
685
+
686
+ if ((tosp_only_small_gaps_for_kern) &&
687
+ (small_gap_stats->get_total () > tosp_redo_kern_limit))
688
+ row->kern_size = small_gap_stats->median ();
689
+ else if (all_gap_stats->get_total () > tosp_redo_kern_limit)
690
+ row->kern_size = all_gap_stats->median ();
691
+ else
692
+ //old TO -SAME FOR ALL ROWS
693
+ row->kern_size = block_non_space_gap_width;
694
+
695
+ if (tosp_threshold_bias2 > 0)
696
+ row->space_threshold =
697
+ inT32 (floor (0.5 + row->kern_size +
698
+ tosp_threshold_bias2 * (row->space_size -
699
+ row->kern_size)));
700
+ else
701
+ /*
702
+ NOTE old text ord uses (space_size + kern_size + 1)/2 as the threshold
703
+ and holds this in a float. The use is with a >= test
704
+ NEW textord uses an integer threshold and a > test
705
+ It comes to the same thing.
706
+ (Though there is a difference in that old textor has integer space_size
707
+ and kern_size.)
708
+ */
709
+ row->space_threshold =
710
+ inT32 (floor ((row->space_size + row->kern_size) / 2));
711
+ }
712
+
713
+
714
/*************************************************************************
 * isolated_row_stats()
 * Estimate row->kern_size, row->space_size and row->space_threshold from
 * the gaps of this row only.
 *
 * (The summary used to read "Set values for min_space, max_non_space based
 * on row stats only"; the body below does not write either of those
 * fields.)
 *
 * Returns FALSE, leaving the row estimates untouched, when the row has too
 * few gaps, too small a fraction of small gaps, or no gaps at or above the
 * crude threshold.  Returns FALSE with kern_size, space_threshold and
 * space_size all zeroed when the resulting estimates fail the sanity check.
 * Returns TRUE otherwise.
 *
 * block_idx and row_idx are used only in debug output.
 *************************************************************************/

BOOL8 isolated_row_stats(TO_ROW *row,
                         GAPMAP *gapmap,
                         STATS *all_gap_stats,
                         BOOL8 suspected_table,
                         inT16 block_idx,
                         inT16 row_idx) {
  float kern_estimate;              //median of all gaps
  float crude_threshold_estimate;   //first guess at kern/space boundary
  inT16 small_gaps_count;           //gaps below the crude threshold
  inT16 total;                      //number of gaps in all_gap_stats
                                    //iterator
  BLOBNBOX_IT blob_it = row->blob_list ();
  STATS cert_space_gap_stats (0, MAXSPACING);  //gaps judged certain spaces
  STATS all_space_gap_stats (0, MAXSPACING);   //all gaps above the threshold
  STATS small_gap_stats (0, MAXSPACING);       //gaps below the threshold
  TBOX blob_box;
  TBOX prev_blob_box;
  inT16 gap_width;
  inT32 end_of_row;
  inT32 row_length;

  /* Crude threshold: the larger of a multiple of the median gap and a
     multiple of the x-height. */
  kern_estimate = all_gap_stats->median ();
  crude_threshold_estimate = MAX (tosp_init_guess_kn_mult * kern_estimate,
                                  tosp_init_guess_xht_mult * row->xheight);
  small_gaps_count = stats_count_under (all_gap_stats,
                                        (inT16)
                                        ceil (crude_threshold_estimate));
  total = all_gap_stats->get_total ();

  /* Give up unless there are enough gaps, enough of them are small, and at
     least one is not small. */
  if ((total <= tosp_redo_kern_limit) ||
      ((small_gaps_count / (float) total) < tosp_enough_small_gaps) ||
      (total - small_gaps_count < 1)) {
    if (tosp_debug_level > 5)
      tprintf ("B:%d R:%d -- Cant do isolated row stats.\n",
               block_idx, row_idx);
    return FALSE;
  }
  blob_it.set_to_list (row->blob_list ());
  blob_it.mark_cycle_pt ();
  /* Right edge of the blob one step before the iterator position.
     NOTE(review): presumably the last blob of the row (circular list) -
     confirm against the list implementation. */
  end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
  /* Fetch the first box using whichever box source is configured; each of
     these calls also advances blob_it. */
  if (tosp_use_pre_chopping)
    blob_box = box_next_pre_chopped (&blob_it);
  else if (tosp_stats_use_xht_gaps)
    blob_box = reduced_box_next (row, &blob_it);
  else
    blob_box = box_next (&blob_it);
  row_length = end_of_row - blob_box.left ();
  prev_blob_box = blob_box;
  /* Walk the remaining boxes, classifying the gap in front of each one. */
  while (!blob_it.cycled_list ()) {
    if (tosp_use_pre_chopping)
      blob_box = box_next_pre_chopped (&blob_it);
    else if (tosp_stats_use_xht_gaps)
      blob_box = reduced_box_next (row, &blob_it);
    else
      blob_box = box_next (&blob_it);
    gap_width = blob_box.left () - prev_blob_box.right ();
    if (!ignore_big_gap (row, row_length, gapmap,
                         prev_blob_box.right (), blob_box.left ()) &&
        (gap_width > crude_threshold_estimate)) {
      /* A gap above the threshold is a "certain" space if it is very wide,
         or moderately wide and (optionally) not next to a narrow blob, or
         it lies between two wide blobs. */
      if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
          ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) &&
           (!tosp_narrow_blobs_not_cert ||
            (!narrow_blob (row, prev_blob_box) &&
             !narrow_blob (row, blob_box)))) ||
          (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box)))
        cert_space_gap_stats.add (gap_width, 1);
      all_space_gap_stats.add (gap_width, 1);
    }
    /* Note: a gap exactly equal to the threshold lands in neither set. */
    if (gap_width < crude_threshold_estimate)
      small_gap_stats.add (gap_width, 1);

    prev_blob_box = blob_box;
  }
  /* Space size: prefer certain spaces, then all spaces; use the median when
     there are enough samples and the mean otherwise. */
  if (cert_space_gap_stats.get_total () >=
      tosp_enough_space_samples_for_median)
                                 //median
    row->space_size = cert_space_gap_stats.median ();
  else if (suspected_table && (cert_space_gap_stats.get_total () > 0))
                                 //to avoid spaced 1's in tables
    row->space_size = cert_space_gap_stats.mean ();
  else if (all_space_gap_stats.get_total () >=
           tosp_enough_space_samples_for_median)
                                 //median
    row->space_size = all_space_gap_stats.median ();
  else
    row->space_size = all_space_gap_stats.mean ();

  if (tosp_only_small_gaps_for_kern)
    row->kern_size = small_gap_stats.median ();
  else
    row->kern_size = all_gap_stats->median ();
  /* Threshold is midway between the kern and space estimates, rounded
     down. */
  row->space_threshold =
    inT32 (floor ((row->space_size + row->kern_size) / 2));
  /* Sanity check: require kern < threshold < space and threshold > 0 */
  if ((row->kern_size >= row->space_threshold) ||
      (row->space_threshold >= row->space_size) ||
      (row->space_threshold <= 0)) {
    if (tosp_debug_level > 0)
      tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
               block_idx, row_idx,
               row->kern_size, row->space_threshold, row->space_size);
    /* Wipe the estimates so the caller cannot use them by accident. */
    row->kern_size = 0.0f;
    row->space_threshold = 0;
    row->space_size = 0.0f;
    return FALSE;
  }

  if (tosp_debug_level > 5)
    tprintf ("B:%d R:%d -- Isolated row stats: %f %d %f\n",
             block_idx, row_idx,
             row->kern_size, row->space_threshold, row->space_size);
  return TRUE;
}
833
+
834
+
835
+ inT16 stats_count_under(STATS *stats, inT16 threshold) {
836
+ inT16 index;
837
+ inT16 total = 0;
838
+
839
+ for (index = 0; index < threshold; index++)
840
+ total += stats->pile_count (index);
841
+ return total;
842
+ }
843
+
844
+
845
+ /*************************************************************************
846
+ * improve_row_threshold()
847
+ * Try to recognise a "normal line" -
848
+ * > 25 gaps
849
+ * && space > 3 * kn && space > 10
850
+ * (I.e. reasonably large space and kn:sp ratio)
851
+ * && > 3/4 # gaps < kn + (sp - kn)/3
852
+ * (I.e. most gaps are well away from space estimate)
853
+ * && a gap of max( 3, (sp - kn)/3 ) empty histogram positions is found
854
+ * somewhere in the histogram between kn and sp
855
+ * THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies
856
+ * NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!!
857
+ * try moving the default threshold to within this band but leave the
858
+ * fuzzy limit calculation as at present.
859
+ *************************************************************************/
860
+
861
+ void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) {
862
+ float sp = row->space_size;
863
+ float kn = row->kern_size;
864
+ inT16 reqd_zero_width = 0;
865
+ inT16 zero_width = 0;
866
+ inT16 zero_start = 0;
867
+ inT16 index = 0;
868
+
869
+ if (tosp_debug_level > 10)
870
+ tprintf ("Improve row threshold 0");
871
+ if ((all_gap_stats->get_total () <= 25) ||
872
+ (sp <= 10) ||
873
+ (sp <= 3 * kn) ||
874
+ (stats_count_under (all_gap_stats,
875
+ (inT16) ceil (kn + (sp - kn) / 3 + 0.5)) <
876
+ (0.75 * all_gap_stats->get_total ())))
877
+ return;
878
+ if (tosp_debug_level > 10)
879
+ tprintf (" 1");
880
+ /*
881
+ Look for the first region of all 0's in the histogram which is wider than
882
+ max( 3, (sp - kn)/3 ) and starts between kn and sp. If found, and current
883
+ threshold is not within it, move the threshold so that is is just inside it.
884
+ */
885
+ reqd_zero_width = (inT16) floor ((sp - kn) / 3 + 0.5);
886
+ if (reqd_zero_width < 3)
887
+ reqd_zero_width = 3;
888
+
889
+ for (index = inT16 (ceil (kn)); index < inT16 (floor (sp)); index++) {
890
+ if (all_gap_stats->pile_count (index) == 0) {
891
+ if (zero_width == 0)
892
+ zero_start = index;
893
+ zero_width++;
894
+ }
895
+ else {
896
+ if (zero_width >= reqd_zero_width)
897
+ break;
898
+ else {
899
+ zero_width = 0;
900
+ }
901
+ }
902
+ }
903
+ index--;
904
+ if (tosp_debug_level > 10)
905
+ tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n",
906
+ reqd_zero_width, zero_width, zero_start, row->space_threshold);
907
+ if ((zero_width < reqd_zero_width) ||
908
+ ((row->space_threshold >= zero_start) &&
909
+ (row->space_threshold <= index)))
910
+ return;
911
+ if (tosp_debug_level > 10)
912
+ tprintf (" 2");
913
+ if (row->space_threshold < zero_start) {
914
+ if (tosp_debug_level > 5)
915
+ tprintf
916
+ ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
917
+ kn, sp, zero_start, index, row->space_threshold, zero_start);
918
+ row->space_threshold = zero_start;
919
+ }
920
+ if (row->space_threshold > index) {
921
+ if (tosp_debug_level > 5)
922
+ tprintf
923
+ ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
924
+ kn, sp, zero_start, index, row->space_threshold, index);
925
+ row->space_threshold = index;
926
+ }
927
+ }
928
+
929
+
930
+ /**********************************************************************
931
+ * make_prop_words
932
+ *
933
+ * Convert a TO_BLOCK to a BLOCK.
934
+ **********************************************************************/
935
+
936
+ ROW *make_prop_words( //find lines
937
+ TO_ROW *row, //row to make
938
+ FCOORD rotation //for drawing
939
+ ) {
940
+ BOOL8 bol; //start of line
941
+ /* prev_ values are for start of word being built. non prev_ values are for
942
+ the gap between the word being built and the next one. */
943
+ BOOL8 prev_fuzzy_sp; //probably space
944
+ BOOL8 prev_fuzzy_non; //probably not
945
+ uinT8 prev_blanks; //in front of word
946
+ BOOL8 fuzzy_sp; //probably space
947
+ BOOL8 fuzzy_non; //probably not
948
+ uinT8 blanks; //in front of word
949
+ ROW *real_row; //output row
950
+ OUTLINE_IT out_it; //outlines
951
+ C_OUTLINE_IT cout_it;
952
+ PBLOB_LIST blobs; //blobs in word
953
+ C_BLOB_LIST cblobs;
954
+ PBLOB_IT blob_it = &blobs; //iterator
955
+ C_BLOB_IT cblob_it = &cblobs;
956
+ WERD_LIST words;
957
+ WERD_IT word_it; //new words
958
+ WERD *word; //new word
959
+ WERD_IT rep_char_it; //repeated char words
960
+ inT32 next_rep_char_word_right = MAX_INT32;
961
+ float repetition_spacing; //gap between repetitions
962
+ inT32 xstarts[2]; //row ends
963
+ double coeffs[3]; //quadratic
964
+ inT32 prev_x; //end of prev blob
965
+ BLOBNBOX *bblob; //current blob
966
+ TBOX blob_box; //bounding box
967
+ BLOBNBOX_IT box_it; //iterator
968
+ TBOX prev_blob_box;
969
+ TBOX next_blob_box;
970
+ inT16 prev_gap = MAX_INT16;
971
+ inT16 current_gap = MAX_INT16;
972
+ inT16 next_gap = MAX_INT16;
973
+ inT16 prev_within_xht_gap = MAX_INT16;
974
+ inT16 current_within_xht_gap = MAX_INT16;
975
+ inT16 next_within_xht_gap = MAX_INT16;
976
+ inT16 word_count = 0;
977
+ static inT16 row_count = 0;
978
+
979
+ row_count++;
980
+ rep_char_it.set_to_list (&(row->rep_words));
981
+ if (!rep_char_it.empty ()) {
982
+ next_rep_char_word_right =
983
+ rep_char_it.data ()->bounding_box ().right ();
984
+ }
985
+
986
+ prev_x = -MAX_INT16;
987
+ blob_it.set_to_list (&blobs);
988
+ cblob_it.set_to_list (&cblobs);
989
+ box_it.set_to_list (row->blob_list ());
990
+ word_it.set_to_list (&words);
991
+ bol = TRUE;
992
+ prev_blanks = 0;
993
+ prev_fuzzy_sp = FALSE;
994
+ prev_fuzzy_non = FALSE;
995
+ if (!box_it.empty ()) {
996
+ xstarts[0] = box_it.data ()->bounding_box ().left ();
997
+ if (xstarts[0] > next_rep_char_word_right) {
998
+ /* We need to insert a repeated char word at the start of the row */
999
+ word = rep_char_it.extract ();
1000
+ word_it.add_after_then_move (word);
1001
+ /* Set spaces before repeated char word */
1002
+ word->set_flag (W_BOL, TRUE);
1003
+ bol = FALSE;
1004
+ word->set_blanks (0);
1005
+ //NO uncertainty
1006
+ word->set_flag (W_FUZZY_SP, FALSE);
1007
+ word->set_flag (W_FUZZY_NON, FALSE);
1008
+ xstarts[0] = word->bounding_box ().left ();
1009
+ /* Set spaces after repeated char word (and leave current word set) */
1010
+ repetition_spacing = find_mean_blob_spacing (word);
1011
+ current_gap = box_it.data ()->bounding_box ().left () -
1012
+ next_rep_char_word_right;
1013
+ current_within_xht_gap = current_gap;
1014
+ if (current_gap > tosp_rep_space * repetition_spacing) {
1015
+ prev_blanks = (uinT8) floor (current_gap / row->space_size);
1016
+ if (prev_blanks < 1)
1017
+ prev_blanks = 1;
1018
+ }
1019
+ else
1020
+ prev_blanks = 0;
1021
+ if (tosp_debug_level > 5)
1022
+ tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
1023
+ box_it.data ()->bounding_box ().left (),
1024
+ box_it.data ()->bounding_box ().bottom (),
1025
+ repetition_spacing, current_gap);
1026
+ prev_fuzzy_sp = FALSE;
1027
+ prev_fuzzy_non = FALSE;
1028
+ if (rep_char_it.empty ()) {
1029
+ next_rep_char_word_right = MAX_INT32;
1030
+ }
1031
+ else {
1032
+ rep_char_it.forward ();
1033
+ next_rep_char_word_right =
1034
+ rep_char_it.data ()->bounding_box ().right ();
1035
+ }
1036
+ }
1037
+
1038
+ peek_at_next_gap(row,
1039
+ box_it,
1040
+ next_blob_box,
1041
+ next_gap,
1042
+ next_within_xht_gap);
1043
+ do {
1044
+ bblob = box_it.data ();
1045
+ blob_box = bblob->bounding_box ();
1046
+ if (bblob->joined_to_prev ()) {
1047
+ if (bblob->blob () != NULL) {
1048
+ out_it.set_to_list (blob_it.data ()->out_list ());
1049
+ out_it.move_to_last ();
1050
+ out_it.add_list_after (bblob->blob ()->out_list ());
1051
+ delete bblob->blob ();
1052
+ }
1053
+ else if (bblob->cblob () != NULL) {
1054
+ cout_it.set_to_list (cblob_it.data ()->out_list ());
1055
+ cout_it.move_to_last ();
1056
+ cout_it.add_list_after (bblob->cblob ()->out_list ());
1057
+ delete bblob->cblob ();
1058
+ }
1059
+ }
1060
+ else {
1061
+ if (bblob->blob () != NULL)
1062
+ blob_it.add_after_then_move (bblob->blob ());
1063
+ else if (bblob->cblob () != NULL)
1064
+ cblob_it.add_after_then_move (bblob->cblob ());
1065
+ prev_x = blob_box.right ();
1066
+ }
1067
+ box_it.forward (); //next one
1068
+ bblob = box_it.data ();
1069
+ blob_box = bblob->bounding_box ();
1070
+
1071
+ if (!bblob->joined_to_prev () &&
1072
+ (bblob->blob () != NULL || bblob->cblob () != NULL)) {
1073
+ /* Real Blob - not multiple outlines or pre-chopped */
1074
+ prev_gap = current_gap;
1075
+ prev_within_xht_gap = current_within_xht_gap;
1076
+ prev_blob_box = next_blob_box;
1077
+ current_gap = next_gap;
1078
+ current_within_xht_gap = next_within_xht_gap;
1079
+ peek_at_next_gap(row,
1080
+ box_it,
1081
+ next_blob_box,
1082
+ next_gap,
1083
+ next_within_xht_gap);
1084
+
1085
+ if ((blob_box.left () > next_rep_char_word_right) ||
1086
+ (!tosp_only_use_xht_gaps &&
1087
+ make_a_word_break (row, blob_box, prev_gap, prev_blob_box,
1088
+ current_gap, current_within_xht_gap,
1089
+ next_blob_box, next_gap,
1090
+ blanks, fuzzy_sp, fuzzy_non)) ||
1091
+ (tosp_only_use_xht_gaps &&
1092
+ make_a_word_break (row, blob_box, prev_within_xht_gap,
1093
+ prev_blob_box,
1094
+ current_gap, current_within_xht_gap,
1095
+ next_blob_box, next_within_xht_gap,
1096
+ blanks, fuzzy_sp, fuzzy_non)) ||
1097
+ box_it.at_first ()) {
1098
+ /* Form a new word out of the blobs collected */
1099
+ if (!blob_it.empty ()) {
1100
+ word = new WERD (&blobs, prev_blanks, NULL);
1101
+ //make real word
1102
+ word_count++;
1103
+ }
1104
+ else {
1105
+ word = new WERD (&cblobs, prev_blanks, NULL);
1106
+ word_count++;
1107
+ }
1108
+ word_it.add_after_then_move (word);
1109
+ if (bol) {
1110
+ word->set_flag (W_BOL, TRUE);
1111
+ bol = FALSE;
1112
+ }
1113
+ if (prev_fuzzy_sp)
1114
+ //probably space
1115
+ word->set_flag (W_FUZZY_SP, TRUE);
1116
+ else if (prev_fuzzy_non)
1117
+ word->set_flag (W_FUZZY_NON, TRUE);
1118
+ //probably not
1119
+
1120
+ if (blob_box.left () > next_rep_char_word_right) {
1121
+ /* We need to insert a repeated char word */
1122
+ word = rep_char_it.extract ();
1123
+ word_it.add_after_then_move (word);
1124
+
1125
+ /* Set spaces before repeated char word */
1126
+ repetition_spacing = find_mean_blob_spacing (word);
1127
+ current_gap = word->bounding_box ().left () - prev_x;
1128
+ current_within_xht_gap = current_gap;
1129
+ if (current_gap > tosp_rep_space * repetition_spacing) {
1130
+ blanks =
1131
+ (uinT8) floor (current_gap / row->space_size);
1132
+ if (blanks < 1)
1133
+ blanks = 1;
1134
+ }
1135
+ else
1136
+ blanks = 0;
1137
+ if (tosp_debug_level > 5)
1138
+ tprintf
1139
+ ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1140
+ word->bounding_box ().left (),
1141
+ word->bounding_box ().bottom (),
1142
+ repetition_spacing, current_gap, blanks);
1143
+ word->set_blanks (blanks);
1144
+ //NO uncertainty
1145
+ word->set_flag (W_FUZZY_SP, FALSE);
1146
+ word->set_flag (W_FUZZY_NON, FALSE);
1147
+
1148
+ /* Set spaces after repeated char word (and leave current word set) */
1149
+ current_gap =
1150
+ blob_box.left () - next_rep_char_word_right;
1151
+ if (current_gap > tosp_rep_space * repetition_spacing) {
1152
+ blanks = (uinT8) (current_gap / row->space_size);
1153
+ if (blanks < 1)
1154
+ blanks = 1;
1155
+ }
1156
+ else
1157
+ blanks = 0;
1158
+ if (tosp_debug_level > 5)
1159
+ tprintf (" Rgap:%d (%d blanks)\n",
1160
+ current_gap, blanks);
1161
+ fuzzy_sp = FALSE;
1162
+ fuzzy_non = FALSE;
1163
+
1164
+ if (rep_char_it.empty ()) {
1165
+ next_rep_char_word_right = MAX_INT32;
1166
+ }
1167
+ else {
1168
+ rep_char_it.forward ();
1169
+ next_rep_char_word_right =
1170
+ rep_char_it.data ()->bounding_box ().right ();
1171
+ }
1172
+ }
1173
+
1174
+ if (box_it.at_first () && rep_char_it.empty ()) {
1175
+ //at end of line
1176
+ word->set_flag (W_EOL, TRUE);
1177
+ xstarts[1] = prev_x;
1178
+ }
1179
+ else {
1180
+ prev_blanks = blanks;
1181
+ prev_fuzzy_sp = fuzzy_sp;
1182
+ prev_fuzzy_non = fuzzy_non;
1183
+ }
1184
+ }
1185
+ }
1186
+ }
1187
+ while (!box_it.at_first ()); //until back at start
1188
+
1189
+ /* Insert any further repeated char words */
1190
+ while (!rep_char_it.empty ()) {
1191
+ word = rep_char_it.extract ();
1192
+ word_it.add_after_then_move (word);
1193
+
1194
+ /* Set spaces before repeated char word */
1195
+ repetition_spacing = find_mean_blob_spacing (word);
1196
+ current_gap = word->bounding_box ().left () - prev_x;
1197
+ if (current_gap > tosp_rep_space * repetition_spacing) {
1198
+ blanks = (uinT8) floor (current_gap / row->space_size);
1199
+ if (blanks < 1)
1200
+ blanks = 1;
1201
+ }
1202
+ else
1203
+ blanks = 0;
1204
+ if (tosp_debug_level > 5)
1205
+ tprintf
1206
+ ("Repch wd at EOL (%d,%d). rep spacing %d; Lgap:%d (%d blanks)\n",
1207
+ word->bounding_box ().left (), word->bounding_box ().bottom (),
1208
+ repetition_spacing, current_gap, blanks);
1209
+ word->set_blanks (blanks);
1210
+ //NO uncertainty
1211
+ word->set_flag (W_FUZZY_SP, FALSE);
1212
+ word->set_flag (W_FUZZY_NON, FALSE);
1213
+ prev_x = word->bounding_box ().right ();
1214
+ if (rep_char_it.empty ()) {
1215
+ //at end of line
1216
+ word->set_flag (W_EOL, TRUE);
1217
+ xstarts[1] = prev_x;
1218
+ }
1219
+ else {
1220
+ rep_char_it.forward ();
1221
+ }
1222
+ }
1223
+ coeffs[0] = 0;
1224
+ coeffs[1] = row->line_m ();
1225
+ coeffs[2] = row->line_c ();
1226
+ real_row = new ROW (row,
1227
+ (inT16) row->kern_size, (inT16) row->space_size);
1228
+ word_it.set_to_list (real_row->word_list ());
1229
+ //put words in row
1230
+ word_it.add_list_after (&words);
1231
+ real_row->recalc_bounding_box ();
1232
+ if (tosp_debug_level > 9) {
1233
+ tprintf ("Row %d Made %d words in row ((%d,%d)(%d,%d))\n",
1234
+ row_count,
1235
+ word_count,
1236
+ real_row->bounding_box ().left (),
1237
+ real_row->bounding_box ().bottom (),
1238
+ real_row->bounding_box ().right (),
1239
+ real_row->bounding_box ().top ());
1240
+ }
1241
+ return real_row;
1242
+ }
1243
+ return NULL;
1244
+ }
1245
+
1246
+
1247
/*
 * make_a_word_break()
 * Decide whether the gap in front of blob_box is a word break.
 *
 * row                    - row being made; supplies kern_size, space_size,
 *                          xheight, max_nonspace, space_threshold and
 *                          min_space.
 * blob_box               - box of the blob following the gap (used only for
 *                          the debug marking via mark_gap).
 * prev_gap               - gap before the previous blob.
 * prev_blob_box          - box of the blob before the gap.
 * real_current_gap       - the gap being judged, from full bounding boxes.
 * within_xht_current_gap - the same gap as passed in by the caller under
 *                          its "within xht" measure.
 * next_blob_box          - box supplied by the caller for the next blob.
 * next_gap               - gap after next_blob_box.
 * blanks, fuzzy_sp, fuzzy_non - outputs: number of blanks and the two
 *                          "uncertain" flags for the break.
 *
 * Returns TRUE if the gap should be treated as a space.
 *
 * With tosp_old_to_method set, the three outputs are written only when the
 * gap is a space and current_gap < MAX_INT16.  Otherwise they are always
 * written, but are only meaningful when TRUE is returned.
 *
 * The new method keeps state between calls in a static local
 * (prev_gap_was_a_space), so the result depends on the previous call.
 */
BOOL8 make_a_word_break(               //decide on word break
                        TO_ROW *row,   //row being made
                        TBOX blob_box, //for next_blob //how many blanks?
                        inT16 prev_gap,
                        TBOX prev_blob_box,
                        inT16 real_current_gap,
                        inT16 within_xht_current_gap,
                        TBOX next_blob_box,
                        inT16 next_gap,
                        uinT8 &blanks,
                        BOOL8 &fuzzy_sp,
                        BOOL8 &fuzzy_non) {
  static BOOL8 prev_gap_was_a_space;  //verdict of the previous call
  BOOL8 space;                        //the answer being built up
  inT16 current_gap;                  //gap measure actually judged
  float fuzzy_sp_to_kn_limit;         //above this a flipped space stays fuzzy

  /* Inhibit using the reduced gap if
     The kerning is large - chars are not kerned and reducing "f"s can cause
     erroneous blanks
     OR  The real gap is less than 0
     OR  The real gap is less than the kerning estimate
   */
  if ((row->kern_size > tosp_large_kerning * row->xheight) ||
      ((tosp_dont_fool_with_small_kerns >= 0) &&
       (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
                                 //Ignore the difference
    within_xht_current_gap = real_current_gap;

  if (tosp_use_xht_gaps && tosp_only_use_xht_gaps)
    current_gap = within_xht_current_gap;
  else
    current_gap = real_current_gap;

  if (tosp_old_to_method) {
                                 //Boring old method
    space = current_gap > row->max_nonspace;
    if (space && (current_gap < MAX_INT16)) {
      if (current_gap < row->min_space) {
        /* Between max_nonspace and min_space: uncertain either way, with
           the threshold deciding which way to lean. */
        if (current_gap > row->space_threshold) {
          blanks = 1;
          fuzzy_sp = TRUE;
          fuzzy_non = FALSE;
        }
        else {
          blanks = 0;
          fuzzy_sp = FALSE;
          fuzzy_non = TRUE;
        }
      }
      else {
        /* At or above min_space: a definite space of at least one blank. */
        blanks = (uinT8) (current_gap / row->space_size);
        if (blanks < 1)
          blanks = 1;
        fuzzy_sp = FALSE;
        fuzzy_non = FALSE;
      }
    }
    return space;
  }
  else {
    /* New exciting heuristic method */
    if (prev_blob_box.null_box ())
                                 //Beginning of row
      prev_gap_was_a_space = TRUE;

                                 //Default as old TO
    space = current_gap > row->space_threshold;

    /* Set defaults for the word break incase we find one.  Currently there are
       no fuzzy spaces. Depending on the reliability of the different heuristics
       we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY
       be used if the function returns TRUE - ie the word is to be broken.
     */
    blanks = (uinT8) (current_gap / row->space_size);
    if (blanks < 1)
      blanks = 1;
    fuzzy_sp = FALSE;
    fuzzy_non = FALSE;
    /*
       If xht measure causes gap to flip one of the 3 thresholds act accordingly -
       despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to
       context.
     */
    /* Rule 20: xht gap crosses max_nonspace. */
    if (tosp_use_xht_gaps &&
        (real_current_gap <= row->max_nonspace) &&
        (within_xht_current_gap > row->max_nonspace)) {
      space = TRUE;
      fuzzy_non = TRUE;
#ifndef GRAPHICS_DISABLED
      mark_gap (blob_box, 20,
                prev_gap, prev_blob_box.width (),
                current_gap, next_blob_box.width (), next_gap);
#endif
    }
    /* Rule 21: xht gap crosses space_threshold. */
    else if (tosp_use_xht_gaps &&
             (real_current_gap <= row->space_threshold) &&
             (within_xht_current_gap > row->space_threshold)) {
      space = TRUE;
      if (tosp_flip_fuzz_kn_to_sp)
        fuzzy_sp = TRUE;
      else
        fuzzy_non = TRUE;
#ifndef GRAPHICS_DISABLED
      mark_gap (blob_box, 21,
                prev_gap, prev_blob_box.width (),
                current_gap, next_blob_box.width (), next_gap);
#endif
    }
    /* Rule 22: xht gap crosses min_space. */
    else if (tosp_use_xht_gaps &&
             (real_current_gap < row->min_space) &&
             (within_xht_current_gap >= row->min_space)) {
      space = TRUE;
#ifndef GRAPHICS_DISABLED
      mark_gap (blob_box, 22,
                prev_gap, prev_blob_box.width (),
                current_gap, next_blob_box.width (), next_gap);
#endif
    }
    /* Now continue with normal heuristics */
    else if ((current_gap < row->min_space) &&
             (current_gap > row->space_threshold)) {
      /* Heuristics to turn dubious spaces to kerns */
      if (tosp_pass_wide_fuzz_sp_to_context > 0)
        fuzzy_sp_to_kn_limit = row->kern_size +
          tosp_pass_wide_fuzz_sp_to_context *
          (row->space_size - row->kern_size);
      else
        fuzzy_sp_to_kn_limit = 99999.0f;  //effectively no limit

      /* In rules 1-4 below the gap either stays a space but is marked fuzzy
         (when all flips are fuzzy or the gap exceeds the limit above), or
         is flipped outright to a kern. */

      /* If current gap is significantly smaller than the previous space the other
         side of a narrow blob then this gap is a kern. */
      if ((prev_blob_box.width () > 0) &&
          narrow_blob (row, prev_blob_box) &&
          prev_gap_was_a_space &&
          (current_gap <= tosp_gap_factor * prev_gap)) {
        if ((tosp_all_flips_fuzzy) ||
            (current_gap > fuzzy_sp_to_kn_limit)) {
          if (tosp_flip_fuzz_sp_to_kn)
            fuzzy_non = TRUE;
          else
            fuzzy_sp = TRUE;
        }
        else
          space = FALSE;
#ifndef GRAPHICS_DISABLED
        mark_gap (blob_box, 1,
                  prev_gap, prev_blob_box.width (),
                  current_gap, next_blob_box.width (), next_gap);
#endif
      }
      /* If current gap not much bigger than the previous kern the other side of a
         narrow blob then this gap is a kern as well */
      else if ((prev_blob_box.width () > 0) &&
               narrow_blob (row, prev_blob_box) &&
               !prev_gap_was_a_space &&
               (current_gap * tosp_gap_factor <= prev_gap)) {
        if ((tosp_all_flips_fuzzy) ||
            (current_gap > fuzzy_sp_to_kn_limit)) {
          if (tosp_flip_fuzz_sp_to_kn)
            fuzzy_non = TRUE;
          else
            fuzzy_sp = TRUE;
        }
        else
          space = FALSE;
#ifndef GRAPHICS_DISABLED
        mark_gap (blob_box, 2,
                  prev_gap, prev_blob_box.width (),
                  current_gap, next_blob_box.width (), next_gap);
#endif
      }
      /* Rule 3: as rule 1, but the narrow blob is the next one and the
         gap beyond it is above the space threshold. */
      else if ((next_blob_box.width () > 0) &&
               narrow_blob (row, next_blob_box) &&
               (next_gap > row->space_threshold) &&
               (current_gap <= tosp_gap_factor * next_gap)) {
        if ((tosp_all_flips_fuzzy) ||
            (current_gap > fuzzy_sp_to_kn_limit)) {
          if (tosp_flip_fuzz_sp_to_kn)
            fuzzy_non = TRUE;
          else
            fuzzy_sp = TRUE;
        }
        else
          space = FALSE;
#ifndef GRAPHICS_DISABLED
        mark_gap (blob_box, 3,
                  prev_gap, prev_blob_box.width (),
                  current_gap, next_blob_box.width (), next_gap);
#endif
      }
      /* Rule 4: as rule 2, but the narrow blob is the next one and the
         gap beyond it is at or below the space threshold. */
      else if ((next_blob_box.width () > 0) &&
               narrow_blob (row, next_blob_box) &&
               (next_gap <= row->space_threshold) &&
               (current_gap * tosp_gap_factor <= next_gap)) {
        if ((tosp_all_flips_fuzzy) ||
            (current_gap > fuzzy_sp_to_kn_limit)) {
          if (tosp_flip_fuzz_sp_to_kn)
            fuzzy_non = TRUE;
          else
            fuzzy_sp = TRUE;
        }
        else
          space = FALSE;
#ifndef GRAPHICS_DISABLED
        mark_gap (blob_box, 4,
                  prev_gap, prev_blob_box.width (),
                  current_gap, next_blob_box.width (), next_gap);
#endif
      }
      /* Rule 6: a narrow blob on either side and none of the above applied:
         keep the space but mark it fuzzy. */
      else if ((((next_blob_box.width () > 0) &&
                 narrow_blob (row, next_blob_box)) ||
                ((prev_blob_box.width () > 0) &&
                 narrow_blob (row, prev_blob_box)))) {
        fuzzy_sp = TRUE;
#ifndef GRAPHICS_DISABLED
        mark_gap (blob_box, 6,
                  prev_gap, prev_blob_box.width (),
                  current_gap, next_blob_box.width (), next_gap);
#endif
      }
    }
    else if ((current_gap > row->max_nonspace) &&
             (current_gap <= row->space_threshold)) {

      /* Heuristics to turn dubious kerns to spaces */
      /* TRIED THIS BUT IT MADE THINGS WORSE
         if ( prev_gap == MAX_INT16 )
         prev_gap = 0;  //start of row
         if ( next_gap == MAX_INT16 )
         next_gap = 0;  //end of row
       */
      /* Rule 7: gap is large relative to both neighbouring gaps and lies
         between two wide blobs. */
      if ((prev_blob_box.width () > 0) &&
          (next_blob_box.width () > 0) &&
          (current_gap >=
           tosp_kern_gap_factor1 * MAX (prev_gap, next_gap)) &&
          wide_blob (row, prev_blob_box) &&
          wide_blob (row, next_blob_box)) {

        space = TRUE;
        /*
           tosp_flip_caution is an attempt to stop the default changing in cases
           where there is a large difference between the kern and space estimates.
           See problem in 'chiefs' where "have" gets split in the quotation.
         */
        if ((tosp_flip_fuzz_kn_to_sp) &&
            ((tosp_flip_caution <= 0) ||
             (tosp_flip_caution * row->kern_size > row->space_size)))
          fuzzy_sp = TRUE;
        else
          fuzzy_non = TRUE;
#ifndef GRAPHICS_DISABLED
        mark_gap (blob_box, 7,
                  prev_gap, prev_blob_box.width (),
                  current_gap, next_blob_box.width (), next_gap);
#endif
      }
      /* Rule 8: gap is large relative to both neighbouring gaps (second
         factor) and neither neighbour is narrow or suspected punctuation. */
      else if ((prev_blob_box.width () > 0) &&
               (next_blob_box.width () > 0) &&
               (current_gap >=
                tosp_kern_gap_factor2 * MAX (prev_gap, next_gap)) &&
               !(narrow_blob (row, prev_blob_box) ||
                 suspected_punct_blob (row, prev_blob_box)) &&
               !(narrow_blob (row, next_blob_box) ||
                 suspected_punct_blob (row, next_blob_box))) {
        space = TRUE;
        fuzzy_non = TRUE;
#ifndef GRAPHICS_DISABLED
        mark_gap (blob_box, 8,
                  prev_gap, prev_blob_box.width (),
                  current_gap, next_blob_box.width (), next_gap);
#endif
      }
      /* Rule 9: optional third factor (disabled when it is <= 0), with an
         optional punctuation test on the neighbours. */
      else if ((tosp_kern_gap_factor3 > 0) &&
               (prev_blob_box.width () > 0) &&
               (next_blob_box.width () > 0) &&
               (current_gap >=
                tosp_kern_gap_factor3 * MAX (prev_gap, next_gap)) &&
               (!tosp_rule_9_test_punct ||
                (!suspected_punct_blob (row, prev_blob_box) &&
                 !suspected_punct_blob (row, next_blob_box)))) {
        space = TRUE;
        fuzzy_non = TRUE;
#ifndef GRAPHICS_DISABLED
        mark_gap (blob_box, 9,
                  prev_gap, prev_blob_box.width (),
                  current_gap, next_blob_box.width (), next_gap);
#endif
      }
    }
    /* Remember the verdict for the next call; a "fuzzy non-space" does not
       count as a space here. */
    prev_gap_was_a_space = space && !(fuzzy_non);
    return space;
  }
}
1541
+
1542
+
1543
+ BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box) {
1544
+ BOOL8 result;
1545
+
1546
+ result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) ||
1547
+ (((float) blob_box.width () / blob_box.height ()) <=
1548
+ tosp_narrow_aspect_ratio));
1549
+ return result;
1550
+ }
1551
+
1552
+
1553
+ BOOL8 wide_blob(TO_ROW *row, TBOX blob_box) {
1554
+ BOOL8 result;
1555
+
1556
+ if (tosp_wide_fraction > 0) {
1557
+ if (tosp_wide_aspect_ratio > 0)
1558
+ result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) &&
1559
+ (((float) blob_box.width () / blob_box.height ()) >
1560
+ tosp_wide_aspect_ratio));
1561
+ else
1562
+ result = (blob_box.width () >= tosp_wide_fraction * row->xheight);
1563
+ }
1564
+ else
1565
+ result = !narrow_blob (row, blob_box);
1566
+ return result;
1567
+ }
1568
+
1569
+
1570
+ BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box) {
1571
+ BOOL8 result;
1572
+ float baseline;
1573
+ float blob_x_centre;
1574
+
1575
+ /* Find baseline of centre of blob */
1576
+
1577
+ blob_x_centre = (box.right () + box.left ()) / 2.0;
1578
+ baseline = row->baseline.y (blob_x_centre);
1579
+
1580
+ result = (box.height () <= 0.66 * row->xheight) ||
1581
+ (box.top () < baseline + row->xheight / 2.0) ||
1582
+ (box.bottom () > baseline + row->xheight / 2.0);
1583
+ return result;
1584
+ }
1585
+
1586
+
1587
+ void peek_at_next_gap( //A COPY FOR PEEKING
1588
+ TO_ROW *row,
1589
+ BLOBNBOX_IT box_it,
1590
+ TBOX &next_blob_box,
1591
+ inT16 &next_gap,
1592
+ inT16 &next_within_xht_gap) {
1593
+ TBOX next_reduced_blob_box;
1594
+ TBOX bit_beyond;
1595
+ BLOBNBOX_IT reduced_box_it = box_it;
1596
+
1597
+ next_blob_box = box_next (&box_it);
1598
+ next_reduced_blob_box = reduced_box_next (row, &reduced_box_it);
1599
+ if (box_it.at_first ()) {
1600
+ next_gap = MAX_INT16;
1601
+ next_within_xht_gap = MAX_INT16;
1602
+ }
1603
+ else {
1604
+ bit_beyond = box_it.data ()->bounding_box ();
1605
+ next_gap = bit_beyond.left () - next_blob_box.right ();
1606
+ bit_beyond = reduced_box_next (row, &reduced_box_it);
1607
+ next_within_xht_gap =
1608
+ bit_beyond.left () - next_reduced_blob_box.right ();
1609
+ }
1610
+ }
1611
+
1612
+
1613
+ #ifndef GRAPHICS_DISABLED
1614
+ void mark_gap( //Debug stuff
1615
+ TBOX blob, //blob following gap
1616
+ inT16 rule, // heuristic id
1617
+ inT16 prev_gap,
1618
+ inT16 prev_blob_width,
1619
+ inT16 current_gap,
1620
+ inT16 next_blob_width,
1621
+ inT16 next_gap) {
1622
+ ScrollView::Color col; //of ellipse marking flipped gap
1623
+
1624
+ switch (rule) {
1625
+ case 1:
1626
+ col = ScrollView::RED;
1627
+ break;
1628
+ case 2:
1629
+ col = ScrollView::CYAN;
1630
+ break;
1631
+ case 3:
1632
+ col = ScrollView::GREEN;
1633
+ break;
1634
+ case 4:
1635
+ col = ScrollView::BLACK;
1636
+ break;
1637
+ case 5:
1638
+ col = ScrollView::MAGENTA;
1639
+ break;
1640
+ case 6:
1641
+ col = ScrollView::BLUE;
1642
+ break;
1643
+
1644
+ case 7:
1645
+ col = ScrollView::WHITE;
1646
+ break;
1647
+ case 8:
1648
+ col = ScrollView::YELLOW;
1649
+ break;
1650
+ case 9:
1651
+ col = ScrollView::BLACK;
1652
+ break;
1653
+
1654
+ case 20:
1655
+ col = ScrollView::CYAN;
1656
+ break;
1657
+ case 21:
1658
+ col = ScrollView::GREEN;
1659
+ break;
1660
+ case 22:
1661
+ col = ScrollView::MAGENTA;
1662
+ break;
1663
+ default:
1664
+ col = ScrollView::BLACK;
1665
+ }
1666
+ if (textord_show_initial_words) {
1667
+ to_win->Pen(col);
1668
+ /* if (rule < 20)
1669
+ //interior_style(to_win, INT_SOLID, FALSE);
1670
+ else
1671
+ //interior_style(to_win, INT_HOLLOW, TRUE);*/
1672
+ //x radius
1673
+ to_win->Ellipse (current_gap / 2.0f,
1674
+ blob.height () / 2.0f, //y radius
1675
+ //x centre
1676
+ blob.left () - current_gap / 2.0f,
1677
+ //y centre
1678
+ blob.bottom () + blob.height () / 2.0f);
1679
+ }
1680
+ if (tosp_debug_level > 0)
1681
+ tprintf (" (%d,%d) Sp<->Kn Rule %d %d %d %d %d\n",
1682
+ blob.left () - current_gap / 2, blob.bottom (), rule,
1683
+ prev_gap, prev_blob_width, current_gap,
1684
+ next_blob_width, next_gap);
1685
+ }
1686
+ #endif
1687
+
1688
+
1689
+ float find_mean_blob_spacing(WERD *word) {
1690
+ PBLOB_IT blob_it;
1691
+ C_BLOB_IT cblob_it;
1692
+ TBOX blob_box;
1693
+ inT32 gap_sum = 0;
1694
+ inT16 gap_count = 0;
1695
+ inT16 prev_right;
1696
+
1697
+ if (word->flag (W_POLYGON)) {
1698
+ blob_it.set_to_list (word->blob_list ());
1699
+ if (!blob_it.empty ()) {
1700
+ blob_it.mark_cycle_pt ();
1701
+ prev_right = blob_it.data ()->bounding_box ().right ();
1702
+ //first blob
1703
+ blob_it.forward ();
1704
+ for (; !blob_it.cycled_list (); blob_it.forward ()) {
1705
+ blob_box = blob_it.data ()->bounding_box ();
1706
+ gap_sum += blob_box.left () - prev_right;
1707
+ gap_count++;
1708
+ prev_right = blob_box.right ();
1709
+ }
1710
+ }
1711
+ }
1712
+ else {
1713
+ cblob_it.set_to_list (word->cblob_list ());
1714
+ if (!cblob_it.empty ()) {
1715
+ cblob_it.mark_cycle_pt ();
1716
+ prev_right = cblob_it.data ()->bounding_box ().right ();
1717
+ //first blob
1718
+ cblob_it.forward ();
1719
+ for (; !cblob_it.cycled_list (); cblob_it.forward ()) {
1720
+ blob_box = cblob_it.data ()->bounding_box ();
1721
+ gap_sum += blob_box.left () - prev_right;
1722
+ gap_count++;
1723
+ prev_right = blob_box.right ();
1724
+ }
1725
+ }
1726
+ }
1727
+ if (gap_count > 0)
1728
+ return (gap_sum / (float) gap_count);
1729
+ else
1730
+ return 0.0f;
1731
+ }
1732
+
1733
+
1734
+ BOOL8 ignore_big_gap(TO_ROW *row,
1735
+ inT32 row_length,
1736
+ GAPMAP *gapmap,
1737
+ inT16 left,
1738
+ inT16 right) {
1739
+ inT16 gap = right - left + 1;
1740
+
1741
+ if (tosp_ignore_big_gaps > 999)
1742
+ return FALSE; //Dont ignore
1743
+ if (tosp_ignore_big_gaps > 0)
1744
+ return (gap > tosp_ignore_big_gaps * row->xheight);
1745
+ if (gap > tosp_ignore_very_big_gaps * row->xheight)
1746
+ return TRUE;
1747
+ if (tosp_ignore_big_gaps == 0) {
1748
+ if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight))
1749
+ return TRUE;
1750
+ if ((gap > 1.75 * row->xheight) &&
1751
+ ((row_length > 35 * row->xheight) ||
1752
+ gapmap->table_gap (left, right)))
1753
+ return TRUE;
1754
+ }
1755
+ else {
1756
+ /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */
1757
+ if ((gap > gapmap_big_gaps * row->xheight) &&
1758
+ gapmap->table_gap (left, right))
1759
+ return TRUE;
1760
+ }
1761
+ return FALSE;
1762
+ }
1763
+
1764
+
1765
+ /**********************************************************************
1766
+ * reduced_box_next
1767
+ *
1768
+ * Compute the bounding box of this blob with merging of x overlaps
1769
+ * but no pre-chopping.
1770
+ * Then move the iterator on to the start of the next blob.
1771
+ * DONT reduce the box for small things - eg punctuation.
1772
+ **********************************************************************/
1773
+
1774
+ TBOX reduced_box_next( //get bounding box
1775
+ TO_ROW *row, //current row
1776
+ BLOBNBOX_IT *it //iterator to blobds
1777
+ ) {
1778
+ BLOBNBOX *blob; //current blob
1779
+ BLOBNBOX *head_blob; //place to store box
1780
+ TBOX full_box; //full blob boundg box
1781
+ TBOX reduced_box; //box of significant part
1782
+ inT16 left_above_xht; //ABOVE xht left limit
1783
+ inT16 new_left_above_xht; //ABOVE xht left limit
1784
+
1785
+ blob = it->data ();
1786
+ if (blob->red_box_set ()) {
1787
+ reduced_box = blob->reduced_box ();
1788
+ do {
1789
+ it->forward ();
1790
+ blob = it->data ();
1791
+ }
1792
+ //until next real blob
1793
+ while ((blob->blob () == NULL && blob->cblob () == NULL) || blob->joined_to_prev ());
1794
+ return reduced_box;
1795
+ }
1796
+ head_blob = blob;
1797
+ full_box = blob->bounding_box ();
1798
+ reduced_box = reduced_box_for_blob (blob, row, &left_above_xht);
1799
+ do {
1800
+ it->forward ();
1801
+ blob = it->data ();
1802
+ if (blob->blob () == NULL && blob->cblob () == NULL)
1803
+ //was pre-chopped
1804
+ full_box += blob->bounding_box ();
1805
+ else if (blob->joined_to_prev ()) {
1806
+ reduced_box +=
1807
+ reduced_box_for_blob(blob, row, &new_left_above_xht);
1808
+ left_above_xht = MIN (left_above_xht, new_left_above_xht);
1809
+ }
1810
+ }
1811
+ //until next real blob
1812
+ while ((blob->blob () == NULL && blob->cblob () == NULL) || blob->joined_to_prev ());
1813
+
1814
+ if ((reduced_box.width () > 0) &&
1815
+ ((reduced_box.left () + tosp_near_lh_edge * reduced_box.width ())
1816
+ < left_above_xht) && (reduced_box.height () > 0.7 * row->xheight)) {
1817
+ #ifndef GRAPHICS_DISABLED
1818
+ if (textord_show_initial_words)
1819
+ reduced_box.plot (to_win, ScrollView::YELLOW, ScrollView::YELLOW);
1820
+ #endif
1821
+ }
1822
+ else
1823
+ reduced_box = full_box;
1824
+ head_blob->set_reduced_box (reduced_box);
1825
+ return reduced_box;
1826
+ }
1827
+
1828
+
1829
+ /*************************************************************************
1830
+ * reduced_box_for_blob()
1831
+ * Find box for blob which is the same height and y position as the whole blob,
1832
+ * but whose left limit is the left most position of the blob ABOVE the
1833
+ * baseline and whose right limit is the right most position of the blob BELOW
1834
+ * the xheight.
1835
+ *
1836
+ *
1837
+ * !!!!!!! WONT WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
1838
+ * "home". Perhaps we need something which say if the width ABOVE the
1839
+ * xht alone includes the whole of the reduced width, then use the full
1840
+ * blob box - Might still fail on italic F
1841
+ *
1842
+ * Alternatively we could be a little less severe and only reduce the
1843
+ * left and right edges by half the difference between the full box and
1844
+ * the reduced box.
1845
+ *
1846
+ * NOTE that we need to rotate all the coordinates as
1847
+ * find_blob_limits finds the y min and max within a specified x band
1848
+ *************************************************************************/
1849
+
1850
+ TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht) {
1851
+ float baseline;
1852
+ float blob_x_centre;
1853
+ float left_limit;
1854
+ float right_limit;
1855
+ float junk;
1856
+ TBOX blob_box;
1857
+
1858
+ /* Find baseline of centre of blob */
1859
+
1860
+ blob_box = blob->bounding_box ();
1861
+ blob_x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
1862
+ baseline = row->baseline.y (blob_x_centre);
1863
+
1864
+ /*
1865
+ Find LH limit of blob ABOVE the xht. This is so that we can detect certain
1866
+ caps ht chars which should NOT have their box reduced: T, Y, V, W etc
1867
+ */
1868
+ left_limit = (float) MAX_INT32;
1869
+ junk = (float) -MAX_INT32;
1870
+ if (blob->blob () != NULL)
1871
+ //blob to test
1872
+ find_blob_limits (blob->blob (),
1873
+ (float) -MAX_INT16, //rotated lower limit
1874
+ -(baseline + 1.1 * row->xheight),
1875
+ //rotated upper limit
1876
+ FCOORD (0.0, 1.0), //90deg anticlock rot
1877
+ left_limit, junk); //min y max_y
1878
+ else
1879
+ //blob to test
1880
+ find_cblob_hlimits (blob->cblob (),
1881
+ //rotated lower limit
1882
+ (baseline + 1.1 * row->xheight), (float) MAX_INT16,
1883
+ //rotated upper limit
1884
+ // FCOORD( 0.0, 1.0 ), //90deg anticlock rot
1885
+ left_limit, junk); //min y max_y
1886
+ if (left_limit > junk)
1887
+ *left_above_xht = MAX_INT16; //No area above xht
1888
+ else
1889
+ *left_above_xht = (inT16) floor (left_limit);
1890
+ /*
1891
+ Find reduced LH limit of blob - the left extent of the region ABOVE the
1892
+ baseline.
1893
+ */
1894
+ left_limit = (float) MAX_INT32;
1895
+ junk = (float) -MAX_INT32;
1896
+ if (blob->blob () != NULL)
1897
+ //blob to test
1898
+ find_blob_limits (blob->blob (),
1899
+ (float) -MAX_INT16, //rotated lower limit
1900
+ -baseline, //rotated upper limit
1901
+ FCOORD (0.0, 1.0), //90deg anticlock rot
1902
+ left_limit, junk); //min y max_y
1903
+ else
1904
+ //blob to test
1905
+ find_cblob_hlimits (blob->cblob (),
1906
+ baseline, //rotated upper limit
1907
+ (float) MAX_INT16, //rotated lower limit
1908
+ // FCOORD( 0.0, 1.0 ), //90deg anticlock rot
1909
+ left_limit, junk); //min y max_y
1910
+
1911
+ if (left_limit > junk)
1912
+ return TBOX (); //no area within xht so return empty box
1913
+ /*
1914
+ Find reduced RH limit of blob - the right extent of the region BELOW the xht.
1915
+ */
1916
+ junk = (float) MAX_INT32;
1917
+ right_limit = (float) -MAX_INT32;
1918
+ if (blob->blob () != NULL)
1919
+ //blob to test
1920
+ find_blob_limits (blob->blob (),
1921
+ -(baseline + row->xheight),
1922
+ //rotated lower limit
1923
+ (float) MAX_INT16, //rotated upper limit
1924
+ FCOORD (0.0, 1.0), //90deg anticlock rot
1925
+ junk, right_limit); //min y max_y
1926
+ else
1927
+ //blob to test
1928
+ find_cblob_hlimits (blob->cblob (),
1929
+ (float) -MAX_INT16, //rotated upper limit
1930
+ (baseline + row->xheight),
1931
+ //rotated lower limit
1932
+ // FCOORD( 0.0, 1.0 ), //90deg anticlock rot
1933
+ junk, right_limit); //min y max_y
1934
+ if (junk > right_limit)
1935
+ return TBOX (); //no area within xht so return empty box
1936
+
1937
+ return TBOX (ICOORD ((inT16) floor (left_limit), blob_box.bottom ()),
1938
+ ICOORD ((inT16) ceil (right_limit), blob_box.top ()));
1939
+ }