tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612)
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1939 @@
1
+ #include "mfcpch.h"
2
+ #include "tovars.h"
3
+ #include "drawtord.h"
4
+ #include "tospace.h"
5
+ #include "ndminx.h"
6
+ #include "statistc.h"
7
+
8
+ #define EXTERN /* empty expansion: the EXTERN in front of each *_VAR line below vanishes in this file */
9
+ EXTERN BOOL_VAR (tosp_old_to_method, FALSE, "Space stats use prechopping?"); // TRUE: row_spacing_stats sets max_nonspace = space_threshold and min_space = space_threshold + 1. NOTE(review): help text is identical to tosp_use_pre_chopping's - looks like a copy/paste, confirm
10
+ EXTERN BOOL_VAR (tosp_only_use_prop_rows, TRUE,
11
+ "Block stats to use fixed pitch rows?"); // TRUE: block_spacing_stats only uses rows whose pitch_decision is PITCH_DEF_PROP or PITCH_CORR_PROP
12
+ EXTERN BOOL_VAR (tosp_use_pre_chopping, FALSE,
13
+ "Space stats use prechopping?"); // TRUE: stats loops fetch boxes with box_next_pre_chopped()
14
+ EXTERN BOOL_VAR (tosp_old_to_bug_fix, FALSE, "Fix suspected bug in old code"); // TRUE: old_to_method caps space_size at 1.5 * block space width rather than 1.0 *
15
+ EXTERN BOOL_VAR (tosp_block_use_cert_spaces, TRUE,
16
+ "Only stat OBVIOUS spaces");
17
+ EXTERN BOOL_VAR (tosp_row_use_cert_spaces, TRUE, "Only stat OBVIOUS spaces");
18
+ EXTERN BOOL_VAR (tosp_narrow_blobs_not_cert, TRUE,
19
+ "Only stat OBVIOUS spaces");
20
+ EXTERN BOOL_VAR (tosp_row_use_cert_spaces1, TRUE, "Only stat OBVIOUS spaces");
21
+ EXTERN BOOL_VAR (tosp_recovery_isolated_row_stats, TRUE,
22
+ "Use row alone when inadequate cert spaces");
23
+ EXTERN BOOL_VAR (tosp_only_small_gaps_for_kern, FALSE, "Better guess");
24
+ EXTERN BOOL_VAR (tosp_all_flips_fuzzy, FALSE, "Pass ANY flip to context?");
25
+ EXTERN BOOL_VAR (tosp_fuzzy_limit_all, TRUE,
26
+ "Dont restrict kn->sp fuzzy limit to tables");
27
+ EXTERN BOOL_VAR (tosp_stats_use_xht_gaps, TRUE,
28
+ "Use within xht gap for wd breaks"); // TRUE (and no pre-chopping): stats loops fetch boxes with reduced_box_next()
29
+ EXTERN BOOL_VAR (tosp_use_xht_gaps, TRUE, "Use within xht gap for wd breaks");
30
+ EXTERN BOOL_VAR (tosp_only_use_xht_gaps, FALSE,
31
+ "Only use within xht gap for wd breaks");
32
+ EXTERN BOOL_VAR (tosp_rule_9_test_punct, FALSE,
33
+ "Dont chng kn to space next to punct");
34
+ EXTERN BOOL_VAR (tosp_flip_fuzz_kn_to_sp, TRUE, "Default flip");
35
+ EXTERN BOOL_VAR (tosp_flip_fuzz_sp_to_kn, TRUE, "Default flip");
36
+ EXTERN BOOL_VAR (tosp_improve_thresh, FALSE, "Enable improvement heuristic"); // TRUE: row_spacing_stats calls improve_row_threshold() on rows not suspected to be tables
37
+ EXTERN INT_VAR (tosp_debug_level, 0, "Debug data"); // tprintf output is enabled at > 0 and, for more detail, at > 5
38
+ EXTERN INT_VAR (tosp_enough_space_samples_for_median, 3,
39
+ "or should we use mean"); // old_to_method: at least this many space samples -> median, fewer (but >= 1) -> mean
40
+ EXTERN INT_VAR (tosp_redo_kern_limit, 10,
41
+ "No.samples reqd to reestimate for row");
42
+ EXTERN INT_VAR (tosp_few_samples, 40,
43
+ "No.gaps reqd with 1 large gap to treat as a table");
44
+ EXTERN INT_VAR (tosp_short_row, 20,
45
+ "No.gaps reqd with few cert spaces to use certs");
46
+ EXTERN INT_VAR (tosp_sanity_method, 1, "How to avoid being silly"); // row_spacing_stats implements methods 0 and 1; any other value skips its sanity checks
47
+ EXTERN double_VAR (tosp_threshold_bias1, 0,
48
+ "how far between kern and space?"); // <= 0: row_spacing_stats uses the midpoint of the block kern and space estimates
49
+ EXTERN double_VAR (tosp_threshold_bias2, 0,
50
+ "how far between kern and space?"); // <= 0: old_to_method uses the midpoint of the row kern_size and space_size
51
+ EXTERN double_VAR (tosp_narrow_fraction, 0.3, "Fract of xheight for narrow");
52
+ EXTERN double_VAR (tosp_narrow_aspect_ratio, 0.48,
53
+ "narrow if w/h less than this");
54
+ EXTERN double_VAR (tosp_wide_fraction, 0.52, "Fract of xheight for wide");
55
+ EXTERN double_VAR (tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this");
56
+ EXTERN double_VAR (tosp_fuzzy_space_factor, 0.6,
57
+ "Fract of xheight for fuzz sp");
58
+ EXTERN double_VAR (tosp_fuzzy_space_factor1, 0.5,
59
+ "Fract of xheight for fuzz sp");
60
+ EXTERN double_VAR (tosp_fuzzy_space_factor2, 0.72,
61
+ "Fract of xheight for fuzz sp");
62
+ EXTERN double_VAR (tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
63
+ EXTERN double_VAR (tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp");
64
+ EXTERN double_VAR (tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp");
65
+ EXTERN double_VAR (tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp");
66
+ EXTERN double_VAR (tosp_ignore_big_gaps, -1, "xht multiplier");
67
+ EXTERN double_VAR (tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
68
+ EXTERN double_VAR (tosp_rep_space, 1.6, "rep gap multiplier for space");
69
+ EXTERN double_VAR (tosp_enough_small_gaps, 0.65,
70
+ "Fract of kerns reqd for isolated row stats");
71
+ EXTERN double_VAR (tosp_table_kn_sp_ratio, 2.25,
72
+ "Min difference of kn & sp in table");
73
+ EXTERN double_VAR (tosp_table_xht_sp_ratio, 0.33,
74
+ "Expect spaces bigger than this");
75
+ EXTERN double_VAR (tosp_table_fuzzy_kn_sp_ratio, 3.0,
76
+ "Fuzzy if less than this");
77
+ EXTERN double_VAR (tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
78
+ EXTERN double_VAR (tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
79
+ EXTERN double_VAR (tosp_min_sane_kn_sp, 1.5,
80
+ "Dont trust spaces less than this time kn");
81
+ EXTERN double_VAR (tosp_init_guess_kn_mult, 2.2,
82
+ "Thresh guess - mult kn by this");
83
+ EXTERN double_VAR (tosp_init_guess_xht_mult, 0.28,
84
+ "Thresh guess - mult xht by this");
85
+ EXTERN double_VAR (tosp_max_sane_kn_thresh, 5.0,
86
+ "Multiplier on kn to limit thresh");
87
+ EXTERN double_VAR (tosp_flip_caution, 0.0,
88
+ "Dont autoflip kn to sp when large separation");
89
+
90
+ EXTERN double_VAR (tosp_large_kerning, 0.19,
91
+ "Limit use of xht gap with large kns");
92
+ EXTERN double_VAR (tosp_dont_fool_with_small_kerns, -1,
93
+ "Limit use of xht gap with odd small kns");
94
+ EXTERN double_VAR (tosp_near_lh_edge, 0,
95
+ "Dont reduce box if the top left is non blank");
96
+ EXTERN double_VAR (tosp_silly_kn_sp_gap, 0.2,
97
+ "Dont let sp minus kn get too small");
98
+ EXTERN double_VAR (tosp_pass_wide_fuzz_sp_to_context, 0.75,
99
+ "How wide fuzzies need context");
100
+
101
+ #define MAXSPACING 128 /* max expected spacing in pixels; passed as the range limit of every STATS gap histogram built in this file */
102
+ /**********************************************************************
103
+ * to_spacing
104
+ *
105
+ * Compute fuzzy word spacing thresholds for each row.
106
+ * I.e. set : max_nonspace
107
+ * space_threshold
108
+ * min_space
109
+ * kern_size
110
+ * space_size for each row.
111
+ * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE
112
+ **********************************************************************/
113
+
114
+ void to_spacing( //set spacing
115
+ ICOORD page_tr, //topright of page
116
+ TO_BLOCK_LIST *blocks //blocks on page
117
+ ) {
118
+ TO_BLOCK_IT block_it; //iterator
119
+ TO_BLOCK *block; //current block;
120
+ TO_ROW_IT row_it; //row iterator
121
+ TO_ROW *row; //current row
122
+ int block_index; //block number
123
+ int row_index; //row number
124
+ inT16 block_space_gap_width; //Estimated width of real spaces for whole block
125
+ //Estimate width ofnon space gaps for whole block
126
+ inT16 block_non_space_gap_width;
127
+ //Old fixed/prop result
128
+ BOOL8 old_text_ord_proportional;
129
+ GAPMAP *gapmap = NULL; //map of big vert gaps in blk
130
+
131
+ block_it.set_to_list (blocks);
132
+ block_index = 1;
133
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
134
+ block_it.forward ()) {
135
+ block = block_it.data ();
136
+ gapmap = new GAPMAP (block);
137
+ block_spacing_stats(block,
138
+ gapmap,
139
+ old_text_ord_proportional,
140
+ block_space_gap_width,
141
+ block_non_space_gap_width);
142
+ row_it.set_to_list (block->get_rows ());
143
+ row_index = 1;
144
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
145
+ row = row_it.data ();
146
+ if ((row->pitch_decision == PITCH_DEF_PROP) ||
147
+ (row->pitch_decision == PITCH_CORR_PROP)) {
148
+ if ((tosp_debug_level > 0) && !old_text_ord_proportional)
149
+ tprintf ("Block %d Row %d: Now Proportional\n",
150
+ block_index, row_index);
151
+ row_spacing_stats(row,
152
+ gapmap,
153
+ block_index,
154
+ row_index,
155
+ block_space_gap_width,
156
+ block_non_space_gap_width);
157
+ }
158
+ else {
159
+ if ((tosp_debug_level > 0) && old_text_ord_proportional)
160
+ tprintf
161
+ ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
162
+ block_index, row_index, row->pitch_decision,
163
+ row->fixed_pitch);
164
+ }
165
+ #ifndef GRAPHICS_DISABLED
166
+ if (textord_show_initial_words)
167
+ plot_word_decisions (to_win, (inT16) row->fixed_pitch, row);
168
+ #endif
169
+ row_index++;
170
+ }
171
+ delete gapmap;
172
+ block_index++;
173
+ }
174
+ }
175
+
176
+
177
+ /*************************************************************************
178
+ * block_spacing_stats()
179
+ *************************************************************************/
180
+
181
+ void block_spacing_stats( //DEBUG USE ONLY
182
+ TO_BLOCK *block,
183
+ GAPMAP *gapmap,
184
+ BOOL8 &old_text_ord_proportional,
185
+ inT16 &block_space_gap_width, //resulting estimate
186
+ inT16 &block_non_space_gap_width //resulting estimate
187
+ ) {
188
+ TO_ROW_IT row_it; //row iterator
189
+ TO_ROW *row; //current row
190
+ BLOBNBOX_IT blob_it; //iterator
191
+
192
+ STATS centre_to_centre_stats (0, MAXSPACING);
193
+ //DEBUG USE ONLY
194
+ STATS all_gap_stats (0, MAXSPACING);
195
+ STATS space_gap_stats (0, MAXSPACING);
196
+ inT16 minwidth = MAX_INT16; //narrowest blob
197
+ TBOX blob_box;
198
+ TBOX prev_blob_box;
199
+ inT16 centre_to_centre;
200
+ inT16 gap_width;
201
+ float real_space_threshold;
202
+ float iqr_centre_to_centre; //DEBUG USE ONLY
203
+ float iqr_all_gap_stats; //DEBUG USE ONLY
204
+ inT32 end_of_row;
205
+ inT32 row_length;
206
+
207
+ row_it.set_to_list (block->get_rows ());
208
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
209
+ row = row_it.data ();
210
+ if (!row->blob_list ()->empty () &&
211
+ (!tosp_only_use_prop_rows ||
212
+ (row->pitch_decision == PITCH_DEF_PROP) ||
213
+ (row->pitch_decision == PITCH_CORR_PROP))) {
214
+ blob_it.set_to_list (row->blob_list ());
215
+ blob_it.mark_cycle_pt ();
216
+ end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
217
+ if (tosp_use_pre_chopping)
218
+ blob_box = box_next_pre_chopped (&blob_it);
219
+ else if (tosp_stats_use_xht_gaps)
220
+ blob_box = reduced_box_next (row, &blob_it);
221
+ else
222
+ blob_box = box_next (&blob_it);
223
+ row_length = end_of_row - blob_box.left ();
224
+ if (blob_box.width () < minwidth)
225
+ minwidth = blob_box.width ();
226
+ prev_blob_box = blob_box;
227
+ while (!blob_it.cycled_list ()) {
228
+ if (tosp_use_pre_chopping)
229
+ blob_box = box_next_pre_chopped (&blob_it);
230
+ else if (tosp_stats_use_xht_gaps)
231
+ blob_box = reduced_box_next (row, &blob_it);
232
+ else
233
+ blob_box = box_next (&blob_it);
234
+ if (blob_box.width () < minwidth)
235
+ minwidth = blob_box.width ();
236
+ gap_width = blob_box.left () - prev_blob_box.right ();
237
+ if (!ignore_big_gap (row, row_length, gapmap,
238
+ prev_blob_box.right (), blob_box.left ())) {
239
+ all_gap_stats.add (gap_width, 1);
240
+
241
+ centre_to_centre = (blob_box.left () + blob_box.right () -
242
+ (prev_blob_box.left () +
243
+ prev_blob_box.right ())) / 2;
244
+ //DEBUG
245
+ centre_to_centre_stats.add (centre_to_centre, 1);
246
+ // DEBUG
247
+ }
248
+ prev_blob_box = blob_box;
249
+ }
250
+ }
251
+ }
252
+
253
+ //Inadequate samples
254
+ if (all_gap_stats.get_total () <= 1) {
255
+ block_non_space_gap_width = minwidth;
256
+ block_space_gap_width = -1; //No est. space width
257
+ //DEBUG
258
+ old_text_ord_proportional = TRUE;
259
+ }
260
+ else {
261
+ /* For debug only ..... */
262
+ iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -
263
+ centre_to_centre_stats.ile (0.25);
264
+ iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);
265
+ old_text_ord_proportional =
266
+ iqr_centre_to_centre * 2 > iqr_all_gap_stats;
267
+ /* .......For debug only */
268
+
269
+ /*
270
+ The median of the gaps is used as an estimate of the NON-SPACE gap width.
271
+ This RELIES on the assumption that there are more gaps WITHIN words than
272
+ BETWEEN words in a block
273
+
274
+ Now try to estimate the width of a real space for all real spaces in the
275
+ block. Do this by using a crude threshold to ignore "narrow" gaps, then
276
+ find the median of the "wide" gaps and use this.
277
+ */
278
+ block_non_space_gap_width = (inT16) floor (all_gap_stats.median ());
279
+ // median gap
280
+
281
+ row_it.set_to_list (block->get_rows ());
282
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
283
+ row = row_it.data ();
284
+ if (!row->blob_list ()->empty () &&
285
+ (!tosp_only_use_prop_rows ||
286
+ (row->pitch_decision == PITCH_DEF_PROP) ||
287
+ (row->pitch_decision == PITCH_CORR_PROP))) {
288
+ real_space_threshold =
289
+ MAX (tosp_init_guess_kn_mult * block_non_space_gap_width,
290
+ tosp_init_guess_xht_mult * row->xheight);
291
+ blob_it.set_to_list (row->blob_list ());
292
+ blob_it.mark_cycle_pt ();
293
+ end_of_row =
294
+ blob_it.data_relative (-1)->bounding_box ().right ();
295
+ if (tosp_use_pre_chopping)
296
+ blob_box = box_next_pre_chopped (&blob_it);
297
+ else if (tosp_stats_use_xht_gaps)
298
+ blob_box = reduced_box_next (row, &blob_it);
299
+ else
300
+ blob_box = box_next (&blob_it);
301
+ row_length = blob_box.left () - end_of_row;
302
+ prev_blob_box = blob_box;
303
+ while (!blob_it.cycled_list ()) {
304
+ if (tosp_use_pre_chopping)
305
+ blob_box = box_next_pre_chopped (&blob_it);
306
+ else if (tosp_stats_use_xht_gaps)
307
+ blob_box = reduced_box_next (row, &blob_it);
308
+ else
309
+ blob_box = box_next (&blob_it);
310
+ gap_width = blob_box.left () - prev_blob_box.right ();
311
+ if ((gap_width > real_space_threshold) &&
312
+ !ignore_big_gap (row, row_length, gapmap,
313
+ prev_blob_box.right (),
314
+ blob_box.left ())) {
315
+ /*
316
+ If tosp_use_cert_spaces is enabled, the estimate of the space gap is
317
+ restricted to obvious spaces - those wider than half the xht or those
318
+ with wide blobs on both sides - i.e not things that are suspect 1's or
319
+ punctiation that is sometimes widely spaced.
320
+ */
321
+ if (!tosp_block_use_cert_spaces ||
322
+ (gap_width >
323
+ tosp_fuzzy_space_factor2 * row->xheight)
324
+ ||
325
+ ((gap_width >
326
+ tosp_fuzzy_space_factor1 * row->xheight)
327
+ && (!tosp_narrow_blobs_not_cert
328
+ || (!narrow_blob (row, prev_blob_box)
329
+ && !narrow_blob (row, blob_box))))
330
+ || (wide_blob (row, prev_blob_box)
331
+ && wide_blob (row, blob_box)))
332
+ space_gap_stats.add (gap_width, 1);
333
+ }
334
+ prev_blob_box = blob_box;
335
+ }
336
+ }
337
+ }
338
+ //Inadequate samples
339
+ if (space_gap_stats.get_total () <= 2)
340
+ block_space_gap_width = -1;//No est. space width
341
+ else
342
+ block_space_gap_width =
343
+ MAX ((inT16) floor (space_gap_stats.median ()),
344
+ 3 * block_non_space_gap_width);
345
+ }
346
+ }
347
+
348
+
349
+ /*************************************************************************
350
+ * row_spacing_stats()
351
+ * Set values for min_space, max_non_space based on row stats only
352
+ * If failure - return 0 values.
353
+ *************************************************************************/
354
+
355
+ void row_spacing_stats( //estimate for block
356
+ TO_ROW *row,
357
+ GAPMAP *gapmap,
358
+ inT16 block_idx,
359
+ inT16 row_idx,
360
+ inT16 block_space_gap_width,
361
+ inT16 block_non_space_gap_width //estimate for block
362
+ ) {
363
+ //iterator
364
+ BLOBNBOX_IT blob_it = row->blob_list ();
365
+ STATS all_gap_stats (0, MAXSPACING);
366
+ STATS cert_space_gap_stats (0, MAXSPACING);
367
+ STATS all_space_gap_stats (0, MAXSPACING);
368
+ STATS small_gap_stats (0, MAXSPACING);
369
+ TBOX blob_box;
370
+ TBOX prev_blob_box;
371
+ inT16 gap_width;
372
+ inT16 real_space_threshold = 0;
373
+ inT16 max = 0;
374
+ inT16 index;
375
+ inT16 large_gap_count = 0;
376
+ BOOL8 suspected_table;
377
+ inT32 max_max_nonspace; //upper bound
378
+ BOOL8 good_block_space_estimate = block_space_gap_width > 0;
379
+ inT32 end_of_row;
380
+ inT32 row_length = 0;
381
+ float sane_space;
382
+ inT32 sane_threshold;
383
+
384
+ /* Collect first pass stats for row */
385
+
386
+ if (!good_block_space_estimate)
387
+ block_space_gap_width = inT16 (floor (row->xheight / 2));
388
+ if (!row->blob_list ()->empty ()) {
389
+ if (tosp_threshold_bias1 > 0)
390
+ real_space_threshold =
391
+ block_non_space_gap_width +
392
+ inT16 (floor (0.5 +
393
+ tosp_threshold_bias1 * (block_space_gap_width -
394
+ block_non_space_gap_width)));
395
+ else
396
+ real_space_threshold = //Old TO method
397
+ (block_space_gap_width + block_non_space_gap_width) / 2;
398
+ blob_it.set_to_list (row->blob_list ());
399
+ blob_it.mark_cycle_pt ();
400
+ end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
401
+ if (tosp_use_pre_chopping)
402
+ blob_box = box_next_pre_chopped (&blob_it);
403
+ else if (tosp_stats_use_xht_gaps)
404
+ blob_box = reduced_box_next (row, &blob_it);
405
+ else
406
+ blob_box = box_next (&blob_it);
407
+ row_length = end_of_row - blob_box.left ();
408
+ prev_blob_box = blob_box;
409
+ while (!blob_it.cycled_list ()) {
410
+ if (tosp_use_pre_chopping)
411
+ blob_box = box_next_pre_chopped (&blob_it);
412
+ else if (tosp_stats_use_xht_gaps)
413
+ blob_box = reduced_box_next (row, &blob_it);
414
+ else
415
+ blob_box = box_next (&blob_it);
416
+ gap_width = blob_box.left () - prev_blob_box.right ();
417
+ if (ignore_big_gap (row, row_length, gapmap,
418
+ prev_blob_box.right (), blob_box.left ()))
419
+ large_gap_count++;
420
+ else {
421
+ if (gap_width >= real_space_threshold) {
422
+ if (!tosp_row_use_cert_spaces ||
423
+ (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
424
+ ((gap_width > tosp_fuzzy_space_factor1 * row->xheight)
425
+ && (!tosp_narrow_blobs_not_cert
426
+ || (!narrow_blob (row, prev_blob_box)
427
+ && !narrow_blob (row, blob_box))))
428
+ || (wide_blob (row, prev_blob_box)
429
+ && wide_blob (row, blob_box)))
430
+ cert_space_gap_stats.add (gap_width, 1);
431
+ all_space_gap_stats.add (gap_width, 1);
432
+ }
433
+ else
434
+ small_gap_stats.add (gap_width, 1);
435
+ all_gap_stats.add (gap_width, 1);
436
+ }
437
+ prev_blob_box = blob_box;
438
+ }
439
+ }
440
+ suspected_table = (large_gap_count > 1) ||
441
+ ((large_gap_count > 0) &&
442
+ (all_gap_stats.get_total () <= tosp_few_samples));
443
+
444
+ /* Now determine row kern size, space size and threshold */
445
+
446
+ if ((cert_space_gap_stats.get_total () >=
447
+ tosp_enough_space_samples_for_median) ||
448
+ ((suspected_table ||
449
+ all_gap_stats.get_total () <= tosp_short_row) &&
450
+ cert_space_gap_stats.get_total () > 0))
451
+ old_to_method(row,
452
+ &all_gap_stats,
453
+ &cert_space_gap_stats,
454
+ &small_gap_stats,
455
+ block_space_gap_width,
456
+ block_non_space_gap_width);
457
+ else {
458
+ if (!tosp_recovery_isolated_row_stats ||
459
+ !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
460
+ block_idx, row_idx)) {
461
+ if (tosp_row_use_cert_spaces && (tosp_debug_level > 5))
462
+ tprintf ("B:%d R:%d -- Inadequate certain spaces.\n",
463
+ block_idx, row_idx);
464
+ if (tosp_row_use_cert_spaces1 && good_block_space_estimate) {
465
+ //Use block default
466
+ row->space_size = block_space_gap_width;
467
+ if (all_gap_stats.get_total () > tosp_redo_kern_limit)
468
+ row->kern_size = all_gap_stats.median ();
469
+ else
470
+ row->kern_size = block_non_space_gap_width;
471
+ row->space_threshold =
472
+ inT32 (floor ((row->space_size + row->kern_size) / 2));
473
+ }
474
+ else
475
+ old_to_method(row,
476
+ &all_gap_stats,
477
+ &all_space_gap_stats,
478
+ &small_gap_stats,
479
+ block_space_gap_width,
480
+ block_non_space_gap_width);
481
+ }
482
+ }
483
+
484
+ if (tosp_improve_thresh && !suspected_table)
485
+ improve_row_threshold(row, &all_gap_stats);
486
+
487
+ /* Now lets try to be careful not to do anything silly with tables when we
488
+ are ignoring big gaps*/
489
+ if (tosp_sanity_method == 0) {
490
+ if (suspected_table &&
491
+ (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
492
+ if (tosp_debug_level > 0)
493
+ tprintf ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f.\n",
494
+ block_idx, row_idx,
495
+ row->kern_size, row->space_threshold, row->space_size);
496
+ row->space_threshold =
497
+ (inT32) (tosp_table_kn_sp_ratio * row->kern_size);
498
+ row->space_size = MAX (row->space_threshold + 1, row->xheight);
499
+ }
500
+ }
501
+ else if (tosp_sanity_method == 1) {
502
+ sane_space = row->space_size;
503
+ /* NEVER let space size get too close to kern size */
504
+ if ((row->space_size < tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5))
505
+ || ((row->space_size - row->kern_size) <
506
+ (tosp_silly_kn_sp_gap * row->xheight))) {
507
+ if (good_block_space_estimate &&
508
+ (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size))
509
+ sane_space = block_space_gap_width;
510
+ else
511
+ sane_space =
512
+ MAX (tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5),
513
+ row->xheight / 2);
514
+ if (tosp_debug_level > 0)
515
+ tprintf
516
+ ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
517
+ block_idx, row_idx, row->kern_size, row->space_threshold,
518
+ row->space_size, sane_space);
519
+ row->space_size = sane_space;
520
+ row->space_threshold =
521
+ inT32 (floor ((row->space_size + row->kern_size) / 2));
522
+ }
523
+ /* NEVER let threshold get VERY far away from kern */
524
+ sane_threshold = inT32 (floor (tosp_max_sane_kn_thresh *
525
+ MAX (row->kern_size, 2.5)));
526
+ if (row->space_threshold > sane_threshold) {
527
+ if (tosp_debug_level > 0)
528
+ tprintf ("B:%d R:%d -- DONT BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
529
+ block_idx, row_idx,
530
+ row->kern_size,
531
+ row->space_threshold, row->space_size, sane_threshold);
532
+ row->space_threshold = sane_threshold;
533
+ if (row->space_size <= sane_threshold)
534
+ row->space_size = row->space_threshold + 1.0f;
535
+ }
536
+ /* Beware of tables - there may be NO spaces */
537
+ if (suspected_table) {
538
+ sane_space = MAX (tosp_table_kn_sp_ratio * row->kern_size,
539
+ tosp_table_xht_sp_ratio * row->xheight);
540
+ sane_threshold = inT32 (floor ((sane_space + row->kern_size) / 2));
541
+
542
+ if ((row->space_size < sane_space) ||
543
+ (row->space_threshold < sane_threshold)) {
544
+ if (tosp_debug_level > 0)
545
+ tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
546
+ block_idx, row_idx,
547
+ row->kern_size,
548
+ row->space_threshold, row->space_size);
549
+ //the minimum sane value
550
+ row->space_threshold = (inT32) sane_space;
551
+ row->space_size = MAX (row->space_threshold + 1, row->xheight);
552
+ }
553
+ }
554
+ }
555
+
556
+ /* Now lets try to put some error limits on the threshold */
557
+
558
+ if (tosp_old_to_method) {
559
+ /* Old textord made a space if gap >= threshold */
560
+ //NO FUZZY SPACES YET
561
+ row->max_nonspace = row->space_threshold;
562
+ //NO FUZZY SPACES YET
563
+ row->min_space = row->space_threshold + 1;
564
+ }
565
+ else {
566
+ /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it:-) */
567
+ row->min_space =
568
+ MIN (inT32 (ceil (tosp_fuzzy_space_factor * row->xheight)),
569
+ inT32 (row->space_size));
570
+ if (row->min_space <= row->space_threshold)
571
+ //Dont be silly
572
+ row->min_space = row->space_threshold + 1;
573
+ /*
574
+ Lets try to guess the max certain kern gap by looking at the cluster of
575
+ kerns for the row. The row is proportional so the kerns should cluster
576
+ tightly at the bottom of the distribution. We also expect most gaps to be
577
+ kerns. Find the maximum of the kern piles between 0 and twice the kern
578
+ estimate. Piles before the first one with less than 1/10 the maximum
579
+ number of samples can be taken as certain kerns.
580
+
581
+ Of course, there are some cases where the kern peak and space peaks merge,
582
+ so we will put an UPPER limit on the max certain kern gap of some fraction
583
+ below the threshold.
584
+ */
585
+
586
+ max_max_nonspace = inT32 ((row->space_threshold + row->kern_size) / 2);
587
+
588
+ //default
589
+ row->max_nonspace = max_max_nonspace;
590
+ for (index = 0; index <= max_max_nonspace; index++) {
591
+ if (all_gap_stats.pile_count (index) > max)
592
+ max = all_gap_stats.pile_count (index);
593
+ if ((index > row->kern_size) &&
594
+ (all_gap_stats.pile_count (index) < 0.1 * max)) {
595
+ row->max_nonspace = index;
596
+ break;
597
+ }
598
+ }
599
+ }
600
+
601
+ /* Yet another algorithm - simpler this time - just choose a fraction of the
602
+ threshold to space range */
603
+
604
+ if ((tosp_fuzzy_sp_fraction > 0) &&
605
+ (row->space_size > row->space_threshold))
606
+ row->min_space = MAX (row->min_space,
607
+ (inT32) ceil (row->space_threshold +
608
+ tosp_fuzzy_sp_fraction *
609
+ (row->space_size -
610
+ row->space_threshold)));
611
+
612
+ /* Ensure that ANY space less than some multiplier times the kern size is
613
+ fuzzy. In tables there is a risk of erroneously setting a small space size
614
+ when there are no real spaces. Sometimes tables have text squashed into
615
+ columns so that the kn->sp ratio is small anyway - this means that we cant
616
+ use this to force a wider separation - hence we rely on context to join any
617
+ dubious breaks. */
618
+
619
+ if ((tosp_table_fuzzy_kn_sp_ratio > 0) &&
620
+ (suspected_table || tosp_fuzzy_limit_all))
621
+ row->min_space = MAX (row->min_space,
622
+ (inT32) ceil (tosp_table_fuzzy_kn_sp_ratio *
623
+ row->kern_size));
624
+
625
+ if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold))
626
+ row->max_nonspace = (inT32) floor (0.5 + row->kern_size +
627
+ tosp_fuzzy_kn_fraction *
628
+ (row->space_threshold -
629
+ row->kern_size));
630
+
631
+ if (row->max_nonspace > row->space_threshold)
632
+ //Dont be silly
633
+ row->max_nonspace = row->space_threshold;
634
+
635
+ if (tosp_debug_level > 5)
636
+ tprintf
637
+ ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
638
+ block_idx, row_idx, row_length, block_non_space_gap_width,
639
+ block_space_gap_width, real_space_threshold, row->kern_size,
640
+ row->max_nonspace, row->space_threshold, row->min_space,
641
+ row->space_size);
642
+ }
643
+
644
+
645
+ void old_to_method( //estimate for block
646
+ TO_ROW *row,
647
+ STATS *all_gap_stats,
648
+ STATS *space_gap_stats,
649
+ STATS *small_gap_stats,
650
+ inT16 block_space_gap_width,
651
+ inT16 block_non_space_gap_width //estimate for block
652
+ ) {
653
+ /* Old to condition was > 2 */
654
+ if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) {
655
+ //Adequate samples
656
+ /* Set space size to median of spaces BUT limits it if it seems wildly out */
657
+ row->space_size = space_gap_stats->median ();
658
+ if (row->space_size > block_space_gap_width * 1.5) {
659
+ if (tosp_old_to_bug_fix)
660
+ row->space_size = block_space_gap_width * 1.5;
661
+ else
662
+ //BUG??? should be *1.5
663
+ row->space_size = block_space_gap_width;
664
+ }
665
+ if (row->space_size < (block_non_space_gap_width * 2) + 1)
666
+ row->space_size = (block_non_space_gap_width * 2) + 1;
667
+ }
668
+ //Only 1 or 2 samples
669
+ else if (space_gap_stats->get_total () >= 1) {
670
+ //hence mean not median
671
+ row->space_size = space_gap_stats->mean ();
672
+ if (row->space_size > block_space_gap_width * 1.5) {
673
+ if (tosp_old_to_bug_fix)
674
+ row->space_size = block_space_gap_width * 1.5;
675
+ else
676
+ //BUG??? should be *1.5
677
+ row->space_size = block_space_gap_width;
678
+ }
679
+ if (row->space_size < (block_non_space_gap_width * 3) + 1)
680
+ row->space_size = (block_non_space_gap_width * 3) + 1;
681
+ }
682
+ else
683
+ //Use block default
684
+ row->space_size = block_space_gap_width;
685
+
686
+ if ((tosp_only_small_gaps_for_kern) &&
687
+ (small_gap_stats->get_total () > tosp_redo_kern_limit))
688
+ row->kern_size = small_gap_stats->median ();
689
+ else if (all_gap_stats->get_total () > tosp_redo_kern_limit)
690
+ row->kern_size = all_gap_stats->median ();
691
+ else
692
+ //old TO -SAME FOR ALL ROWS
693
+ row->kern_size = block_non_space_gap_width;
694
+
695
+ if (tosp_threshold_bias2 > 0)
696
+ row->space_threshold =
697
+ inT32 (floor (0.5 + row->kern_size +
698
+ tosp_threshold_bias2 * (row->space_size -
699
+ row->kern_size)));
700
+ else
701
+ /*
702
+ NOTE old text ord uses (space_size + kern_size + 1)/2 as the threshold
703
+ and holds this in a float. The use is with a >= test
704
+ NEW textord uses an integer threshold and a > test
705
+ It comes to the same thing.
706
+ (Though there is a difference in that old textor has integer space_size
707
+ and kern_size.)
708
+ */
709
+ row->space_threshold =
710
+ inT32 (floor ((row->space_size + row->kern_size) / 2));
711
+ }
712
+
713
+
714
+ /*************************************************************************
715
+ * isolated_row_stats()
716
+ * Set values for min_space, max_non_space based on row stats only
717
+ *************************************************************************/
718
+
719
+ BOOL8 isolated_row_stats(TO_ROW *row,
720
+ GAPMAP *gapmap,
721
+ STATS *all_gap_stats,
722
+ BOOL8 suspected_table,
723
+ inT16 block_idx,
724
+ inT16 row_idx) {
725
+ float kern_estimate;
726
+ float crude_threshold_estimate;
727
+ inT16 small_gaps_count;
728
+ inT16 total;
729
+ //iterator
730
+ BLOBNBOX_IT blob_it = row->blob_list ();
731
+ STATS cert_space_gap_stats (0, MAXSPACING);
732
+ STATS all_space_gap_stats (0, MAXSPACING);
733
+ STATS small_gap_stats (0, MAXSPACING);
734
+ TBOX blob_box;
735
+ TBOX prev_blob_box;
736
+ inT16 gap_width;
737
+ inT32 end_of_row;
738
+ inT32 row_length;
739
+
740
+ kern_estimate = all_gap_stats->median ();
741
+ crude_threshold_estimate = MAX (tosp_init_guess_kn_mult * kern_estimate,
742
+ tosp_init_guess_xht_mult * row->xheight);
743
+ small_gaps_count = stats_count_under (all_gap_stats,
744
+ (inT16)
745
+ ceil (crude_threshold_estimate));
746
+ total = all_gap_stats->get_total ();
747
+
748
+ if ((total <= tosp_redo_kern_limit) ||
749
+ ((small_gaps_count / (float) total) < tosp_enough_small_gaps) ||
750
+ (total - small_gaps_count < 1)) {
751
+ if (tosp_debug_level > 5)
752
+ tprintf ("B:%d R:%d -- Cant do isolated row stats.\n",
753
+ block_idx, row_idx);
754
+ return FALSE;
755
+ }
756
+ blob_it.set_to_list (row->blob_list ());
757
+ blob_it.mark_cycle_pt ();
758
+ end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
759
+ if (tosp_use_pre_chopping)
760
+ blob_box = box_next_pre_chopped (&blob_it);
761
+ else if (tosp_stats_use_xht_gaps)
762
+ blob_box = reduced_box_next (row, &blob_it);
763
+ else
764
+ blob_box = box_next (&blob_it);
765
+ row_length = end_of_row - blob_box.left ();
766
+ prev_blob_box = blob_box;
767
+ while (!blob_it.cycled_list ()) {
768
+ if (tosp_use_pre_chopping)
769
+ blob_box = box_next_pre_chopped (&blob_it);
770
+ else if (tosp_stats_use_xht_gaps)
771
+ blob_box = reduced_box_next (row, &blob_it);
772
+ else
773
+ blob_box = box_next (&blob_it);
774
+ gap_width = blob_box.left () - prev_blob_box.right ();
775
+ if (!ignore_big_gap (row, row_length, gapmap,
776
+ prev_blob_box.right (), blob_box.left ()) &&
777
+ (gap_width > crude_threshold_estimate)) {
778
+ if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
779
+ ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) &&
780
+ (!tosp_narrow_blobs_not_cert ||
781
+ (!narrow_blob (row, prev_blob_box) &&
782
+ !narrow_blob (row, blob_box)))) ||
783
+ (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box)))
784
+ cert_space_gap_stats.add (gap_width, 1);
785
+ all_space_gap_stats.add (gap_width, 1);
786
+ }
787
+ if (gap_width < crude_threshold_estimate)
788
+ small_gap_stats.add (gap_width, 1);
789
+
790
+ prev_blob_box = blob_box;
791
+ }
792
+ if (cert_space_gap_stats.get_total () >=
793
+ tosp_enough_space_samples_for_median)
794
+ //median
795
+ row->space_size = cert_space_gap_stats.median ();
796
+ else if (suspected_table && (cert_space_gap_stats.get_total () > 0))
797
+ //to avoid spaced
798
+ row->space_size = cert_space_gap_stats.mean ();
799
+ // 1's in tables
800
+ else if (all_space_gap_stats.get_total () >=
801
+ tosp_enough_space_samples_for_median)
802
+ //median
803
+ row->space_size = all_space_gap_stats.median ();
804
+ else
805
+ row->space_size = all_space_gap_stats.mean ();
806
+
807
+ if (tosp_only_small_gaps_for_kern)
808
+ row->kern_size = small_gap_stats.median ();
809
+ else
810
+ row->kern_size = all_gap_stats->median ();
811
+ row->space_threshold =
812
+ inT32 (floor ((row->space_size + row->kern_size) / 2));
813
+ /* Sanity check */
814
+ if ((row->kern_size >= row->space_threshold) ||
815
+ (row->space_threshold >= row->space_size) ||
816
+ (row->space_threshold <= 0)) {
817
+ if (tosp_debug_level > 0)
818
+ tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
819
+ block_idx, row_idx,
820
+ row->kern_size, row->space_threshold, row->space_size);
821
+ row->kern_size = 0.0f;
822
+ row->space_threshold = 0;
823
+ row->space_size = 0.0f;
824
+ return FALSE;
825
+ }
826
+
827
+ if (tosp_debug_level > 5)
828
+ tprintf ("B:%d R:%d -- Isolated row stats: %f %d %f\n",
829
+ block_idx, row_idx,
830
+ row->kern_size, row->space_threshold, row->space_size);
831
+ return TRUE;
832
+ }
833
+
834
+
835
+ inT16 stats_count_under(STATS *stats, inT16 threshold) {
836
+ inT16 index;
837
+ inT16 total = 0;
838
+
839
+ for (index = 0; index < threshold; index++)
840
+ total += stats->pile_count (index);
841
+ return total;
842
+ }
843
+
844
+
845
+ /*************************************************************************
846
+ * improve_row_threshold()
847
+ * Try to recognise a "normal line" -
848
+ * > 25 gaps
849
+ * && space > 3 * kn && space > 10
850
+ * (I.e. reasonably large space and kn:sp ratio)
851
+ * && > 3/4 # gaps < kn + (sp - kn)/3
852
+ * (I.e. most gaps are well away from space estimate)
853
+ * && a gap of max( 3, (sp - kn)/3 ) empty histogram positions is found
854
+ * somewhere in the histogram between kn and sp
855
+ * THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies
856
+ * NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!!
857
+ * try moving the default threshold to within this band but leave the
858
+ * fuzzy limit calculation as at present.
859
+ *************************************************************************/
860
+
861
+ void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) {
862
+ float sp = row->space_size;
863
+ float kn = row->kern_size;
864
+ inT16 reqd_zero_width = 0;
865
+ inT16 zero_width = 0;
866
+ inT16 zero_start = 0;
867
+ inT16 index = 0;
868
+
869
+ if (tosp_debug_level > 10)
870
+ tprintf ("Improve row threshold 0");
871
+ if ((all_gap_stats->get_total () <= 25) ||
872
+ (sp <= 10) ||
873
+ (sp <= 3 * kn) ||
874
+ (stats_count_under (all_gap_stats,
875
+ (inT16) ceil (kn + (sp - kn) / 3 + 0.5)) <
876
+ (0.75 * all_gap_stats->get_total ())))
877
+ return;
878
+ if (tosp_debug_level > 10)
879
+ tprintf (" 1");
880
+ /*
881
+ Look for the first region of all 0's in the histogram which is wider than
882
+ max( 3, (sp - kn)/3 ) and starts between kn and sp. If found, and current
883
+ threshold is not within it, move the threshold so that is is just inside it.
884
+ */
885
+ reqd_zero_width = (inT16) floor ((sp - kn) / 3 + 0.5);
886
+ if (reqd_zero_width < 3)
887
+ reqd_zero_width = 3;
888
+
889
+ for (index = inT16 (ceil (kn)); index < inT16 (floor (sp)); index++) {
890
+ if (all_gap_stats->pile_count (index) == 0) {
891
+ if (zero_width == 0)
892
+ zero_start = index;
893
+ zero_width++;
894
+ }
895
+ else {
896
+ if (zero_width >= reqd_zero_width)
897
+ break;
898
+ else {
899
+ zero_width = 0;
900
+ }
901
+ }
902
+ }
903
+ index--;
904
+ if (tosp_debug_level > 10)
905
+ tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n",
906
+ reqd_zero_width, zero_width, zero_start, row->space_threshold);
907
+ if ((zero_width < reqd_zero_width) ||
908
+ ((row->space_threshold >= zero_start) &&
909
+ (row->space_threshold <= index)))
910
+ return;
911
+ if (tosp_debug_level > 10)
912
+ tprintf (" 2");
913
+ if (row->space_threshold < zero_start) {
914
+ if (tosp_debug_level > 5)
915
+ tprintf
916
+ ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
917
+ kn, sp, zero_start, index, row->space_threshold, zero_start);
918
+ row->space_threshold = zero_start;
919
+ }
920
+ if (row->space_threshold > index) {
921
+ if (tosp_debug_level > 5)
922
+ tprintf
923
+ ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
924
+ kn, sp, zero_start, index, row->space_threshold, index);
925
+ row->space_threshold = index;
926
+ }
927
+ }
928
+
929
+
930
+ /**********************************************************************
931
+ * make_prop_words
932
+ *
933
+ * Convert a TO_BLOCK to a BLOCK.
934
+ **********************************************************************/
935
+
936
+ ROW *make_prop_words( //find lines
937
+ TO_ROW *row, //row to make
938
+ FCOORD rotation //for drawing
939
+ ) {
940
+ BOOL8 bol; //start of line
941
+ /* prev_ values are for start of word being built. non prev_ values are for
942
+ the gap between the word being built and the next one. */
943
+ BOOL8 prev_fuzzy_sp; //probably space
944
+ BOOL8 prev_fuzzy_non; //probably not
945
+ uinT8 prev_blanks; //in front of word
946
+ BOOL8 fuzzy_sp; //probably space
947
+ BOOL8 fuzzy_non; //probably not
948
+ uinT8 blanks; //in front of word
949
+ ROW *real_row; //output row
950
+ OUTLINE_IT out_it; //outlines
951
+ C_OUTLINE_IT cout_it;
952
+ PBLOB_LIST blobs; //blobs in word
953
+ C_BLOB_LIST cblobs;
954
+ PBLOB_IT blob_it = &blobs; //iterator
955
+ C_BLOB_IT cblob_it = &cblobs;
956
+ WERD_LIST words;
957
+ WERD_IT word_it; //new words
958
+ WERD *word; //new word
959
+ WERD_IT rep_char_it; //repeated char words
960
+ inT32 next_rep_char_word_right = MAX_INT32;
961
+ float repetition_spacing; //gap between repetitions
962
+ inT32 xstarts[2]; //row ends
963
+ double coeffs[3]; //quadratic
964
+ inT32 prev_x; //end of prev blob
965
+ BLOBNBOX *bblob; //current blob
966
+ TBOX blob_box; //bounding box
967
+ BLOBNBOX_IT box_it; //iterator
968
+ TBOX prev_blob_box;
969
+ TBOX next_blob_box;
970
+ inT16 prev_gap = MAX_INT16;
971
+ inT16 current_gap = MAX_INT16;
972
+ inT16 next_gap = MAX_INT16;
973
+ inT16 prev_within_xht_gap = MAX_INT16;
974
+ inT16 current_within_xht_gap = MAX_INT16;
975
+ inT16 next_within_xht_gap = MAX_INT16;
976
+ inT16 word_count = 0;
977
+ static inT16 row_count = 0;
978
+
979
+ row_count++;
980
+ rep_char_it.set_to_list (&(row->rep_words));
981
+ if (!rep_char_it.empty ()) {
982
+ next_rep_char_word_right =
983
+ rep_char_it.data ()->bounding_box ().right ();
984
+ }
985
+
986
+ prev_x = -MAX_INT16;
987
+ blob_it.set_to_list (&blobs);
988
+ cblob_it.set_to_list (&cblobs);
989
+ box_it.set_to_list (row->blob_list ());
990
+ word_it.set_to_list (&words);
991
+ bol = TRUE;
992
+ prev_blanks = 0;
993
+ prev_fuzzy_sp = FALSE;
994
+ prev_fuzzy_non = FALSE;
995
+ if (!box_it.empty ()) {
996
+ xstarts[0] = box_it.data ()->bounding_box ().left ();
997
+ if (xstarts[0] > next_rep_char_word_right) {
998
+ /* We need to insert a repeated char word at the start of the row */
999
+ word = rep_char_it.extract ();
1000
+ word_it.add_after_then_move (word);
1001
+ /* Set spaces before repeated char word */
1002
+ word->set_flag (W_BOL, TRUE);
1003
+ bol = FALSE;
1004
+ word->set_blanks (0);
1005
+ //NO uncertainty
1006
+ word->set_flag (W_FUZZY_SP, FALSE);
1007
+ word->set_flag (W_FUZZY_NON, FALSE);
1008
+ xstarts[0] = word->bounding_box ().left ();
1009
+ /* Set spaces after repeated char word (and leave current word set) */
1010
+ repetition_spacing = find_mean_blob_spacing (word);
1011
+ current_gap = box_it.data ()->bounding_box ().left () -
1012
+ next_rep_char_word_right;
1013
+ current_within_xht_gap = current_gap;
1014
+ if (current_gap > tosp_rep_space * repetition_spacing) {
1015
+ prev_blanks = (uinT8) floor (current_gap / row->space_size);
1016
+ if (prev_blanks < 1)
1017
+ prev_blanks = 1;
1018
+ }
1019
+ else
1020
+ prev_blanks = 0;
1021
+ if (tosp_debug_level > 5)
1022
+ tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
1023
+ box_it.data ()->bounding_box ().left (),
1024
+ box_it.data ()->bounding_box ().bottom (),
1025
+ repetition_spacing, current_gap);
1026
+ prev_fuzzy_sp = FALSE;
1027
+ prev_fuzzy_non = FALSE;
1028
+ if (rep_char_it.empty ()) {
1029
+ next_rep_char_word_right = MAX_INT32;
1030
+ }
1031
+ else {
1032
+ rep_char_it.forward ();
1033
+ next_rep_char_word_right =
1034
+ rep_char_it.data ()->bounding_box ().right ();
1035
+ }
1036
+ }
1037
+
1038
+ peek_at_next_gap(row,
1039
+ box_it,
1040
+ next_blob_box,
1041
+ next_gap,
1042
+ next_within_xht_gap);
1043
+ do {
1044
+ bblob = box_it.data ();
1045
+ blob_box = bblob->bounding_box ();
1046
+ if (bblob->joined_to_prev ()) {
1047
+ if (bblob->blob () != NULL) {
1048
+ out_it.set_to_list (blob_it.data ()->out_list ());
1049
+ out_it.move_to_last ();
1050
+ out_it.add_list_after (bblob->blob ()->out_list ());
1051
+ delete bblob->blob ();
1052
+ }
1053
+ else if (bblob->cblob () != NULL) {
1054
+ cout_it.set_to_list (cblob_it.data ()->out_list ());
1055
+ cout_it.move_to_last ();
1056
+ cout_it.add_list_after (bblob->cblob ()->out_list ());
1057
+ delete bblob->cblob ();
1058
+ }
1059
+ }
1060
+ else {
1061
+ if (bblob->blob () != NULL)
1062
+ blob_it.add_after_then_move (bblob->blob ());
1063
+ else if (bblob->cblob () != NULL)
1064
+ cblob_it.add_after_then_move (bblob->cblob ());
1065
+ prev_x = blob_box.right ();
1066
+ }
1067
+ box_it.forward (); //next one
1068
+ bblob = box_it.data ();
1069
+ blob_box = bblob->bounding_box ();
1070
+
1071
+ if (!bblob->joined_to_prev () &&
1072
+ (bblob->blob () != NULL || bblob->cblob () != NULL)) {
1073
+ /* Real Blob - not multiple outlines or pre-chopped */
1074
+ prev_gap = current_gap;
1075
+ prev_within_xht_gap = current_within_xht_gap;
1076
+ prev_blob_box = next_blob_box;
1077
+ current_gap = next_gap;
1078
+ current_within_xht_gap = next_within_xht_gap;
1079
+ peek_at_next_gap(row,
1080
+ box_it,
1081
+ next_blob_box,
1082
+ next_gap,
1083
+ next_within_xht_gap);
1084
+
1085
+ if ((blob_box.left () > next_rep_char_word_right) ||
1086
+ (!tosp_only_use_xht_gaps &&
1087
+ make_a_word_break (row, blob_box, prev_gap, prev_blob_box,
1088
+ current_gap, current_within_xht_gap,
1089
+ next_blob_box, next_gap,
1090
+ blanks, fuzzy_sp, fuzzy_non)) ||
1091
+ (tosp_only_use_xht_gaps &&
1092
+ make_a_word_break (row, blob_box, prev_within_xht_gap,
1093
+ prev_blob_box,
1094
+ current_gap, current_within_xht_gap,
1095
+ next_blob_box, next_within_xht_gap,
1096
+ blanks, fuzzy_sp, fuzzy_non)) ||
1097
+ box_it.at_first ()) {
1098
+ /* Form a new word out of the blobs collected */
1099
+ if (!blob_it.empty ()) {
1100
+ word = new WERD (&blobs, prev_blanks, NULL);
1101
+ //make real word
1102
+ word_count++;
1103
+ }
1104
+ else {
1105
+ word = new WERD (&cblobs, prev_blanks, NULL);
1106
+ word_count++;
1107
+ }
1108
+ word_it.add_after_then_move (word);
1109
+ if (bol) {
1110
+ word->set_flag (W_BOL, TRUE);
1111
+ bol = FALSE;
1112
+ }
1113
+ if (prev_fuzzy_sp)
1114
+ //probably space
1115
+ word->set_flag (W_FUZZY_SP, TRUE);
1116
+ else if (prev_fuzzy_non)
1117
+ word->set_flag (W_FUZZY_NON, TRUE);
1118
+ //probably not
1119
+
1120
+ if (blob_box.left () > next_rep_char_word_right) {
1121
+ /* We need to insert a repeated char word */
1122
+ word = rep_char_it.extract ();
1123
+ word_it.add_after_then_move (word);
1124
+
1125
+ /* Set spaces before repeated char word */
1126
+ repetition_spacing = find_mean_blob_spacing (word);
1127
+ current_gap = word->bounding_box ().left () - prev_x;
1128
+ current_within_xht_gap = current_gap;
1129
+ if (current_gap > tosp_rep_space * repetition_spacing) {
1130
+ blanks =
1131
+ (uinT8) floor (current_gap / row->space_size);
1132
+ if (blanks < 1)
1133
+ blanks = 1;
1134
+ }
1135
+ else
1136
+ blanks = 0;
1137
+ if (tosp_debug_level > 5)
1138
+ tprintf
1139
+ ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1140
+ word->bounding_box ().left (),
1141
+ word->bounding_box ().bottom (),
1142
+ repetition_spacing, current_gap, blanks);
1143
+ word->set_blanks (blanks);
1144
+ //NO uncertainty
1145
+ word->set_flag (W_FUZZY_SP, FALSE);
1146
+ word->set_flag (W_FUZZY_NON, FALSE);
1147
+
1148
+ /* Set spaces after repeated char word (and leave current word set) */
1149
+ current_gap =
1150
+ blob_box.left () - next_rep_char_word_right;
1151
+ if (current_gap > tosp_rep_space * repetition_spacing) {
1152
+ blanks = (uinT8) (current_gap / row->space_size);
1153
+ if (blanks < 1)
1154
+ blanks = 1;
1155
+ }
1156
+ else
1157
+ blanks = 0;
1158
+ if (tosp_debug_level > 5)
1159
+ tprintf (" Rgap:%d (%d blanks)\n",
1160
+ current_gap, blanks);
1161
+ fuzzy_sp = FALSE;
1162
+ fuzzy_non = FALSE;
1163
+
1164
+ if (rep_char_it.empty ()) {
1165
+ next_rep_char_word_right = MAX_INT32;
1166
+ }
1167
+ else {
1168
+ rep_char_it.forward ();
1169
+ next_rep_char_word_right =
1170
+ rep_char_it.data ()->bounding_box ().right ();
1171
+ }
1172
+ }
1173
+
1174
+ if (box_it.at_first () && rep_char_it.empty ()) {
1175
+ //at end of line
1176
+ word->set_flag (W_EOL, TRUE);
1177
+ xstarts[1] = prev_x;
1178
+ }
1179
+ else {
1180
+ prev_blanks = blanks;
1181
+ prev_fuzzy_sp = fuzzy_sp;
1182
+ prev_fuzzy_non = fuzzy_non;
1183
+ }
1184
+ }
1185
+ }
1186
+ }
1187
+ while (!box_it.at_first ()); //until back at start
1188
+
1189
+ /* Insert any further repeated char words */
1190
+ while (!rep_char_it.empty ()) {
1191
+ word = rep_char_it.extract ();
1192
+ word_it.add_after_then_move (word);
1193
+
1194
+ /* Set spaces before repeated char word */
1195
+ repetition_spacing = find_mean_blob_spacing (word);
1196
+ current_gap = word->bounding_box ().left () - prev_x;
1197
+ if (current_gap > tosp_rep_space * repetition_spacing) {
1198
+ blanks = (uinT8) floor (current_gap / row->space_size);
1199
+ if (blanks < 1)
1200
+ blanks = 1;
1201
+ }
1202
+ else
1203
+ blanks = 0;
1204
+ if (tosp_debug_level > 5)
1205
+ tprintf
1206
+ ("Repch wd at EOL (%d,%d). rep spacing %d; Lgap:%d (%d blanks)\n",
1207
+ word->bounding_box ().left (), word->bounding_box ().bottom (),
1208
+ repetition_spacing, current_gap, blanks);
1209
+ word->set_blanks (blanks);
1210
+ //NO uncertainty
1211
+ word->set_flag (W_FUZZY_SP, FALSE);
1212
+ word->set_flag (W_FUZZY_NON, FALSE);
1213
+ prev_x = word->bounding_box ().right ();
1214
+ if (rep_char_it.empty ()) {
1215
+ //at end of line
1216
+ word->set_flag (W_EOL, TRUE);
1217
+ xstarts[1] = prev_x;
1218
+ }
1219
+ else {
1220
+ rep_char_it.forward ();
1221
+ }
1222
+ }
1223
+ coeffs[0] = 0;
1224
+ coeffs[1] = row->line_m ();
1225
+ coeffs[2] = row->line_c ();
1226
+ real_row = new ROW (row,
1227
+ (inT16) row->kern_size, (inT16) row->space_size);
1228
+ word_it.set_to_list (real_row->word_list ());
1229
+ //put words in row
1230
+ word_it.add_list_after (&words);
1231
+ real_row->recalc_bounding_box ();
1232
+ if (tosp_debug_level > 9) {
1233
+ tprintf ("Row %d Made %d words in row ((%d,%d)(%d,%d))\n",
1234
+ row_count,
1235
+ word_count,
1236
+ real_row->bounding_box ().left (),
1237
+ real_row->bounding_box ().bottom (),
1238
+ real_row->bounding_box ().right (),
1239
+ real_row->bounding_box ().top ());
1240
+ }
1241
+ return real_row;
1242
+ }
1243
+ return NULL;
1244
+ }
1245
+
1246
+
1247
+ BOOL8 make_a_word_break( //decide on word break
1248
+ TO_ROW *row, //row being made
1249
+ TBOX blob_box, //for next_blob //how many blanks?
1250
+ inT16 prev_gap,
1251
+ TBOX prev_blob_box,
1252
+ inT16 real_current_gap,
1253
+ inT16 within_xht_current_gap,
1254
+ TBOX next_blob_box,
1255
+ inT16 next_gap,
1256
+ uinT8 &blanks,
1257
+ BOOL8 &fuzzy_sp,
1258
+ BOOL8 &fuzzy_non) {
1259
+ static BOOL8 prev_gap_was_a_space;
1260
+ BOOL8 space;
1261
+ inT16 current_gap;
1262
+ float fuzzy_sp_to_kn_limit;
1263
+
1264
+ /* Inhibit using the reduced gap if
1265
+ The kerning is large - chars are not kerned and reducing "f"s can cause
1266
+ erroneous blanks
1267
+ OR The real gap is less than 0
1268
+ OR The real gap is less than the kerning estimate
1269
+ */
1270
+ if ((row->kern_size > tosp_large_kerning * row->xheight) ||
1271
+ ((tosp_dont_fool_with_small_kerns >= 0) &&
1272
+ (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
1273
+ //Ignore the difference
1274
+ within_xht_current_gap = real_current_gap;
1275
+
1276
+ if (tosp_use_xht_gaps && tosp_only_use_xht_gaps)
1277
+ current_gap = within_xht_current_gap;
1278
+ else
1279
+ current_gap = real_current_gap;
1280
+
1281
+ if (tosp_old_to_method) {
1282
+ //Boring old method
1283
+ space = current_gap > row->max_nonspace;
1284
+ if (space && (current_gap < MAX_INT16)) {
1285
+ if (current_gap < row->min_space) {
1286
+ if (current_gap > row->space_threshold) {
1287
+ blanks = 1;
1288
+ fuzzy_sp = TRUE;
1289
+ fuzzy_non = FALSE;
1290
+ }
1291
+ else {
1292
+ blanks = 0;
1293
+ fuzzy_sp = FALSE;
1294
+ fuzzy_non = TRUE;
1295
+ }
1296
+ }
1297
+ else {
1298
+ blanks = (uinT8) (current_gap / row->space_size);
1299
+ if (blanks < 1)
1300
+ blanks = 1;
1301
+ fuzzy_sp = FALSE;
1302
+ fuzzy_non = FALSE;
1303
+ }
1304
+ }
1305
+ return space;
1306
+ }
1307
+ else {
1308
+ /* New exciting heuristic method */
1309
+ if (prev_blob_box.null_box ())
1310
+ //Beginning of row
1311
+ prev_gap_was_a_space = TRUE;
1312
+
1313
+ //Default as old TO
1314
+ space = current_gap > row->space_threshold;
1315
+
1316
+ /* Set defaults for the word break incase we find one. Currently there are
1317
+ no fuzzy spaces. Depending on the reliability of the different heuristics
1318
+ we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY
1319
+ be used if the function returns TRUE - ie the word is to be broken.
1320
+ */
1321
+ blanks = (uinT8) (current_gap / row->space_size);
1322
+ if (blanks < 1)
1323
+ blanks = 1;
1324
+ fuzzy_sp = FALSE;
1325
+ fuzzy_non = FALSE;
1326
+ /*
1327
+ If xht measure causes gap to flip one of the 3 thresholds act accordingly -
1328
+ despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to
1329
+ context.
1330
+ */
1331
+ if (tosp_use_xht_gaps &&
1332
+ (real_current_gap <= row->max_nonspace) &&
1333
+ (within_xht_current_gap > row->max_nonspace)) {
1334
+ space = TRUE;
1335
+ fuzzy_non = TRUE;
1336
+ #ifndef GRAPHICS_DISABLED
1337
+ mark_gap (blob_box, 20,
1338
+ prev_gap, prev_blob_box.width (),
1339
+ current_gap, next_blob_box.width (), next_gap);
1340
+ #endif
1341
+ }
1342
+ else if (tosp_use_xht_gaps &&
1343
+ (real_current_gap <= row->space_threshold) &&
1344
+ (within_xht_current_gap > row->space_threshold)) {
1345
+ space = TRUE;
1346
+ if (tosp_flip_fuzz_kn_to_sp)
1347
+ fuzzy_sp = TRUE;
1348
+ else
1349
+ fuzzy_non = TRUE;
1350
+ #ifndef GRAPHICS_DISABLED
1351
+ mark_gap (blob_box, 21,
1352
+ prev_gap, prev_blob_box.width (),
1353
+ current_gap, next_blob_box.width (), next_gap);
1354
+ #endif
1355
+ }
1356
+ else if (tosp_use_xht_gaps &&
1357
+ (real_current_gap < row->min_space) &&
1358
+ (within_xht_current_gap >= row->min_space)) {
1359
+ space = TRUE;
1360
+ #ifndef GRAPHICS_DISABLED
1361
+ mark_gap (blob_box, 22,
1362
+ prev_gap, prev_blob_box.width (),
1363
+ current_gap, next_blob_box.width (), next_gap);
1364
+ #endif
1365
+ }
1366
+ /* Now continue with normal heuristics */
1367
+ else if ((current_gap < row->min_space) &&
1368
+ (current_gap > row->space_threshold)) {
1369
+ /* Heuristics to turn dubious spaces to kerns */
1370
+ if (tosp_pass_wide_fuzz_sp_to_context > 0)
1371
+ fuzzy_sp_to_kn_limit = row->kern_size +
1372
+ tosp_pass_wide_fuzz_sp_to_context *
1373
+ (row->space_size - row->kern_size);
1374
+ else
1375
+ fuzzy_sp_to_kn_limit = 99999.0f;
1376
+
1377
+ /* If current gap is significantly smaller than the previous space the other
1378
+ side of a narrow blob then this gap is a kern. */
1379
+ if ((prev_blob_box.width () > 0) &&
1380
+ narrow_blob (row, prev_blob_box) &&
1381
+ prev_gap_was_a_space &&
1382
+ (current_gap <= tosp_gap_factor * prev_gap)) {
1383
+ if ((tosp_all_flips_fuzzy) ||
1384
+ (current_gap > fuzzy_sp_to_kn_limit)) {
1385
+ if (tosp_flip_fuzz_sp_to_kn)
1386
+ fuzzy_non = TRUE;
1387
+ else
1388
+ fuzzy_sp = TRUE;
1389
+ }
1390
+ else
1391
+ space = FALSE;
1392
+ #ifndef GRAPHICS_DISABLED
1393
+ mark_gap (blob_box, 1,
1394
+ prev_gap, prev_blob_box.width (),
1395
+ current_gap, next_blob_box.width (), next_gap);
1396
+ #endif
1397
+ }
1398
+ /* If current gap not much bigger than the previous kern the other side of a
1399
+ narrow blob then this gap is a kern as well */
1400
+ else if ((prev_blob_box.width () > 0) &&
1401
+ narrow_blob (row, prev_blob_box) &&
1402
+ !prev_gap_was_a_space &&
1403
+ (current_gap * tosp_gap_factor <= prev_gap)) {
1404
+ if ((tosp_all_flips_fuzzy) ||
1405
+ (current_gap > fuzzy_sp_to_kn_limit)) {
1406
+ if (tosp_flip_fuzz_sp_to_kn)
1407
+ fuzzy_non = TRUE;
1408
+ else
1409
+ fuzzy_sp = TRUE;
1410
+ }
1411
+ else
1412
+ space = FALSE;
1413
+ #ifndef GRAPHICS_DISABLED
1414
+ mark_gap (blob_box, 2,
1415
+ prev_gap, prev_blob_box.width (),
1416
+ current_gap, next_blob_box.width (), next_gap);
1417
+ #endif
1418
+ }
1419
+ else if ((next_blob_box.width () > 0) &&
1420
+ narrow_blob (row, next_blob_box) &&
1421
+ (next_gap > row->space_threshold) &&
1422
+ (current_gap <= tosp_gap_factor * next_gap)) {
1423
+ if ((tosp_all_flips_fuzzy) ||
1424
+ (current_gap > fuzzy_sp_to_kn_limit)) {
1425
+ if (tosp_flip_fuzz_sp_to_kn)
1426
+ fuzzy_non = TRUE;
1427
+ else
1428
+ fuzzy_sp = TRUE;
1429
+ }
1430
+ else
1431
+ space = FALSE;
1432
+ #ifndef GRAPHICS_DISABLED
1433
+ mark_gap (blob_box, 3,
1434
+ prev_gap, prev_blob_box.width (),
1435
+ current_gap, next_blob_box.width (), next_gap);
1436
+ #endif
1437
+ }
1438
+ else if ((next_blob_box.width () > 0) &&
1439
+ narrow_blob (row, next_blob_box) &&
1440
+ (next_gap <= row->space_threshold) &&
1441
+ (current_gap * tosp_gap_factor <= next_gap)) {
1442
+ if ((tosp_all_flips_fuzzy) ||
1443
+ (current_gap > fuzzy_sp_to_kn_limit)) {
1444
+ if (tosp_flip_fuzz_sp_to_kn)
1445
+ fuzzy_non = TRUE;
1446
+ else
1447
+ fuzzy_sp = TRUE;
1448
+ }
1449
+ else
1450
+ space = FALSE;
1451
+ #ifndef GRAPHICS_DISABLED
1452
+ mark_gap (blob_box, 4,
1453
+ prev_gap, prev_blob_box.width (),
1454
+ current_gap, next_blob_box.width (), next_gap);
1455
+ #endif
1456
+ }
1457
+ else if ((((next_blob_box.width () > 0) &&
1458
+ narrow_blob (row, next_blob_box)) ||
1459
+ ((prev_blob_box.width () > 0) &&
1460
+ narrow_blob (row, prev_blob_box)))) {
1461
+ fuzzy_sp = TRUE;
1462
+ #ifndef GRAPHICS_DISABLED
1463
+ mark_gap (blob_box, 6,
1464
+ prev_gap, prev_blob_box.width (),
1465
+ current_gap, next_blob_box.width (), next_gap);
1466
+ #endif
1467
+ }
1468
+ }
1469
+ else if ((current_gap > row->max_nonspace) &&
1470
+ (current_gap <= row->space_threshold)) {
1471
+
1472
+ /* Heuristics to turn dubious kerns to spaces */
1473
+ /* TRIED THIS BUT IT MADE THINGS WORSE
1474
+ if ( prev_gap == MAX_INT16 )
1475
+ prev_gap = 0; //start of row
1476
+ if ( next_gap == MAX_INT16 )
1477
+ next_gap = 0; //end of row
1478
+ */
1479
+ if ((prev_blob_box.width () > 0) &&
1480
+ (next_blob_box.width () > 0) &&
1481
+ (current_gap >=
1482
+ tosp_kern_gap_factor1 * MAX (prev_gap, next_gap)) &&
1483
+ wide_blob (row, prev_blob_box) &&
1484
+ wide_blob (row, next_blob_box)) {
1485
+
1486
+ space = TRUE;
1487
+ /*
1488
+ tosp_flip_caution is an attempt to stop the default changing in cases
1489
+ where there is a large difference between the kern and space estimates.
1490
+ See problem in 'chiefs' where "have" gets split in the quotation.
1491
+ */
1492
+ if ((tosp_flip_fuzz_kn_to_sp) &&
1493
+ ((tosp_flip_caution <= 0) ||
1494
+ (tosp_flip_caution * row->kern_size > row->space_size)))
1495
+ fuzzy_sp = TRUE;
1496
+ else
1497
+ fuzzy_non = TRUE;
1498
+ #ifndef GRAPHICS_DISABLED
1499
+ mark_gap (blob_box, 7,
1500
+ prev_gap, prev_blob_box.width (),
1501
+ current_gap, next_blob_box.width (), next_gap);
1502
+ #endif
1503
+ }
1504
+ else if ((prev_blob_box.width () > 0) &&
1505
+ (next_blob_box.width () > 0) &&
1506
+ (current_gap >=
1507
+ tosp_kern_gap_factor2 * MAX (prev_gap, next_gap)) &&
1508
+ !(narrow_blob (row, prev_blob_box) ||
1509
+ suspected_punct_blob (row, prev_blob_box)) &&
1510
+ !(narrow_blob (row, next_blob_box) ||
1511
+ suspected_punct_blob (row, next_blob_box))) {
1512
+ space = TRUE;
1513
+ fuzzy_non = TRUE;
1514
+ #ifndef GRAPHICS_DISABLED
1515
+ mark_gap (blob_box, 8,
1516
+ prev_gap, prev_blob_box.width (),
1517
+ current_gap, next_blob_box.width (), next_gap);
1518
+ #endif
1519
+ }
1520
+ else if ((tosp_kern_gap_factor3 > 0) &&
1521
+ (prev_blob_box.width () > 0) &&
1522
+ (next_blob_box.width () > 0) &&
1523
+ (current_gap >=
1524
+ tosp_kern_gap_factor3 * MAX (prev_gap, next_gap)) &&
1525
+ (!tosp_rule_9_test_punct ||
1526
+ (!suspected_punct_blob (row, prev_blob_box) &&
1527
+ !suspected_punct_blob (row, next_blob_box)))) {
1528
+ space = TRUE;
1529
+ fuzzy_non = TRUE;
1530
+ #ifndef GRAPHICS_DISABLED
1531
+ mark_gap (blob_box, 9,
1532
+ prev_gap, prev_blob_box.width (),
1533
+ current_gap, next_blob_box.width (), next_gap);
1534
+ #endif
1535
+ }
1536
+ }
1537
+ prev_gap_was_a_space = space && !(fuzzy_non);
1538
+ return space;
1539
+ }
1540
+ }
1541
+
1542
+
1543
+ BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box) {
1544
+ BOOL8 result;
1545
+
1546
+ result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) ||
1547
+ (((float) blob_box.width () / blob_box.height ()) <=
1548
+ tosp_narrow_aspect_ratio));
1549
+ return result;
1550
+ }
1551
+
1552
+
1553
+ BOOL8 wide_blob(TO_ROW *row, TBOX blob_box) {
1554
+ BOOL8 result;
1555
+
1556
+ if (tosp_wide_fraction > 0) {
1557
+ if (tosp_wide_aspect_ratio > 0)
1558
+ result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) &&
1559
+ (((float) blob_box.width () / blob_box.height ()) >
1560
+ tosp_wide_aspect_ratio));
1561
+ else
1562
+ result = (blob_box.width () >= tosp_wide_fraction * row->xheight);
1563
+ }
1564
+ else
1565
+ result = !narrow_blob (row, blob_box);
1566
+ return result;
1567
+ }
1568
+
1569
+
1570
+ BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box) {
1571
+ BOOL8 result;
1572
+ float baseline;
1573
+ float blob_x_centre;
1574
+
1575
+ /* Find baseline of centre of blob */
1576
+
1577
+ blob_x_centre = (box.right () + box.left ()) / 2.0;
1578
+ baseline = row->baseline.y (blob_x_centre);
1579
+
1580
+ result = (box.height () <= 0.66 * row->xheight) ||
1581
+ (box.top () < baseline + row->xheight / 2.0) ||
1582
+ (box.bottom () > baseline + row->xheight / 2.0);
1583
+ return result;
1584
+ }
1585
+
1586
+
1587
+ void peek_at_next_gap( //A COPY FOR PEEKING
1588
+ TO_ROW *row,
1589
+ BLOBNBOX_IT box_it,
1590
+ TBOX &next_blob_box,
1591
+ inT16 &next_gap,
1592
+ inT16 &next_within_xht_gap) {
1593
+ TBOX next_reduced_blob_box;
1594
+ TBOX bit_beyond;
1595
+ BLOBNBOX_IT reduced_box_it = box_it;
1596
+
1597
+ next_blob_box = box_next (&box_it);
1598
+ next_reduced_blob_box = reduced_box_next (row, &reduced_box_it);
1599
+ if (box_it.at_first ()) {
1600
+ next_gap = MAX_INT16;
1601
+ next_within_xht_gap = MAX_INT16;
1602
+ }
1603
+ else {
1604
+ bit_beyond = box_it.data ()->bounding_box ();
1605
+ next_gap = bit_beyond.left () - next_blob_box.right ();
1606
+ bit_beyond = reduced_box_next (row, &reduced_box_it);
1607
+ next_within_xht_gap =
1608
+ bit_beyond.left () - next_reduced_blob_box.right ();
1609
+ }
1610
+ }
1611
+
1612
+
1613
+ #ifndef GRAPHICS_DISABLED
1614
+ void mark_gap( //Debug stuff
1615
+ TBOX blob, //blob following gap
1616
+ inT16 rule, // heuristic id
1617
+ inT16 prev_gap,
1618
+ inT16 prev_blob_width,
1619
+ inT16 current_gap,
1620
+ inT16 next_blob_width,
1621
+ inT16 next_gap) {
1622
+ ScrollView::Color col; //of ellipse marking flipped gap
1623
+
1624
+ switch (rule) {
1625
+ case 1:
1626
+ col = ScrollView::RED;
1627
+ break;
1628
+ case 2:
1629
+ col = ScrollView::CYAN;
1630
+ break;
1631
+ case 3:
1632
+ col = ScrollView::GREEN;
1633
+ break;
1634
+ case 4:
1635
+ col = ScrollView::BLACK;
1636
+ break;
1637
+ case 5:
1638
+ col = ScrollView::MAGENTA;
1639
+ break;
1640
+ case 6:
1641
+ col = ScrollView::BLUE;
1642
+ break;
1643
+
1644
+ case 7:
1645
+ col = ScrollView::WHITE;
1646
+ break;
1647
+ case 8:
1648
+ col = ScrollView::YELLOW;
1649
+ break;
1650
+ case 9:
1651
+ col = ScrollView::BLACK;
1652
+ break;
1653
+
1654
+ case 20:
1655
+ col = ScrollView::CYAN;
1656
+ break;
1657
+ case 21:
1658
+ col = ScrollView::GREEN;
1659
+ break;
1660
+ case 22:
1661
+ col = ScrollView::MAGENTA;
1662
+ break;
1663
+ default:
1664
+ col = ScrollView::BLACK;
1665
+ }
1666
+ if (textord_show_initial_words) {
1667
+ to_win->Pen(col);
1668
+ /* if (rule < 20)
1669
+ //interior_style(to_win, INT_SOLID, FALSE);
1670
+ else
1671
+ //interior_style(to_win, INT_HOLLOW, TRUE);*/
1672
+ //x radius
1673
+ to_win->Ellipse (current_gap / 2.0f,
1674
+ blob.height () / 2.0f, //y radius
1675
+ //x centre
1676
+ blob.left () - current_gap / 2.0f,
1677
+ //y centre
1678
+ blob.bottom () + blob.height () / 2.0f);
1679
+ }
1680
+ if (tosp_debug_level > 0)
1681
+ tprintf (" (%d,%d) Sp<->Kn Rule %d %d %d %d %d\n",
1682
+ blob.left () - current_gap / 2, blob.bottom (), rule,
1683
+ prev_gap, prev_blob_width, current_gap,
1684
+ next_blob_width, next_gap);
1685
+ }
1686
+ #endif
1687
+
1688
+
1689
+ float find_mean_blob_spacing(WERD *word) {
1690
+ PBLOB_IT blob_it;
1691
+ C_BLOB_IT cblob_it;
1692
+ TBOX blob_box;
1693
+ inT32 gap_sum = 0;
1694
+ inT16 gap_count = 0;
1695
+ inT16 prev_right;
1696
+
1697
+ if (word->flag (W_POLYGON)) {
1698
+ blob_it.set_to_list (word->blob_list ());
1699
+ if (!blob_it.empty ()) {
1700
+ blob_it.mark_cycle_pt ();
1701
+ prev_right = blob_it.data ()->bounding_box ().right ();
1702
+ //first blob
1703
+ blob_it.forward ();
1704
+ for (; !blob_it.cycled_list (); blob_it.forward ()) {
1705
+ blob_box = blob_it.data ()->bounding_box ();
1706
+ gap_sum += blob_box.left () - prev_right;
1707
+ gap_count++;
1708
+ prev_right = blob_box.right ();
1709
+ }
1710
+ }
1711
+ }
1712
+ else {
1713
+ cblob_it.set_to_list (word->cblob_list ());
1714
+ if (!cblob_it.empty ()) {
1715
+ cblob_it.mark_cycle_pt ();
1716
+ prev_right = cblob_it.data ()->bounding_box ().right ();
1717
+ //first blob
1718
+ cblob_it.forward ();
1719
+ for (; !cblob_it.cycled_list (); cblob_it.forward ()) {
1720
+ blob_box = cblob_it.data ()->bounding_box ();
1721
+ gap_sum += blob_box.left () - prev_right;
1722
+ gap_count++;
1723
+ prev_right = blob_box.right ();
1724
+ }
1725
+ }
1726
+ }
1727
+ if (gap_count > 0)
1728
+ return (gap_sum / (float) gap_count);
1729
+ else
1730
+ return 0.0f;
1731
+ }
1732
+
1733
+
1734
+ BOOL8 ignore_big_gap(TO_ROW *row,
1735
+ inT32 row_length,
1736
+ GAPMAP *gapmap,
1737
+ inT16 left,
1738
+ inT16 right) {
1739
+ inT16 gap = right - left + 1;
1740
+
1741
+ if (tosp_ignore_big_gaps > 999)
1742
+ return FALSE; //Dont ignore
1743
+ if (tosp_ignore_big_gaps > 0)
1744
+ return (gap > tosp_ignore_big_gaps * row->xheight);
1745
+ if (gap > tosp_ignore_very_big_gaps * row->xheight)
1746
+ return TRUE;
1747
+ if (tosp_ignore_big_gaps == 0) {
1748
+ if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight))
1749
+ return TRUE;
1750
+ if ((gap > 1.75 * row->xheight) &&
1751
+ ((row_length > 35 * row->xheight) ||
1752
+ gapmap->table_gap (left, right)))
1753
+ return TRUE;
1754
+ }
1755
+ else {
1756
+ /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */
1757
+ if ((gap > gapmap_big_gaps * row->xheight) &&
1758
+ gapmap->table_gap (left, right))
1759
+ return TRUE;
1760
+ }
1761
+ return FALSE;
1762
+ }
1763
+
1764
+
1765
+ /**********************************************************************
1766
+ * reduced_box_next
1767
+ *
1768
+ * Compute the bounding box of this blob with merging of x overlaps
1769
+ * but no pre-chopping.
1770
+ * Then move the iterator on to the start of the next blob.
1771
+ * DONT reduce the box for small things - eg punctuation.
1772
+ **********************************************************************/
1773
+
1774
+ TBOX reduced_box_next( //get bounding box
1775
+ TO_ROW *row, //current row
1776
+ BLOBNBOX_IT *it //iterator to blobds
1777
+ ) {
1778
+ BLOBNBOX *blob; //current blob
1779
+ BLOBNBOX *head_blob; //place to store box
1780
+ TBOX full_box; //full blob boundg box
1781
+ TBOX reduced_box; //box of significant part
1782
+ inT16 left_above_xht; //ABOVE xht left limit
1783
+ inT16 new_left_above_xht; //ABOVE xht left limit
1784
+
1785
+ blob = it->data ();
1786
+ if (blob->red_box_set ()) {
1787
+ reduced_box = blob->reduced_box ();
1788
+ do {
1789
+ it->forward ();
1790
+ blob = it->data ();
1791
+ }
1792
+ //until next real blob
1793
+ while ((blob->blob () == NULL && blob->cblob () == NULL) || blob->joined_to_prev ());
1794
+ return reduced_box;
1795
+ }
1796
+ head_blob = blob;
1797
+ full_box = blob->bounding_box ();
1798
+ reduced_box = reduced_box_for_blob (blob, row, &left_above_xht);
1799
+ do {
1800
+ it->forward ();
1801
+ blob = it->data ();
1802
+ if (blob->blob () == NULL && blob->cblob () == NULL)
1803
+ //was pre-chopped
1804
+ full_box += blob->bounding_box ();
1805
+ else if (blob->joined_to_prev ()) {
1806
+ reduced_box +=
1807
+ reduced_box_for_blob(blob, row, &new_left_above_xht);
1808
+ left_above_xht = MIN (left_above_xht, new_left_above_xht);
1809
+ }
1810
+ }
1811
+ //until next real blob
1812
+ while ((blob->blob () == NULL && blob->cblob () == NULL) || blob->joined_to_prev ());
1813
+
1814
+ if ((reduced_box.width () > 0) &&
1815
+ ((reduced_box.left () + tosp_near_lh_edge * reduced_box.width ())
1816
+ < left_above_xht) && (reduced_box.height () > 0.7 * row->xheight)) {
1817
+ #ifndef GRAPHICS_DISABLED
1818
+ if (textord_show_initial_words)
1819
+ reduced_box.plot (to_win, ScrollView::YELLOW, ScrollView::YELLOW);
1820
+ #endif
1821
+ }
1822
+ else
1823
+ reduced_box = full_box;
1824
+ head_blob->set_reduced_box (reduced_box);
1825
+ return reduced_box;
1826
+ }
1827
+
1828
+
1829
+ /*************************************************************************
1830
+ * reduced_box_for_blob()
1831
+ * Find box for blob which is the same height and y position as the whole blob,
1832
+ * but whose left limit is the left most position of the blob ABOVE the
1833
+ * baseline and whose right limit is the right most position of the blob BELOW
1834
+ * the xheight.
1835
+ *
1836
+ *
1837
+ * !!!!!!! WONT WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
1838
+ * "home". Perhaps we need something which say if the width ABOVE the
1839
+ * xht alone includes the whole of the reduced width, then use the full
1840
+ * blob box - Might still fail on italic F
1841
+ *
1842
+ * Alternatively we could be a little less severe and only reduce the
1843
+ * left and right edges by half the difference between the full box and
1844
+ * the reduced box.
1845
+ *
1846
+ * NOTE that we need to rotate all the coordinates as
1847
+ * find_blob_limits finds the y min and max within a specified x band
1848
+ *************************************************************************/
1849
+
1850
+ TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht) {
1851
+ float baseline;
1852
+ float blob_x_centre;
1853
+ float left_limit;
1854
+ float right_limit;
1855
+ float junk;
1856
+ TBOX blob_box;
1857
+
1858
+ /* Find baseline of centre of blob */
1859
+
1860
+ blob_box = blob->bounding_box ();
1861
+ blob_x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
1862
+ baseline = row->baseline.y (blob_x_centre);
1863
+
1864
+ /*
1865
+ Find LH limit of blob ABOVE the xht. This is so that we can detect certain
1866
+ caps ht chars which should NOT have their box reduced: T, Y, V, W etc
1867
+ */
1868
+ left_limit = (float) MAX_INT32;
1869
+ junk = (float) -MAX_INT32;
1870
+ if (blob->blob () != NULL)
1871
+ //blob to test
1872
+ find_blob_limits (blob->blob (),
1873
+ (float) -MAX_INT16, //rotated lower limit
1874
+ -(baseline + 1.1 * row->xheight),
1875
+ //rotated upper limit
1876
+ FCOORD (0.0, 1.0), //90deg anticlock rot
1877
+ left_limit, junk); //min y max_y
1878
+ else
1879
+ //blob to test
1880
+ find_cblob_hlimits (blob->cblob (),
1881
+ //rotated lower limit
1882
+ (baseline + 1.1 * row->xheight), (float) MAX_INT16,
1883
+ //rotated upper limit
1884
+ // FCOORD( 0.0, 1.0 ), //90deg anticlock rot
1885
+ left_limit, junk); //min y max_y
1886
+ if (left_limit > junk)
1887
+ *left_above_xht = MAX_INT16; //No area above xht
1888
+ else
1889
+ *left_above_xht = (inT16) floor (left_limit);
1890
+ /*
1891
+ Find reduced LH limit of blob - the left extent of the region ABOVE the
1892
+ baseline.
1893
+ */
1894
+ left_limit = (float) MAX_INT32;
1895
+ junk = (float) -MAX_INT32;
1896
+ if (blob->blob () != NULL)
1897
+ //blob to test
1898
+ find_blob_limits (blob->blob (),
1899
+ (float) -MAX_INT16, //rotated lower limit
1900
+ -baseline, //rotated upper limit
1901
+ FCOORD (0.0, 1.0), //90deg anticlock rot
1902
+ left_limit, junk); //min y max_y
1903
+ else
1904
+ //blob to test
1905
+ find_cblob_hlimits (blob->cblob (),
1906
+ baseline, //rotated upper limit
1907
+ (float) MAX_INT16, //rotated lower limit
1908
+ // FCOORD( 0.0, 1.0 ), //90deg anticlock rot
1909
+ left_limit, junk); //min y max_y
1910
+
1911
+ if (left_limit > junk)
1912
+ return TBOX (); //no area within xht so return empty box
1913
+ /*
1914
+ Find reduced RH limit of blob - the right extent of the region BELOW the xht.
1915
+ */
1916
+ junk = (float) MAX_INT32;
1917
+ right_limit = (float) -MAX_INT32;
1918
+ if (blob->blob () != NULL)
1919
+ //blob to test
1920
+ find_blob_limits (blob->blob (),
1921
+ -(baseline + row->xheight),
1922
+ //rotated lower limit
1923
+ (float) MAX_INT16, //rotated upper limit
1924
+ FCOORD (0.0, 1.0), //90deg anticlock rot
1925
+ junk, right_limit); //min y max_y
1926
+ else
1927
+ //blob to test
1928
+ find_cblob_hlimits (blob->cblob (),
1929
+ (float) -MAX_INT16, //rotated upper limit
1930
+ (baseline + row->xheight),
1931
+ //rotated lower limit
1932
+ // FCOORD( 0.0, 1.0 ), //90deg anticlock rot
1933
+ junk, right_limit); //min y max_y
1934
+ if (junk > right_limit)
1935
+ return TBOX (); //no area within xht so return empty box
1936
+
1937
+ return TBOX (ICOORD ((inT16) floor (left_limit), blob_box.bottom ()),
1938
+ ICOORD ((inT16) ceil (right_limit), blob_box.top ()));
1939
+ }