tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,2019 @@
1
+ /**********************************************************************
2
+ * File: topitch.cpp (Formerly to_pitch.c)
3
+ * Description: Code to determine fixed pitchness and the pitch if fixed.
4
+ * Author: Ray Smith
5
+ * Created: Tue Aug 24 16:57:29 BST 1993
6
+ *
7
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #ifdef __UNIX__
22
+ #include <assert.h>
23
+ #endif
24
+ #include "stderr.h"
25
+ #include "blobbox.h"
26
+ #include "lmedsq.h"
27
+ #include "statistc.h"
28
+ #include "drawtord.h"
29
+ #include "makerow.h"
30
+ #include "pitsync1.h"
31
+ #include "pithsync.h"
32
+ #include "blobcmpl.h"
33
+ #include "tovars.h"
34
+ #include "wordseg.h"
35
+ #include "topitch.h"
36
+ #include "secname.h"
37
+
38
+ #define EXTERN //expands to nothing in this file, so the EXTERN prefix below adds no storage class
39
+
40
+ EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text");
41
+ EXTERN BOOL_VAR (textord_debug_pitch_test, FALSE,
42
+ "Debug on fixed pitch test"); //gates tprintf output in compute_block_pitch and try_rows_fixed
43
+ EXTERN BOOL_VAR (textord_disable_pitch_test, FALSE,
44
+ "Turn off dp fixed pitch algorithm");
45
+ EXTERN BOOL_VAR (textord_fast_pitch_test, FALSE,
46
+ "Do even faster pitch algorithm");
47
+ EXTERN BOOL_VAR (textord_debug_pitch_metric, FALSE,
48
+ "Write full metric stuff"); //gates the vote/metric tprintf lines in fix_row_pitch and try_doc_fixed
49
+ EXTERN BOOL_VAR (textord_show_row_cuts, FALSE, "Draw row-level cuts");
50
+ EXTERN BOOL_VAR (textord_show_page_cuts, FALSE, "Draw page-level cuts"); //try_doc_fixed plots only when this is set and to_win exists
51
+ EXTERN BOOL_VAR (textord_pitch_cheat, FALSE,
52
+ "Use correct answer for fixed/prop"); //fix_row_pitch then takes the decision from the block's text_region
53
+ EXTERN BOOL_VAR (textord_blockndoc_fixed, FALSE,
54
+ "Attempt whole doc/block fixed pitch"); //try_doc_fixed returns FALSE immediately unless this is set
55
+ EXTERN double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
56
+ EXTERN double_VAR (textord_balance_factor, 1.0,
57
+ "Ding rate for unbalanced char cells");
58
+ EXTERN double_VAR (textord_repch_width_variance, 0.2,
59
+ "Max width change of gap/blob");
60
+
61
+ #define FIXED_WIDTH_MULTIPLE 5
62
+ #define BLOCK_STATS_CLUSTERS 10 //cluster limit and array size used by row_pitch_stats
63
+ #define MAX_ALLOWED_PITCH 100 //max pixel pitch; upper end of the pitch histogram range in try_doc_fixed
64
+
65
+ /**********************************************************************
66
+ * compute_fixed_pitch
67
+ *
68
+ * Top-level pitch pass. Runs compute_block_pitch on every block, then tries
69
+ * a whole-document decision (try_doc_fixed) and, if that returns FALSE,
70
+ * try_block_fixed and try_rows_fixed per block. Finally calls fix_row_pitch on every row.
71
+ **********************************************************************/
72
+
73
+ void compute_fixed_pitch( //determine pitch
74
+ ICOORD page_tr, //top right of page; passed to create_to_win and try_doc_fixed
75
+ TO_BLOCK_LIST *port_blocks, //blocks to process
76
+ float gradient, //page skew, passed to try_doc_fixed
77
+ FCOORD rotation, //for drawing; passed through to compute_block_pitch
78
+ BOOL8 testing_on //gates the debug printing and drawing below
79
+ ) {
80
+ TO_BLOCK_IT block_it; //iterator
81
+ TO_BLOCK *block; //current block;
82
+ TO_ROW_IT row_it; //row iterator
83
+ TO_ROW *row; //current row
84
+ int block_index; //block number, counted from 1
85
+ int row_index; //row number, counted from 1 within each block
86
+
87
+ #ifndef GRAPHICS_DISABLED
88
+ if (textord_show_initial_words && testing_on) {
89
+ if (to_win == NULL)
90
+ create_to_win(page_tr); //make the debug window on first use
91
+ }
92
+ #endif
93
+
94
+ block_it.set_to_list (port_blocks);
95
+ block_index = 1;
96
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
97
+ block_it.forward ()) { //pass 1: per-block defaults and per-row pitch stats
98
+ block = block_it.data ();
99
+ compute_block_pitch(block, rotation, block_index, testing_on);
100
+ block_index++;
101
+ }
102
+
103
+ if (!try_doc_fixed (page_tr, port_blocks, gradient)) { //pass 2: no document-wide answer, so decide per block
104
+ block_index = 1;
105
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
106
+ block_it.forward ()) {
107
+ block = block_it.data ();
108
+ if (!try_block_fixed (block, block_index)) //rows are only tried when the block-level attempt fails
109
+ try_rows_fixed(block, block_index, testing_on);
110
+ block_index++;
111
+ }
112
+ }
113
+
114
+ block_index = 1;
115
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
116
+ block_it.forward ()) { //pass 3: settle every row, scanning the whole block list for votes
117
+ block = block_it.data ();
118
+ row_it.set_to_list (block->get_rows ());
119
+ row_index = 1;
120
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
121
+ row = row_it.data ();
122
+ fix_row_pitch(row, block, port_blocks, row_index, block_index);
123
+ row_index++;
124
+ }
125
+ if (testing_on
126
+ && ((textord_debug_pitch_test && block->block->text_region () != NULL)
127
+ || textord_blocksall_fixed || textord_blocksall_prop)) {
128
+ tprintf ("Corr:"); //prefix for the post-correction counts of this block
129
+ print_block_counts(block, block_index);
130
+ }
131
+ block_index++;
132
+ }
133
+ #ifndef GRAPHICS_DISABLED
134
+ if (textord_show_initial_words && testing_on) {
135
+ //overlap_picture_ops(TRUE);
136
+ ScrollView::Update();
137
+ }
138
+ #endif
139
+ }
140
+
141
+
142
+ /**********************************************************************
143
+ * fix_row_pitch
144
+ *
145
+ * Settle the pitch_decision of a row that is not already definite by voting: rows of similar height in the same block first, then similar rows in other blocks.
146
+ * Rows of dissimilar height are tallied only for the diagnostics. Afterwards the spacing fields are filled in (fixed) or fixed_pitch and char_cells are reset (prop).
147
+ **********************************************************************/
148
+
149
+ void fix_row_pitch( //get some value
150
+ TO_ROW *bad_row, //row to fix
151
+ TO_BLOCK *bad_block, //block of bad_row; only its text_region is consulted here
152
+ TO_BLOCK_LIST *blocks, //blocks to scan
153
+ inT32 row_target, //number of row; used only in messages
154
+ inT32 block_target //number of bad_row's block; rows in the block with this index count as block votes
155
+ ) {
156
+ const char *res_string; //decision on line, for the debug print
157
+ inT16 mid_cuts; //passed to tune_row_pitch; not read afterwards
158
+ int block_votes; //votes from similar rows in the target block
159
+ int like_votes; //votes from similar rows in other blocks
160
+ int other_votes; //votes from rows of dissimilar height (any block)
161
+ int block_index; //number of block, counted from 1
162
+ int row_index; //number of row; incremented but never read in this function
163
+ int maxwidth; //max pitch; upper end of both histogram ranges
164
+ TO_BLOCK_IT block_it = blocks; //block iterator
165
+ TO_ROW_IT row_it;
166
+ TO_BLOCK *block; //current block
167
+ TO_ROW *row; //current row
168
+ float sp_sd; //space deviation; passed to tune_row_pitch, not read afterwards
169
+ STATS block_stats; //pitches of voting rows in the target block
170
+ STATS like_stats; //pitches of voting rows in other blocks
171
+
172
+ block_votes = like_votes = other_votes = 0;
173
+ maxwidth = (inT32) ceil (bad_row->xheight * textord_words_maxspace);
174
+ if (bad_row->pitch_decision != PITCH_DEF_FIXED
175
+ && bad_row->pitch_decision != PITCH_DEF_PROP) { //definite rows keep their decision; no voting is done for them
176
+ block_stats.set_range (0, maxwidth);
177
+ like_stats.set_range (0, maxwidth);
178
+ block_index = 1;
179
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
180
+ block_it.forward ()) {
181
+ block = block_it.data ();
182
+ row_index = 1;
183
+ row_it.set_to_list (block->get_rows ());
184
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
185
+ row_it.forward ()) {
186
+ row = row_it.data ();
187
+ if ((bad_row->all_caps //all-caps rows are compared on xheight + ascrise
188
+ && row->xheight + row->ascrise
189
+ <
190
+ (bad_row->xheight + bad_row->ascrise) * (1 +
191
+ textord_pitch_rowsimilarity)
192
+ && row->xheight + row->ascrise >
193
+ (bad_row->xheight + bad_row->ascrise) * (1 -
194
+ textord_pitch_rowsimilarity))
195
+ || (!bad_row->all_caps //other rows are compared on xheight alone
196
+ && row->xheight <
197
+ bad_row->xheight * (1 + textord_pitch_rowsimilarity)
198
+ && row->xheight >
199
+ bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
200
+ if (block_index == block_target) { //similar row in the target block
201
+ if (row->pitch_decision == PITCH_DEF_FIXED) {
202
+ block_votes += textord_words_veto_power; //definite rows carry veto-power weight
203
+ block_stats.add ((inT32) row->fixed_pitch,
204
+ textord_words_veto_power);
205
+ }
206
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
207
+ || row->pitch_decision == PITCH_CORR_FIXED) {
208
+ block_votes++;
209
+ block_stats.add ((inT32) row->fixed_pitch, 1);
210
+ }
211
+ else if (row->pitch_decision == PITCH_DEF_PROP)
212
+ block_votes -= textord_words_veto_power;
213
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
214
+ || row->pitch_decision == PITCH_CORR_PROP)
215
+ block_votes--;
216
+ }
217
+ else { //similar row in some other block: same weighting, separate tally
218
+ if (row->pitch_decision == PITCH_DEF_FIXED) {
219
+ like_votes += textord_words_veto_power;
220
+ like_stats.add ((inT32) row->fixed_pitch,
221
+ textord_words_veto_power);
222
+ }
223
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
224
+ || row->pitch_decision == PITCH_CORR_FIXED) {
225
+ like_votes++;
226
+ like_stats.add ((inT32) row->fixed_pitch, 1);
227
+ }
228
+ else if (row->pitch_decision == PITCH_DEF_PROP)
229
+ like_votes -= textord_words_veto_power;
230
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
231
+ || row->pitch_decision == PITCH_CORR_PROP)
232
+ like_votes--;
233
+ }
234
+ }
235
+ else { //dissimilar height: counted, but no pitch is recorded
236
+ if (row->pitch_decision == PITCH_DEF_FIXED)
237
+ other_votes += textord_words_veto_power;
238
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
239
+ || row->pitch_decision == PITCH_CORR_FIXED)
240
+ other_votes++;
241
+ else if (row->pitch_decision == PITCH_DEF_PROP)
242
+ other_votes -= textord_words_veto_power;
243
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
244
+ || row->pitch_decision == PITCH_CORR_PROP)
245
+ other_votes--;
246
+ }
247
+ row_index++;
248
+ }
249
+ block_index++;
250
+ }
251
+ if (block_votes > textord_words_veto_power) { //the target block alone is decisive
252
+ bad_row->fixed_pitch = block_stats.ile (0.5); //median pitch of the voting rows
253
+ bad_row->pitch_decision = PITCH_CORR_FIXED;
254
+ }
255
+ else if (block_votes <= textord_words_veto_power && like_votes > 0) { //block not decisive: fall back to similar rows elsewhere
256
+ bad_row->fixed_pitch = like_stats.ile (0.5);
257
+ bad_row->pitch_decision = PITCH_CORR_FIXED;
258
+ }
259
+ else {
260
+ bad_row->pitch_decision = PITCH_CORR_PROP;
261
+ #ifndef SECURE_NAMES
262
+ if (block_votes == 0 && like_votes == 0 && other_votes > 0
263
+ && (textord_debug_pitch_test || textord_debug_pitch_metric))
264
+ tprintf
265
+ ("Warning:row %d of block %d set prop with no like rows against trend\n",
266
+ row_target, block_target);
267
+ #endif
268
+ }
269
+ }
270
+ if (textord_debug_pitch_metric) {
271
+ tprintf (":b_votes=%d:l_votes=%d:o_votes=%d",
272
+ block_votes, like_votes, other_votes);
273
+ if (bad_row->pitch_decision == PITCH_CORR_PROP
274
+ || bad_row->pitch_decision == PITCH_DEF_PROP) { //row called prop: compare with the block's text_region, X when there is none
275
+ res_string = bad_block->block->text_region () != NULL ?
276
+ (bad_block->block->text_region ()->
277
+ is_prop ()? "CP" : "WP") : "XP";
278
+ }
279
+ else { //row called fixed
280
+ res_string = bad_block->block->text_region () != NULL ?
281
+ (bad_block->block->text_region ()->
282
+ is_prop ()? "WF" : "CF") : "XF";
283
+ }
284
+ tprintf (":Blk=%d:Row=%d:%c:",
285
+ block_target, row_target,
286
+ bad_block->block->text_region () != NULL ?
287
+ (bad_block->block->text_region ()->
288
+ is_prop ()? 'P' : 'F') : 'X');
289
+ tprintf ("x=%g:asc=%g:corr_res=%s\n", bad_row->xheight,
290
+ bad_row->ascrise, res_string);
291
+ }
292
+ if (textord_pitch_cheat && bad_block->block->text_region () != NULL) //cheat mode overrides the vote with the region's own answer
293
+ bad_row->pitch_decision =
294
+ bad_block->block->text_region ()->
295
+ is_prop ()? PITCH_CORR_PROP : PITCH_CORR_FIXED;
296
+ if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
297
+ if (bad_row->fixed_pitch < textord_min_xheight) { //pitch below the minimum xheight: try to re-derive it
298
+ if (block_votes > 0)
299
+ bad_row->fixed_pitch = block_stats.ile (0.5);
300
+ else if (block_votes == 0 && like_votes > 0)
301
+ bad_row->fixed_pitch = like_stats.ile (0.5);
302
+ else {
303
+ tprintf
304
+ ("Warning:guessing pitch as xheight on row %d, block %d\n",
305
+ row_target, block_target);
306
+ bad_row->fixed_pitch = bad_row->xheight;
307
+ }
308
+ }
309
+ if (bad_row->fixed_pitch < textord_min_xheight) //still too small: clamp
310
+ bad_row->fixed_pitch = (float) textord_min_xheight;
311
+ bad_row->kern_size = bad_row->fixed_pitch / 4; //the spacing fields below are all fixed fractions of the pitch
312
+ bad_row->min_space = (inT32) (bad_row->fixed_pitch * 0.6);
313
+ bad_row->max_nonspace = (inT32) (bad_row->fixed_pitch * 0.4);
314
+ bad_row->space_threshold =
315
+ (bad_row->min_space + bad_row->max_nonspace) / 2;
316
+ bad_row->space_size = bad_row->fixed_pitch;
317
+ if (bad_row->char_cells.empty ()) //no cells yet: build them from the row's own projection
318
+ tune_row_pitch (bad_row, &bad_row->projection,
319
+ bad_row->projection_left, bad_row->projection_right,
320
+ (bad_row->fixed_pitch +
321
+ bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
322
+ sp_sd, mid_cuts, &bad_row->char_cells, FALSE); //return value is not used here
323
+ }
324
+ else if (bad_row->pitch_decision == PITCH_CORR_PROP
325
+ || bad_row->pitch_decision == PITCH_DEF_PROP) { //proportional rows carry no pitch and no cells
326
+ bad_row->fixed_pitch = 0.0f;
327
+ bad_row->char_cells.clear ();
328
+ }
329
+ }
330
+
331
+
332
+ /**********************************************************************
333
+ * compute_block_pitch
334
+ *
335
+ * Set the block's default spacing fields from its xheight (fixed_pitch starts at 0) and, when the block has rows, run compute_rows_pitch on it.
336
+ **********************************************************************/
337
+
338
+ void compute_block_pitch( //process each block
339
+ TO_BLOCK *block, //block to process
340
+ FCOORD rotation, //for drawing; not referenced in this body
341
+ inT32 block_index, //block number; printed and passed to compute_rows_pitch
342
+ BOOL8 testing_on //gates the debug printing and drawing
343
+ ) {
344
+ TBOX block_box; //bounding box
345
+
346
+ block_box = block->block->bounding_box ();
347
+ if (testing_on && textord_debug_pitch_test) {
348
+ tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
349
+ block_index,
350
+ block_box.left (), block_box.bottom (),
351
+ block_box.right (), block_box.top ());
352
+ }
353
+ block->min_space = (inT32) floor (block->xheight
354
+ * textord_words_default_minspace);
355
+ block->max_nonspace = (inT32) ceil (block->xheight
356
+ * textord_words_default_nonspace);
357
+ block->fixed_pitch = 0.0f; //no block-level pitch is computed here
358
+ block->space_size = (float) block->min_space;
359
+ block->kern_size = (float) block->max_nonspace;
360
+ block->pr_nonsp = block->xheight * words_default_prop_nonspace;
361
+ block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;
362
+ if (!block->get_rows ()->empty ()) {
363
+ ASSERT_HOST (block->xheight > 0);
364
+ if (textord_repeat_extraction)
365
+ find_repeated_chars(block, textord_show_initial_words &&testing_on);
366
+ #ifndef GRAPHICS_DISABLED
367
+ if (textord_show_initial_words && testing_on) //the next line is commented out, so this guards the Update call
368
+ //overlap_picture_ops(TRUE);
369
+ ScrollView::Update();
370
+ #endif
371
+ compute_rows_pitch(block,
372
+ block_index,
373
+ textord_debug_pitch_test &&testing_on); //return value is ignored
374
+ }
375
+ }
376
+
377
+
378
+ /**********************************************************************
379
+ * compute_rows_pitch
380
+ *
381
+ * For each row of the block: build its vertical projection, then run row_pitch_stats and find_row_pitch. Rows for which either fails are marked PITCH_DUNNO with fixed_pitch 0. Always returns FALSE.
382
+ **********************************************************************/
383
+
384
+ BOOL8 compute_rows_pitch( //find line stats
385
+ TO_BLOCK *block, //block to do
386
+ inT32 block_index, //block number, passed to find_row_pitch
387
+ BOOL8 testing_on //passed on to row_pitch_stats and find_row_pitch
388
+ ) {
389
+ inT32 maxwidth; //of spaces
390
+ TO_ROW *row; //current row
391
+ inT32 row_index; //row number, counted from 1
392
+ float lower, upper; //copies of the row's pr_nonsp and pr_space
393
+ TO_ROW_IT row_it = block->get_rows ();
394
+
395
+ row_index = 1;
396
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
397
+ row = row_it.data ();
398
+ ASSERT_HOST (row->xheight > 0);
399
+ row->compute_vertical_projection ();
400
+ maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
401
+ if (row_pitch_stats (row, maxwidth, testing_on)
402
+ && find_row_pitch (row, maxwidth,
403
+ textord_dotmatrix_gap + 1, block, block_index,
404
+ row_index, testing_on)) {
405
+ if (row->fixed_pitch == 0) { //no pitch on this row: use its proportional space and kern sizes
406
+ lower = row->pr_nonsp;
407
+ upper = row->pr_space;
408
+ row->space_size = upper;
409
+ row->kern_size = lower;
410
+ }
411
+ }
412
+ else {
413
+ row->fixed_pitch = 0.0f; //insufficient data
414
+ row->pitch_decision = PITCH_DUNNO;
415
+ }
416
+ row_index++;
417
+ }
418
+ return FALSE; //the only return: callers get no success signal
419
+ }
420
+
421
+
422
+ /**********************************************************************
423
+ * try_doc_fixed
424
+ *
425
+ * Attempt to call the entire document fixed pitch: merge every row's projection into one skew-shifted page projection and tune it at the median row pitch. As written, every path returns FALSE.
426
+ **********************************************************************/
427
+
428
+ BOOL8 try_doc_fixed( //determine pitch
429
+ ICOORD page_tr, //top right; not referenced in this body
430
+ TO_BLOCK_LIST *port_blocks, //input list
431
+ float gradient //page skew
432
+ ) {
433
+ inT16 master_x; //projection_left of the reference row
434
+ inT16 pitch; //median pitch, truncated to integer
435
+ int x; //profile coord
436
+ int prop_blocks; //blocks whose text_region says prop
437
+ int fixed_blocks; //blocks whose text_region says not prop
438
+ int total_row_count; //total in page
439
+ //iterator
440
+ TO_BLOCK_IT block_it = port_blocks;
441
+ TO_BLOCK *block; //current block;
442
+ TO_ROW_IT row_it; //row iterator
443
+ TO_ROW *row; //current row
444
+ inT16 projection_left; //edges of the merged projection
445
+ inT16 projection_right;
446
+ inT16 row_left; //edges of row after the skew shift
447
+ inT16 row_right;
448
+ ICOORDELT_LIST *master_cells; //cells for page; used only for plotting
449
+ float master_y; //baseline y of the reference row at master_x
450
+ float shift_factor; //page skew correction
451
+ float row_shift; //shift for row; used only for plotting
452
+ float final_pitch; //output pitch
453
+ float row_y; //baseline
454
+ STATS projection; //entire page
455
+ STATS pitches (0, MAX_ALLOWED_PITCH);
456
+ //for median
457
+ float sp_sd; //space sd
458
+ inT16 mid_cuts; //no of cheap cuts
459
+ float pitch_sd; //sync rating
460
+
461
+ if (block_it.empty ()
462
+ // || block_it.data()==block_it.data_relative(1)
463
+ || !textord_blockndoc_fixed)
464
+ return FALSE; //nothing to do, or the whole-document attempt is switched off
465
+ shift_factor = gradient / (gradient * gradient + 1);
466
+ row_it.set_to_list (block_it.data ()->get_rows ()); //NOTE(review): no check that the first block has any rows before data () is used
467
+ master_x = row_it.data ()->projection_left;
468
+ master_y = row_it.data ()->baseline.y (master_x);
469
+ projection_left = MAX_INT16;
470
+ projection_right = -MAX_INT16;
471
+ prop_blocks = 0;
472
+ fixed_blocks = 0;
473
+ total_row_count = 0;
474
+
475
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
476
+ block_it.forward ()) { //pass 1: count blocks and rows, collect pitches, find the shifted extent
477
+ block = block_it.data ();
478
+ if (block->block->text_region () != NULL) {
479
+ if (block->block->text_region ()->is_prop ())
480
+ prop_blocks++;
481
+ else
482
+ fixed_blocks++;
483
+ }
484
+ row_it.set_to_list (block->get_rows ());
485
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
486
+ row = row_it.data ();
487
+ total_row_count++;
488
+ if (row->fixed_pitch > 0)
489
+ pitches.add ((inT32) (row->fixed_pitch), 1);
490
+ //find median
491
+ row_y = row->baseline.y (master_x);
492
+ row_left =
493
+ (inT16) (row->projection_left -
494
+ shift_factor * (master_y - row_y));
495
+ row_right =
496
+ (inT16) (row->projection_right -
497
+ shift_factor * (master_y - row_y));
498
+ if (row_left < projection_left)
499
+ projection_left = row_left;
500
+ if (row_right > projection_right)
501
+ projection_right = row_right;
502
+ }
503
+ }
504
+ if (pitches.get_total () == 0)
505
+ return FALSE; //no row has a positive fixed_pitch
506
+ projection.set_range (projection_left, projection_right);
507
+
508
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
509
+ block_it.forward ()) { //pass 2: add each row's projection into the page projection at its shifted position
510
+ block = block_it.data ();
511
+ row_it.set_to_list (block->get_rows ());
512
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
513
+ row = row_it.data ();
514
+ row_y = row->baseline.y (master_x);
515
+ row_left =
516
+ (inT16) (row->projection_left -
517
+ shift_factor * (master_y - row_y));
518
+ for (x = row->projection_left; x < row->projection_right;
519
+ x++, row_left++) {
520
+ projection.add (row_left, row->projection.pile_count (x));
521
+ }
522
+ }
523
+ }
524
+
525
+ row_it.set_to_list (block_it.data ()->get_rows ()); //rows of whichever block the iterator rests on after the full cycle
526
+ row = row_it.data (); //this row receives the page-level char_cells below
527
+ #ifndef GRAPHICS_DISABLED
528
+ if (textord_show_page_cuts && to_win != NULL)
529
+ projection.plot (to_win, projection_left,
530
+ row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
531
+ #endif
532
+ final_pitch = pitches.ile (0.5); //median of the collected row pitches
533
+ pitch = (inT16) final_pitch;
534
+ pitch_sd =
535
+ tune_row_pitch (row, &projection, projection_left, projection_right,
536
+ pitch * 0.75, final_pitch, sp_sd, mid_cuts,
537
+ &row->char_cells, FALSE);
538
+
539
+ if (textord_debug_pitch_metric)
540
+ tprintf
541
+ ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
542
+ prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
543
+ pitch_sd / total_row_count, pitch_sd / pitch,
544
+ pitch_sd / total_row_count / pitch);
545
+
546
+ #ifndef GRAPHICS_DISABLED
547
+ if (textord_show_page_cuts && to_win != NULL) {
548
+ master_cells = &row->char_cells;
549
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
550
+ block_it.forward ()) {
551
+ block = block_it.data ();
552
+ row_it.set_to_list (block->get_rows ());
553
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
554
+ row_it.forward ()) {
555
+ row = row_it.data (); //reassigns row; see the note at the clear () below
556
+ row_y = row->baseline.y (master_x);
557
+ row_shift = shift_factor * (master_y - row_y);
558
+ plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
559
+ }
560
+ }
561
+ }
562
+ #endif
563
+ row->char_cells.clear (); //NOTE(review): if the plotting loop above ran, row is the last row it visited, not the row passed to tune_row_pitch - confirm this is intended
564
+ return FALSE; //success is never reported, so the caller always goes on to the per-block pass
565
+ }
566
+
567
+
568
+ /**********************************************************************
569
+ * try_block_fixed
570
+ *
571
+ * Try to call the entire block fixed. Currently a stub: does nothing and returns FALSE.
572
+ **********************************************************************/
573
+
574
+ BOOL8 try_block_fixed( //find line stats
575
+ TO_BLOCK *block, //block to do; unused
576
+ inT32 block_index //block number; unused
577
+ ) {
578
+ return FALSE; //so compute_fixed_pitch always falls through to try_rows_fixed
579
+ }
580
+
581
+
582
+ /**********************************************************************
583
+ * try_rows_fixed
584
+ *
585
+ * Run fixed_pitch_row on every row that already has a positive fixed_pitch, then set the block's pitch_decision from the counts of definite and maybe rows. Always returns FALSE.
586
+ **********************************************************************/
587
+
588
+ BOOL8 try_rows_fixed( //find line stats
589
+ TO_BLOCK *block, //block to do
590
+ inT32 block_index, //block number
591
+ BOOL8 testing_on //gates the debug print
592
+ ) {
593
+ inT32 maxwidth; //of spaces; computed but not used below
594
+ TO_ROW *row; //current row
595
+ inT32 row_index; //row number; incremented but never read
596
+ inT32 def_fixed = 0; //counters, filled by count_block_votes
597
+ inT32 def_prop = 0;
598
+ inT32 maybe_fixed = 0;
599
+ inT32 maybe_prop = 0;
600
+ inT32 dunno = 0; //counted but not used in the decision below
601
+ inT32 corr_fixed = 0; //counted but not used in the decision below
602
+ inT32 corr_prop = 0; //counted but not used in the decision below
603
+ float lower, upper; //copies of the row's pr_nonsp and pr_space
604
+ TO_ROW_IT row_it = block->get_rows ();
605
+
606
+ row_index = 1;
607
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
608
+ row = row_it.data ();
609
+ ASSERT_HOST (row->xheight > 0);
610
+ maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
611
+ if (row->fixed_pitch > 0 && fixed_pitch_row (row, block_index)) {
612
+ if (row->fixed_pitch == 0) { //only reachable if fixed_pitch_row reset fixed_pitch - TODO confirm
613
+ lower = row->pr_nonsp;
614
+ upper = row->pr_space;
615
+ row->space_size = upper;
616
+ row->kern_size = lower;
617
+ }
618
+ }
619
+ row_index++;
620
+ }
621
+ count_block_votes(block,
622
+ def_fixed,
623
+ def_prop,
624
+ maybe_fixed,
625
+ maybe_prop,
626
+ corr_fixed,
627
+ corr_prop,
628
+ dunno);
629
+ if (testing_on
630
+ && (textord_debug_pitch_test
631
+ || textord_blocksall_prop || textord_blocksall_fixed)) {
632
+ tprintf ("Initially:"); //prefix for the pre-correction counts of this block
633
+ print_block_counts(block, block_index);
634
+ }
635
+ if (def_fixed > def_prop * textord_words_veto_power) //definite rows decide first
636
+ block->pitch_decision = PITCH_DEF_FIXED;
637
+ else if (def_prop > def_fixed * textord_words_veto_power)
638
+ block->pitch_decision = PITCH_DEF_PROP;
639
+ else if (def_fixed > 0 || def_prop > 0) //definite rows exist but neither side dominates
640
+ block->pitch_decision = PITCH_DUNNO;
641
+ else if (maybe_fixed > maybe_prop * textord_words_veto_power) //no definite rows at all: use the maybe counts
642
+ block->pitch_decision = PITCH_MAYBE_FIXED;
643
+ else if (maybe_prop > maybe_fixed * textord_words_veto_power)
644
+ block->pitch_decision = PITCH_MAYBE_PROP;
645
+ else
646
+ block->pitch_decision = PITCH_DUNNO;
647
+ return FALSE; //the only return: the result is carried by block->pitch_decision
648
+ }
649
+
650
+
651
+ /**********************************************************************
652
+ * print_block_counts
653
+ *
654
+ * Count up how many rows have what decision and print the results on one line, adding " (Wrongly)" where a non-zero count contradicts the expected answer.
655
+ **********************************************************************/
656
+
657
+ void print_block_counts( //find line stats
658
+ TO_BLOCK *block, //block to do
659
+ inT32 block_index //block number; printed only
660
+ ) {
661
+ inT32 def_fixed = 0; //counters, filled by count_block_votes
662
+ inT32 def_prop = 0;
663
+ inT32 maybe_fixed = 0;
664
+ inT32 maybe_prop = 0;
665
+ inT32 dunno = 0;
666
+ inT32 corr_fixed = 0;
667
+ inT32 corr_prop = 0;
668
+
669
+ count_block_votes(block,
670
+ def_fixed,
671
+ def_prop,
672
+ maybe_fixed,
673
+ maybe_prop,
674
+ corr_fixed,
675
+ corr_prop,
676
+ dunno);
677
+ tprintf ("Block %d has (%d,%d,%d)",
678
+ block_index, def_fixed, maybe_fixed, corr_fixed);
679
+ if ((textord_blocksall_prop
680
+ || (block->block->text_region () != NULL
681
+ && block->block->text_region ()->is_prop ())) && (def_fixed
682
+ || maybe_fixed
683
+ || corr_fixed)) //block is expected to be prop, yet some rows were called fixed
684
+ tprintf (" (Wrongly)");
685
+ tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
686
+ if ((textord_blocksall_fixed
687
+ || (block->block->text_region () != NULL
688
+ && !block->block->text_region ()->is_prop ())) && (def_prop
689
+ || maybe_prop
690
+ || corr_prop)) //block is expected to be fixed, yet some rows were called prop
691
+ tprintf (" (Wrongly)");
692
+ tprintf (" prop, %d dunno\n", dunno);
693
+ }
694
+
695
+
696
+ /**********************************************************************
697
+ * count_block_votes
698
+ *
699
+ * Count the number of rows in the block with each kind of pitch_decision. The counters are incremented, never reset, so the caller must initialise them.
700
+ **********************************************************************/
701
+
702
+ void count_block_votes( //find line stats
703
+ TO_BLOCK *block, //block to do
704
+ inT32 &def_fixed, //add to counts
705
+ inT32 &def_prop,
706
+ inT32 &maybe_fixed,
707
+ inT32 &maybe_prop,
708
+ inT32 &corr_fixed,
709
+ inT32 &corr_prop,
710
+ inT32 &dunno) {
711
+ TO_ROW *row; //current row
712
+ TO_ROW_IT row_it = block->get_rows ();
713
+
714
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
715
+ row = row_it.data ();
716
+ switch (row->pitch_decision) { //no default case: any value not listed here is not counted
717
+ case PITCH_DUNNO:
718
+ dunno++;
719
+ break;
720
+ case PITCH_DEF_PROP:
721
+ def_prop++;
722
+ break;
723
+ case PITCH_MAYBE_PROP:
724
+ maybe_prop++;
725
+ break;
726
+ case PITCH_DEF_FIXED:
727
+ def_fixed++;
728
+ break;
729
+ case PITCH_MAYBE_FIXED:
730
+ maybe_fixed++;
731
+ break;
732
+ case PITCH_CORR_PROP:
733
+ corr_prop++;
734
+ break;
735
+ case PITCH_CORR_FIXED:
736
+ corr_fixed++;
737
+ break;
738
+ }
739
+ }
740
+ }
741
+
742
+
743
+ /**********************************************************************
744
+ * row_pitch_stats
745
+ *
746
+ * Decide whether each row is fixed pitch individually.
747
+ **********************************************************************/
748
+
749
+ BOOL8 row_pitch_stats( //find line stats
750
+ TO_ROW *row, //current row
751
+ inT32 maxwidth, //of spaces
752
+ BOOL8 testing_on //correct orientation
753
+ ) {
754
+ BLOBNBOX *blob; //current blob
755
+ int gap_index; //current gap
756
+ inT32 prev_x; //end of prev blob
757
+ inT32 cluster_count; //no of clusters
758
+ inT32 prev_count; //of clusters
759
+ inT32 smooth_factor; //for smoothing stats
760
+ TBOX blob_box; //bounding box
761
+ float lower, upper; //cluster thresholds
762
+ //gap sizes
763
+ float gaps[BLOCK_STATS_CLUSTERS];
764
+ //blobs
765
+ BLOBNBOX_IT blob_it = row->blob_list ();
766
+ STATS gap_stats (0, maxwidth);
767
+ STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
768
+ //clusters
769
+
770
+ smooth_factor =
771
+ (inT32) (row->xheight * textord_wordstats_smooth_factor + 1.5);
772
+ if (!blob_it.empty ()) {
773
+ prev_x = blob_it.data ()->bounding_box ().right ();
774
+ blob_it.forward ();
775
+ while (!blob_it.at_first ()) {
776
+ blob = blob_it.data ();
777
+ if (!blob->joined_to_prev ()) {
778
+ blob_box = blob->bounding_box ();
779
+ if (blob_box.left () - prev_x < maxwidth)
780
+ gap_stats.add (blob_box.left () - prev_x, 1);
781
+ prev_x = blob_box.right ();
782
+ }
783
+ blob_it.forward ();
784
+ }
785
+ }
786
+ if (gap_stats.get_total () == 0) {
787
+ return FALSE;
788
+ }
789
+ cluster_count = 0;
790
+ lower = row->xheight * words_initial_lower;
791
+ upper = row->xheight * words_initial_upper;
792
+ gap_stats.smooth (smooth_factor);
793
+ do {
794
+ prev_count = cluster_count;
795
+ cluster_count = gap_stats.cluster (lower, upper,
796
+ textord_spacesize_ratioprop,
797
+ BLOCK_STATS_CLUSTERS, cluster_stats);
798
+ }
799
+ while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
800
+ if (cluster_count < 1) {
801
+ return FALSE;
802
+ }
803
+ for (gap_index = 0; gap_index < cluster_count; gap_index++)
804
+ gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
805
+ //get medians
806
+ if (testing_on) {
807
+ tprintf ("cluster_count=%d:", cluster_count);
808
+ for (gap_index = 0; gap_index < cluster_count; gap_index++)
809
+ tprintf (" %g(%d)", gaps[gap_index],
810
+ cluster_stats[gap_index + 1].get_total ());
811
+ tprintf ("\n");
812
+ }
813
+ qsort (gaps, cluster_count, sizeof (float), sort_floats2);
814
+
815
+ //Try to find proportional non-space and space for row.
816
+ lower = row->xheight * words_default_prop_nonspace;
817
+ upper = row->xheight * textord_words_min_minspace;
818
+ for (gap_index = 0; gap_index < cluster_count
819
+ && gaps[gap_index] < lower; gap_index++);
820
+ if (gap_index == 0) {
821
+ if (testing_on)
822
+ tprintf ("No clusters below nonspace threshold!!\n");
823
+ if (cluster_count > 1) {
824
+ row->pr_nonsp = gaps[0];
825
+ row->pr_space = gaps[1];
826
+ }
827
+ else {
828
+ row->pr_nonsp = lower;
829
+ row->pr_space = gaps[0];
830
+ }
831
+ }
832
+ else {
833
+ row->pr_nonsp = gaps[gap_index - 1];
834
+ while (gap_index < cluster_count && gaps[gap_index] < upper)
835
+ gap_index++;
836
+ if (gap_index == cluster_count) {
837
+ if (testing_on)
838
+ tprintf ("No clusters above nonspace threshold!!\n");
839
+ row->pr_space = lower * textord_spacesize_ratioprop;
840
+ }
841
+ else
842
+ row->pr_space = gaps[gap_index];
843
+ }
844
+
845
+ //Now try to find the fixed pitch space and non-space.
846
+ upper = row->xheight * words_default_fixed_space;
847
+ for (gap_index = 0; gap_index < cluster_count
848
+ && gaps[gap_index] < upper; gap_index++);
849
+ if (gap_index == 0) {
850
+ if (testing_on)
851
+ tprintf ("No clusters below space threshold!!\n");
852
+ row->fp_nonsp = upper;
853
+ row->fp_space = gaps[0];
854
+ }
855
+ else {
856
+ row->fp_nonsp = gaps[gap_index - 1];
857
+ if (gap_index == cluster_count) {
858
+ if (testing_on)
859
+ tprintf ("No clusters above space threshold!!\n");
860
+ row->fp_space = row->xheight;
861
+ }
862
+ else
863
+ row->fp_space = gaps[gap_index];
864
+ }
865
+ if (testing_on) {
866
+ tprintf
867
+ ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
868
+ row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
869
+ }
870
+ return TRUE; //computed some stats
871
+ }
872
+
873
+
874
+ /**********************************************************************
875
+ * find_row_pitch
876
+ *
877
+ * Check to see if this row could be fixed pitch using the given spacings.
878
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
879
+ * The larger threshold is the word gap threshold.
880
+ **********************************************************************/
881
+
882
+ BOOL8 find_row_pitch( //find lines
883
+ TO_ROW *row, //row to do
884
+ inT32 maxwidth, //max permitted space
885
+ inT32 dm_gap, //ignorable gaps
886
+ TO_BLOCK *block, //block of row
887
+ inT32 block_index, //block_number
888
+ inT32 row_index, //number of row
889
+ BOOL8 testing_on //correct orientation
890
+ ) {
891
+ BOOL8 used_dm_model; //looks lik dot matrix
892
+ float min_space; //estimate threshold
893
+ float non_space; //gap size
894
+ float gap_iqr; //interquartile range
895
+ float pitch_iqr;
896
+ float dm_gap_iqr; //interquartile range
897
+ float dm_pitch_iqr;
898
+ float dm_pitch; //pitch with dm on
899
+ float pitch; //revised estimate
900
+ float initial_pitch; //guess at pitch
901
+ STATS gap_stats (0, maxwidth);
902
+ //centre-centre
903
+ STATS pitch_stats (0, maxwidth);
904
+
905
+ row->fixed_pitch = 0.0f;
906
+ initial_pitch = row->fp_space;
907
+ if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
908
+ initial_pitch = row->xheight;//keep pitch decent
909
+ non_space = row->fp_nonsp;
910
+ if (non_space > initial_pitch)
911
+ non_space = initial_pitch;
912
+ min_space = (initial_pitch + non_space) / 2;
913
+
914
+ if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
915
+ initial_pitch, min_space, TRUE, FALSE, dm_gap)) {
916
+ dm_gap_iqr = 0.0001;
917
+ dm_pitch_iqr = maxwidth * 2.0f;
918
+ dm_pitch = initial_pitch;
919
+ }
920
+ else {
921
+ dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
922
+ dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
923
+ dm_pitch = pitch_stats.ile (0.5);
924
+ }
925
+ gap_stats.clear ();
926
+ pitch_stats.clear ();
927
+ if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
928
+ initial_pitch, min_space, TRUE, FALSE, 0)) {
929
+ gap_iqr = 0.0001;
930
+ pitch_iqr = maxwidth * 3.0f;
931
+ }
932
+ else {
933
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
934
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
935
+ if (testing_on)
936
+ tprintf
937
+ ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
938
+ initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
939
+ initial_pitch = pitch_stats.ile (0.5);
940
+ if (min_space > initial_pitch
941
+ && count_pitch_stats (row, &gap_stats, &pitch_stats,
942
+ initial_pitch, initial_pitch, TRUE, FALSE, 0)) {
943
+ min_space = initial_pitch;
944
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
945
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
946
+ if (testing_on)
947
+ tprintf
948
+ ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
949
+ initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
950
+ initial_pitch = pitch_stats.ile (0.5);
951
+ }
952
+ }
953
+ if (textord_debug_pitch_metric)
954
+ tprintf ("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
955
+ block_index, row_index,
956
+ block->block->text_region () != NULL ?
957
+ (block->block->text_region ()->is_prop ()? 'P' : 'F') : 'X',
958
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
959
+ pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D'
960
+ : (pitch_iqr * dm_gap_iqr <=
961
+ dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
962
+ if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
963
+ row->pitch_decision = PITCH_DUNNO;
964
+ if (textord_debug_pitch_metric)
965
+ tprintf ("\n");
966
+ return FALSE; //insufficient data
967
+ }
968
+ if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
969
+ if (testing_on)
970
+ tprintf
971
+ ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
972
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
973
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
974
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
975
+ pitch = pitch_stats.ile (0.5);
976
+ used_dm_model = FALSE;
977
+ }
978
+ else {
979
+ if (testing_on)
980
+ tprintf
981
+ ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
982
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
983
+ gap_iqr = dm_gap_iqr;
984
+ pitch_iqr = dm_pitch_iqr;
985
+ pitch = dm_pitch;
986
+ used_dm_model = TRUE;
987
+ }
988
+ if (textord_debug_pitch_metric) {
989
+ tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
990
+ pitch_iqr, gap_iqr, pitch);
991
+ tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
992
+ pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
993
+ pitch_iqr < gap_iqr * textord_fpiqr_ratio
994
+ && pitch_iqr < block->xheight * textord_max_pitch_iqr
995
+ && pitch < block->xheight * textord_words_default_maxspace
996
+ ? 'F' : 'P');
997
+ }
998
+ if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
999
+ && pitch_iqr < block->xheight * textord_max_pitch_iqr
1000
+ && pitch < block->xheight * textord_words_default_maxspace)
1001
+ row->pitch_decision = PITCH_MAYBE_FIXED;
1002
+ else
1003
+ row->pitch_decision = PITCH_MAYBE_PROP;
1004
+ row->fixed_pitch = pitch;
1005
+ row->kern_size = gap_stats.ile (0.5);
1006
+ row->min_space = (inT32) (row->fixed_pitch + non_space) / 2;
1007
+ if (row->min_space > row->fixed_pitch)
1008
+ row->min_space = (inT32) row->fixed_pitch;
1009
+ row->max_nonspace = row->min_space;
1010
+ row->space_size = row->fixed_pitch;
1011
+ row->space_threshold = (row->max_nonspace + row->min_space) / 2;
1012
+ row->used_dm_model = used_dm_model;
1013
+ return TRUE;
1014
+ }
1015
+
1016
+
1017
+ /**********************************************************************
1018
+ * fixed_pitch_row
1019
+ *
1020
+ * Check to see if this row could be fixed pitch using the given spacings.
1021
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
1022
+ * The larger threshold is the word gap threshold.
1023
+ **********************************************************************/
1024
+
1025
+ BOOL8 fixed_pitch_row( //find lines
1026
+ TO_ROW *row, //row to do
1027
+ inT32 block_index //block_number
1028
+ ) {
1029
+ const char *res_string; //pitch result
1030
+ inT16 mid_cuts; //no of cheap cuts
1031
+ float non_space; //gap size
1032
+ float pitch_sd; //error on pitch
1033
+ float sp_sd; //space sd
1034
+
1035
+ non_space = row->fp_nonsp;
1036
+ if (non_space > row->fixed_pitch)
1037
+ non_space = row->fixed_pitch;
1038
+ if (textord_all_prop) {
1039
+ // Set the decision to definitely proportional.
1040
+ pitch_sd = textord_words_def_prop * row->fixed_pitch;
1041
+ row->pitch_decision = PITCH_DEF_PROP;
1042
+ } else {
1043
+ pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
1044
+ row->projection_right,
1045
+ (row->fixed_pitch + non_space * 3) / 4,
1046
+ row->fixed_pitch, sp_sd, mid_cuts,
1047
+ &row->char_cells,
1048
+ block_index == textord_debug_block);
1049
+ if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1050
+ && ((pitsync_linear_version & 3) < 3
1051
+ || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
1052
+ || sp_sd > 20
1053
+ || (pitch_sd == 0 && sp_sd > 10))))) {
1054
+ if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1055
+ && !row->all_caps
1056
+ && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1057
+ row->pitch_decision = PITCH_DEF_FIXED;
1058
+ else
1059
+ row->pitch_decision = PITCH_MAYBE_FIXED;
1060
+ }
1061
+ else if ((pitsync_linear_version & 3) < 3
1062
+ || sp_sd > 20
1063
+ || mid_cuts > 0
1064
+ || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
1065
+ if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
1066
+ row->pitch_decision = PITCH_MAYBE_PROP;
1067
+ else
1068
+ row->pitch_decision = PITCH_DEF_PROP;
1069
+ }
1070
+ else
1071
+ row->pitch_decision = PITCH_DUNNO;
1072
+ }
1073
+
1074
+ if (textord_debug_pitch_metric) {
1075
+ res_string = "??";
1076
+ switch (row->pitch_decision) {
1077
+ case PITCH_DEF_PROP:
1078
+ res_string = "DP";
1079
+ break;
1080
+ case PITCH_MAYBE_PROP:
1081
+ res_string = "MP";
1082
+ break;
1083
+ case PITCH_DEF_FIXED:
1084
+ res_string = "DF";
1085
+ break;
1086
+ case PITCH_MAYBE_FIXED:
1087
+ res_string = "MF";
1088
+ default:
1089
+ res_string = "??";
1090
+ }
1091
+ tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
1092
+ pitch_sd / row->fixed_pitch, sp_sd, res_string);
1093
+ }
1094
+ return TRUE;
1095
+ }
1096
+
1097
+
1098
+ /**********************************************************************
1099
+ * count_pitch_stats
1100
+ *
1101
+ * Count up the gap and pitch stats on the block to see if it is fixed pitch.
1102
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
1103
+ * The larger threshold is the word gap threshold.
1104
+ * The return value indicates whether there were any decent values to use.
1105
+ **********************************************************************/
1106
+
1107
+ BOOL8 count_pitch_stats( //find lines
1108
+ TO_ROW *row, //row to do
1109
+ STATS *gap_stats, //blob gaps
1110
+ STATS *pitch_stats, //centre-centre stats
1111
+ float initial_pitch, //guess at pitch
1112
+ float min_space, //estimate space size
1113
+ BOOL8 ignore_outsize, //discard big objects
1114
+ BOOL8 split_outsize, //split big objects
1115
+ inT32 dm_gap //ignorable gaps
1116
+ ) {
1117
+ BOOL8 prev_valid; //not word broken
1118
+ BLOBNBOX *blob; //current blob
1119
+ //blobs
1120
+ BLOBNBOX_IT blob_it = row->blob_list ();
1121
+ inT32 prev_right; //end of prev blob
1122
+ inT32 prev_centre; //centre of previous blob
1123
+ inT32 x_centre; //centre of this blob
1124
+ inT32 blob_width; //width of blob
1125
+ inT32 width_units; //no of widths in blob
1126
+ float width; //blob width
1127
+ TBOX blob_box; //bounding box
1128
+ TBOX joined_box; //of super blob
1129
+
1130
+ gap_stats->clear ();
1131
+ pitch_stats->clear ();
1132
+ if (blob_it.empty ())
1133
+ return FALSE;
1134
+ prev_valid = FALSE;
1135
+ prev_centre = 0;
1136
+ prev_right = 0; //stop complier warning
1137
+ joined_box = blob_it.data ()->bounding_box ();
1138
+ do {
1139
+ blob_it.forward ();
1140
+ blob = blob_it.data ();
1141
+ if (!blob->joined_to_prev ()) {
1142
+ blob_box = blob->bounding_box ();
1143
+ if ((blob_box.left () - joined_box.right () < dm_gap
1144
+ && !blob_it.at_first ())
1145
+ || (blob->cblob () == NULL && blob->blob () == NULL))
1146
+ joined_box += blob_box; //merge blobs
1147
+ else {
1148
+ blob_width = joined_box.width ();
1149
+ if (split_outsize) {
1150
+ width_units =
1151
+ (inT32) floor ((float) blob_width / initial_pitch + 0.5);
1152
+ if (width_units < 1)
1153
+ width_units = 1;
1154
+ width_units--;
1155
+ }
1156
+ else if (ignore_outsize) {
1157
+ width = (float) blob_width / initial_pitch;
1158
+ width_units = width < 1 + words_default_fixed_limit
1159
+ && width > 1 - words_default_fixed_limit ? 0 : -1;
1160
+ }
1161
+ else
1162
+ width_units = 0; //everything in
1163
+ x_centre = (inT32) (joined_box.left ()
1164
+ + (blob_width -
1165
+ width_units * initial_pitch) / 2);
1166
+ if (prev_valid && width_units >= 0) {
1167
+ // if (width_units>0)
1168
+ // {
1169
+ // tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
1170
+ // width_units,blob_width,x_centre,x_centre-prev_centre);
1171
+ // }
1172
+ gap_stats->add (joined_box.left () - prev_right, 1);
1173
+ pitch_stats->add (x_centre - prev_centre, 1);
1174
+ }
1175
+ prev_centre = (inT32) (x_centre + width_units * initial_pitch);
1176
+ prev_right = joined_box.right ();
1177
+ prev_valid = blob_box.left () - joined_box.right () < min_space;
1178
+ prev_valid = prev_valid && width_units >= 0;
1179
+ joined_box = blob_box;
1180
+ }
1181
+ }
1182
+ }
1183
+ while (!blob_it.at_first ());
1184
+ return gap_stats->get_total () >= 3;
1185
+ }
1186
+
1187
+
1188
+ /**********************************************************************
1189
+ * tune_row_pitch
1190
+ *
1191
+ * Use a dp algorithm to fit the character cells and return the sd of
1192
+ * the cell size over the row.
1193
+ **********************************************************************/
1194
+
1195
+ float tune_row_pitch( //find fp cells
1196
+ TO_ROW *row, //row to do
1197
+ STATS *projection, //vertical projection
1198
+ inT16 projection_left, //edge of projection
1199
+ inT16 projection_right, //edge of projection
1200
+ float space_size, //size of blank
1201
+ float &initial_pitch, //guess at pitch
1202
+ float &best_sp_sd, //space sd
1203
+ inT16 &best_mid_cuts, //no of cheap cuts
1204
+ ICOORDELT_LIST *best_cells, //row cells
1205
+ BOOL8 testing_on //inidividual words
1206
+ ) {
1207
+ int pitch_delta; //offset pitch
1208
+ inT16 mid_cuts; //cheap cuts
1209
+ float pitch_sd; //current sd
1210
+ float best_sd; //best result
1211
+ float best_pitch; //pitch for best result
1212
+ float initial_sd; //starting error
1213
+ float sp_sd; //space sd
1214
+ ICOORDELT_LIST test_cells; //row cells
1215
+ ICOORDELT_IT best_it; //start of best list
1216
+
1217
+ if (textord_fast_pitch_test)
1218
+ return tune_row_pitch2 (row, projection, projection_left,
1219
+ projection_right, space_size, initial_pitch,
1220
+ best_sp_sd,
1221
+ //space sd
1222
+ best_mid_cuts, best_cells, testing_on);
1223
+ if (textord_disable_pitch_test) {
1224
+ best_sp_sd = initial_pitch;
1225
+ return initial_pitch;
1226
+ }
1227
+ initial_sd =
1228
+ compute_pitch_sd(row,
1229
+ projection,
1230
+ projection_left,
1231
+ projection_right,
1232
+ space_size,
1233
+ initial_pitch,
1234
+ best_sp_sd,
1235
+ best_mid_cuts,
1236
+ best_cells,
1237
+ testing_on);
1238
+ best_sd = initial_sd;
1239
+ best_pitch = initial_pitch;
1240
+ if (testing_on)
1241
+ tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1242
+ for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1243
+ pitch_sd =
1244
+ compute_pitch_sd (row, projection, projection_left, projection_right,
1245
+ space_size, initial_pitch + pitch_delta, sp_sd,
1246
+ mid_cuts, &test_cells, testing_on);
1247
+ if (testing_on)
1248
+ tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1249
+ pitch_sd);
1250
+ if (pitch_sd < best_sd) {
1251
+ best_sd = pitch_sd;
1252
+ best_mid_cuts = mid_cuts;
1253
+ best_sp_sd = sp_sd;
1254
+ best_pitch = initial_pitch + pitch_delta;
1255
+ best_cells->clear ();
1256
+ best_it.set_to_list (best_cells);
1257
+ best_it.add_list_after (&test_cells);
1258
+ }
1259
+ else
1260
+ test_cells.clear ();
1261
+ if (pitch_sd > initial_sd)
1262
+ break; //getting worse
1263
+ }
1264
+ for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1265
+ pitch_sd =
1266
+ compute_pitch_sd (row, projection, projection_left, projection_right,
1267
+ space_size, initial_pitch - pitch_delta, sp_sd,
1268
+ mid_cuts, &test_cells, testing_on);
1269
+ if (testing_on)
1270
+ tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1271
+ pitch_sd);
1272
+ if (pitch_sd < best_sd) {
1273
+ best_sd = pitch_sd;
1274
+ best_mid_cuts = mid_cuts;
1275
+ best_sp_sd = sp_sd;
1276
+ best_pitch = initial_pitch - pitch_delta;
1277
+ best_cells->clear ();
1278
+ best_it.set_to_list (best_cells);
1279
+ best_it.add_list_after (&test_cells);
1280
+ }
1281
+ else
1282
+ test_cells.clear ();
1283
+ if (pitch_sd > initial_sd)
1284
+ break;
1285
+ }
1286
+ initial_pitch = best_pitch;
1287
+
1288
+ if (textord_debug_pitch_metric)
1289
+ print_pitch_sd(row,
1290
+ projection,
1291
+ projection_left,
1292
+ projection_right,
1293
+ space_size,
1294
+ best_pitch);
1295
+
1296
+ return best_sd;
1297
+ }
1298
+
1299
+
1300
+ /**********************************************************************
1301
+ * tune_row_pitch
1302
+ *
1303
+ * Use a dp algorithm to fit the character cells and return the sd of
1304
+ * the cell size over the row.
1305
+ **********************************************************************/
1306
+
1307
+ float tune_row_pitch2( //find fp cells
1308
+ TO_ROW *row, //row to do
1309
+ STATS *projection, //vertical projection
1310
+ inT16 projection_left, //edge of projection
1311
+ inT16 projection_right, //edge of projection
1312
+ float space_size, //size of blank
1313
+ float &initial_pitch, //guess at pitch
1314
+ float &best_sp_sd, //space sd
1315
+ inT16 &best_mid_cuts, //no of cheap cuts
1316
+ ICOORDELT_LIST *best_cells, //row cells
1317
+ BOOL8 testing_on //inidividual words
1318
+ ) {
1319
+ int pitch_delta; //offset pitch
1320
+ inT16 pixel; //pixel coord
1321
+ inT16 best_pixel; //pixel coord
1322
+ inT16 best_delta; //best pitch
1323
+ inT16 best_pitch; //best pitch
1324
+ inT16 start; //of good range
1325
+ inT16 end; //of good range
1326
+ inT32 best_count; //lowest sum
1327
+ float best_sd; //best result
1328
+ STATS *sum_proj; //summed projection
1329
+
1330
+ best_sp_sd = initial_pitch;
1331
+
1332
+ if (textord_disable_pitch_test) {
1333
+ return initial_pitch;
1334
+ }
1335
+ sum_proj = new STATS[textord_pitch_range * 2 + 1];
1336
+ if (sum_proj == NULL)
1337
+ return initial_pitch;
1338
+ best_pitch = (inT32) initial_pitch;
1339
+
1340
+ for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1341
+ pitch_delta++)
1342
+ sum_proj[textord_pitch_range + pitch_delta].set_range (0,
1343
+ best_pitch +
1344
+ pitch_delta + 1);
1345
+ for (pixel = projection_left; pixel <= projection_right; pixel++) {
1346
+ for (pitch_delta = -textord_pitch_range;
1347
+ pitch_delta <= textord_pitch_range; pitch_delta++)
1348
+ sum_proj[textord_pitch_range +
1349
+ pitch_delta].add ((pixel - projection_left) % (best_pitch +
1350
+ pitch_delta),
1351
+ projection->pile_count (pixel));
1352
+ }
1353
+ best_count = sum_proj[textord_pitch_range].pile_count (0);
1354
+ best_delta = 0;
1355
+ best_pixel = 0;
1356
+ for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1357
+ pitch_delta++) {
1358
+ for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1359
+ if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
1360
+ < best_count) {
1361
+ best_count =
1362
+ sum_proj[textord_pitch_range +
1363
+ pitch_delta].pile_count (pixel);
1364
+ best_delta = pitch_delta;
1365
+ best_pixel = pixel;
1366
+ }
1367
+ }
1368
+ }
1369
+ if (testing_on)
1370
+ tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1371
+ initial_pitch, best_delta, best_count);
1372
+ best_pitch += best_delta;
1373
+ initial_pitch = best_pitch;
1374
+ best_count++;
1375
+ best_count += best_count;
1376
+ for (start = best_pixel - 2; start > best_pixel - best_pitch
1377
+ && sum_proj[textord_pitch_range +
1378
+ best_delta].pile_count (start % best_pitch) <= best_count;
1379
+ start--);
1380
+ for (end = best_pixel + 2;
1381
+ end < best_pixel + best_pitch
1382
+ && sum_proj[textord_pitch_range +
1383
+ best_delta].pile_count (end % best_pitch) <= best_count;
1384
+ end++);
1385
+
1386
+ best_sd =
1387
+ compute_pitch_sd(row,
1388
+ projection,
1389
+ projection_left,
1390
+ projection_right,
1391
+ space_size,
1392
+ initial_pitch,
1393
+ best_sp_sd,
1394
+ best_mid_cuts,
1395
+ best_cells,
1396
+ testing_on,
1397
+ start,
1398
+ end);
1399
+ if (testing_on)
1400
+ tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
1401
+ best_sd);
1402
+
1403
+ if (textord_debug_pitch_metric)
1404
+ print_pitch_sd(row,
1405
+ projection,
1406
+ projection_left,
1407
+ projection_right,
1408
+ space_size,
1409
+ initial_pitch);
1410
+
1411
+ delete[]sum_proj;
1412
+
1413
+ return best_sd;
1414
+ }
1415
+
1416
+
1417
+ /**********************************************************************
1418
+ * compute_pitch_sd
1419
+ *
1420
+ * Use a dp algorithm to fit the character cells and return the sd of
1421
+ * the cell size over the row.
1422
+ **********************************************************************/
1423
+
1424
+ float compute_pitch_sd( //find fp cells
1425
+ TO_ROW *row, //row to do
1426
+ STATS *projection, //vertical projection
1427
+ inT16 projection_left, //edge
1428
+ inT16 projection_right, //edge
1429
+ float space_size, //size of blank
1430
+ float initial_pitch, //guess at pitch
1431
+ float &sp_sd, //space sd
1432
+ inT16 &mid_cuts, //no of free cuts
1433
+ ICOORDELT_LIST *row_cells, //list of chop pts
1434
+ BOOL8 testing_on, //inidividual words
1435
+ inT16 start, //start of good range
1436
+ inT16 end //end of good range
1437
+ ) {
1438
+ inT16 occupation; //no of cells in word.
1439
+ //blobs
1440
+ BLOBNBOX_IT blob_it = row->blob_list ();
1441
+ BLOBNBOX_IT start_it; //start of word
1442
+ BLOBNBOX_IT plot_it; //for plotting
1443
+ inT16 blob_count; //no of blobs
1444
+ TBOX blob_box; //bounding box
1445
+ TBOX prev_box; //of super blob
1446
+ inT32 prev_right; //of word sync
1447
+ int scale_factor; //on scores for big words
1448
+ inT32 sp_count; //spaces
1449
+ FPSEGPT_LIST seg_list; //char cells
1450
+ FPSEGPT_IT seg_it; //iterator
1451
+ inT16 segpos; //position of segment
1452
+ inT16 cellpos; //previous cell boundary
1453
+ //iterator
1454
+ ICOORDELT_IT cell_it = row_cells;
1455
+ ICOORDELT *cell; //new cell
1456
+ double sqsum; //sum of squares
1457
+ double spsum; //of spaces
1458
+ double sp_var; //space error
1459
+ double word_sync; //result for word
1460
+ inT32 total_count; //total blobs
1461
+
1462
+ if ((pitsync_linear_version & 3) > 1) {
1463
+ word_sync = compute_pitch_sd2 (row, projection, projection_left,
1464
+ projection_right, initial_pitch,
1465
+ occupation, mid_cuts, row_cells,
1466
+ testing_on, start, end);
1467
+ sp_sd = occupation;
1468
+ return word_sync;
1469
+ }
1470
+ mid_cuts = 0;
1471
+ cellpos = 0;
1472
+ total_count = 0;
1473
+ sqsum = 0;
1474
+ sp_count = 0;
1475
+ spsum = 0;
1476
+ prev_right = -1;
1477
+ if (blob_it.empty ())
1478
+ return space_size * 10;
1479
+ #ifndef GRAPHICS_DISABLED
1480
+ if (testing_on && to_win > 0) {
1481
+ blob_box = blob_it.data ()->bounding_box ();
1482
+ projection->plot (to_win, projection_left,
1483
+ row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1484
+ }
1485
+ #endif
1486
+ start_it = blob_it;
1487
+ blob_count = 0;
1488
+ blob_box = box_next (&blob_it);//first blob
1489
+ blob_it.mark_cycle_pt ();
1490
+ do {
1491
+ for (; blob_count > 0; blob_count--)
1492
+ box_next(&start_it);
1493
+ do {
1494
+ prev_box = blob_box;
1495
+ blob_count++;
1496
+ blob_box = box_next (&blob_it);
1497
+ }
1498
+ while (!blob_it.cycled_list ()
1499
+ && blob_box.left () - prev_box.right () < space_size);
1500
+ plot_it = start_it;
1501
+ if (pitsync_linear_version & 3)
1502
+ word_sync =
1503
+ check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
1504
+ projection, projection_left, projection_right,
1505
+ row->xheight * textord_projection_scale,
1506
+ occupation, &seg_list, start, end);
1507
+ else
1508
+ word_sync =
1509
+ check_pitch_sync (&start_it, blob_count, (inT16) initial_pitch, 2,
1510
+ projection, &seg_list);
1511
+ if (testing_on) {
1512
+ tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
1513
+ prev_box.right (), prev_box.top (),
1514
+ seg_list.length () - 1, word_sync);
1515
+ seg_it.set_to_list (&seg_list);
1516
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1517
+ seg_it.forward ()) {
1518
+ if (seg_it.data ()->faked)
1519
+ tprintf ("(F)");
1520
+ tprintf ("%d, ", seg_it.data ()->position ());
1521
+ // tprintf("C=%g, s=%g, sq=%g\n",
1522
+ // seg_it.data()->cost_function(),
1523
+ // seg_it.data()->sum(),
1524
+ // seg_it.data()->squares());
1525
+ }
1526
+ tprintf ("\n");
1527
+ }
1528
+ #ifndef GRAPHICS_DISABLED
1529
+ if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
1530
+ plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1531
+ #endif
1532
+ seg_it.set_to_list (&seg_list);
1533
+ if (prev_right >= 0) {
1534
+ sp_var = seg_it.data ()->position () - prev_right;
1535
+ sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1536
+ sp_var *= sp_var;
1537
+ spsum += sp_var;
1538
+ sp_count++;
1539
+ }
1540
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1541
+ segpos = seg_it.data ()->position ();
1542
+ if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1543
+ //big gap
1544
+ while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1545
+ cell = new ICOORDELT (cellpos + (inT16) initial_pitch, 0);
1546
+ cell_it.add_after_then_move (cell);
1547
+ cellpos += (inT16) initial_pitch;
1548
+ }
1549
+ //make new one
1550
+ cell = new ICOORDELT (segpos, 0);
1551
+ cell_it.add_after_then_move (cell);
1552
+ cellpos = segpos;
1553
+ }
1554
+ else if (segpos > cellpos - initial_pitch / 2) {
1555
+ cell = cell_it.data ();
1556
+ //average positions
1557
+ cell->set_x ((cellpos + segpos) / 2);
1558
+ cellpos = cell->x ();
1559
+ }
1560
+ }
1561
+ seg_it.move_to_last ();
1562
+ prev_right = seg_it.data ()->position ();
1563
+ if (textord_pitch_scalebigwords) {
1564
+ scale_factor = (seg_list.length () - 2) / 2;
1565
+ if (scale_factor < 1)
1566
+ scale_factor = 1;
1567
+ }
1568
+ else
1569
+ scale_factor = 1;
1570
+ sqsum += word_sync * scale_factor;
1571
+ total_count += (seg_list.length () - 1) * scale_factor;
1572
+ seg_list.clear ();
1573
+ }
1574
+ while (!blob_it.cycled_list ());
1575
+ sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1576
+ return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1577
+ }
1578
+
1579
+
1580
+ /**********************************************************************
1581
+ * compute_pitch_sd2
1582
+ *
1583
+ * Use a dp algorithm to fit the character cells and return the sd of
1584
+ * the cell size over the row.
1585
+ **********************************************************************/
1586
+
1587
+ float compute_pitch_sd2( //find fp cells
1588
+ TO_ROW *row, //row to do
1589
+ STATS *projection, //vertical projection
1590
+ inT16 projection_left, //edge
1591
+ inT16 projection_right, //edge
1592
+ float initial_pitch, //guess at pitch
1593
+ inT16 &occupation, //no of occupied cells
1594
+ inT16 &mid_cuts, //no of free cuts
1595
+ ICOORDELT_LIST *row_cells, //list of chop pts
1596
+ BOOL8 testing_on, //inidividual words
1597
+ inT16 start, //start of good range
1598
+ inT16 end //end of good range
1599
+ ) {
1600
+ //blobs
1601
+ BLOBNBOX_IT blob_it = row->blob_list ();
1602
+ BLOBNBOX_IT plot_it;
1603
+ inT16 blob_count; //no of blobs
1604
+ TBOX blob_box; //bounding box
1605
+ FPSEGPT_LIST seg_list; //char cells
1606
+ FPSEGPT_IT seg_it; //iterator
1607
+ inT16 segpos; //position of segment
1608
+ //iterator
1609
+ ICOORDELT_IT cell_it = row_cells;
1610
+ ICOORDELT *cell; //new cell
1611
+ double word_sync; //result for word
1612
+
1613
+ mid_cuts = 0;
1614
+ if (blob_it.empty ()) {
1615
+ occupation = 0;
1616
+ return initial_pitch * 10;
1617
+ }
1618
+ #ifndef GRAPHICS_DISABLED
1619
+ if (testing_on && to_win > 0) {
1620
+ projection->plot (to_win, projection_left,
1621
+ row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1622
+ }
1623
+ #endif
1624
+ blob_count = 0;
1625
+ blob_it.mark_cycle_pt ();
1626
+ do {
1627
+ //first blob
1628
+ blob_box = box_next (&blob_it);
1629
+ blob_count++;
1630
+ }
1631
+ while (!blob_it.cycled_list ());
1632
+ plot_it = blob_it;
1633
+ word_sync = check_pitch_sync2 (&blob_it, blob_count, (inT16) initial_pitch,
1634
+ 2, projection, projection_left,
1635
+ projection_right,
1636
+ row->xheight * textord_projection_scale,
1637
+ occupation, &seg_list, start, end);
1638
+ if (testing_on) {
1639
+ tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
1640
+ blob_box.right (), blob_box.top (),
1641
+ seg_list.length () - 1, word_sync);
1642
+ seg_it.set_to_list (&seg_list);
1643
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1644
+ if (seg_it.data ()->faked)
1645
+ tprintf ("(F)");
1646
+ tprintf ("%d, ", seg_it.data ()->position ());
1647
+ // tprintf("C=%g, s=%g, sq=%g\n",
1648
+ // seg_it.data()->cost_function(),
1649
+ // seg_it.data()->sum(),
1650
+ // seg_it.data()->squares());
1651
+ }
1652
+ tprintf ("\n");
1653
+ }
1654
+ #ifndef GRAPHICS_DISABLED
1655
+ if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
1656
+ plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1657
+ #endif
1658
+ seg_it.set_to_list (&seg_list);
1659
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1660
+ segpos = seg_it.data ()->position ();
1661
+ //make new one
1662
+ cell = new ICOORDELT (segpos, 0);
1663
+ cell_it.add_after_then_move (cell);
1664
+ if (seg_it.at_last ())
1665
+ mid_cuts = seg_it.data ()->cheap_cuts ();
1666
+ }
1667
+ seg_list.clear ();
1668
+ return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
1669
+ }
1670
+
1671
+
1672
+ /**********************************************************************
1673
+ * print_pitch_sd
1674
+ *
1675
+ * Use a dp algorithm to fit the character cells and return the sd of
1676
+ * the cell size over the row.
1677
+ **********************************************************************/
1678
+
1679
+ void print_pitch_sd( //find fp cells
1680
+ TO_ROW *row, //row to do
1681
+ STATS *projection, //vertical projection
1682
+ inT16 projection_left, //edges //size of blank
1683
+ inT16 projection_right,
1684
+ float space_size,
1685
+ float initial_pitch //guess at pitch
1686
+ ) {
1687
+ const char *res2; //pitch result
1688
+ inT16 occupation; //used cells
1689
+ float sp_sd; //space sd
1690
+ //blobs
1691
+ BLOBNBOX_IT blob_it = row->blob_list ();
1692
+ BLOBNBOX_IT start_it; //start of word
1693
+ BLOBNBOX_IT row_start; //start of row
1694
+ inT16 blob_count; //no of blobs
1695
+ inT16 total_blob_count; //total blobs in line
1696
+ TBOX blob_box; //bounding box
1697
+ TBOX prev_box; //of super blob
1698
+ inT32 prev_right; //of word sync
1699
+ int scale_factor; //on scores for big words
1700
+ inT32 sp_count; //spaces
1701
+ FPSEGPT_LIST seg_list; //char cells
1702
+ FPSEGPT_IT seg_it; //iterator
1703
+ double sqsum; //sum of squares
1704
+ double spsum; //of spaces
1705
+ double sp_var; //space error
1706
+ double word_sync; //result for word
1707
+ double total_count; //total cuts
1708
+
1709
+ if (blob_it.empty ())
1710
+ return;
1711
+ row_start = blob_it;
1712
+ total_blob_count = 0;
1713
+
1714
+ total_count = 0;
1715
+ sqsum = 0;
1716
+ sp_count = 0;
1717
+ spsum = 0;
1718
+ prev_right = -1;
1719
+ blob_it = row_start;
1720
+ start_it = blob_it;
1721
+ blob_count = 0;
1722
+ blob_box = box_next (&blob_it);//first blob
1723
+ blob_it.mark_cycle_pt ();
1724
+ do {
1725
+ for (; blob_count > 0; blob_count--)
1726
+ box_next(&start_it);
1727
+ do {
1728
+ prev_box = blob_box;
1729
+ blob_count++;
1730
+ blob_box = box_next (&blob_it);
1731
+ }
1732
+ while (!blob_it.cycled_list ()
1733
+ && blob_box.left () - prev_box.right () < space_size);
1734
+ word_sync =
1735
+ check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
1736
+ projection, projection_left, projection_right,
1737
+ row->xheight * textord_projection_scale,
1738
+ occupation, &seg_list, 0, 0);
1739
+ total_blob_count += blob_count;
1740
+ seg_it.set_to_list (&seg_list);
1741
+ if (prev_right >= 0) {
1742
+ sp_var = seg_it.data ()->position () - prev_right;
1743
+ sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1744
+ sp_var *= sp_var;
1745
+ spsum += sp_var;
1746
+ sp_count++;
1747
+ }
1748
+ seg_it.move_to_last ();
1749
+ prev_right = seg_it.data ()->position ();
1750
+ if (textord_pitch_scalebigwords) {
1751
+ scale_factor = (seg_list.length () - 2) / 2;
1752
+ if (scale_factor < 1)
1753
+ scale_factor = 1;
1754
+ }
1755
+ else
1756
+ scale_factor = 1;
1757
+ sqsum += word_sync * scale_factor;
1758
+ total_count += (seg_list.length () - 1) * scale_factor;
1759
+ seg_list.clear ();
1760
+ }
1761
+ while (!blob_it.cycled_list ());
1762
+ sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1763
+ word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1764
+ tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1765
+ word_sync, word_sync / initial_pitch, sp_sd,
1766
+ word_sync < textord_words_pitchsd_threshold * initial_pitch
1767
+ ? 'F' : 'P');
1768
+
1769
+ start_it = row_start;
1770
+ blob_it = row_start;
1771
+ word_sync =
1772
+ check_pitch_sync2 (&blob_it, total_blob_count, (inT16) initial_pitch, 2,
1773
+ projection, projection_left, projection_right,
1774
+ row->xheight * textord_projection_scale, occupation,
1775
+ &seg_list, 0, 0);
1776
+ if (occupation > 1)
1777
+ word_sync /= occupation;
1778
+ word_sync = sqrt (word_sync);
1779
+
1780
+ #ifndef GRAPHICS_DISABLED
1781
+ if (textord_show_row_cuts && to_win != NULL)
1782
+ plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
1783
+ #endif
1784
+ seg_list.clear ();
1785
+ if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
1786
+ if (word_sync < textord_words_def_fixed * initial_pitch
1787
+ && !row->all_caps)
1788
+ res2 = "DF";
1789
+ else
1790
+ res2 = "MF";
1791
+ }
1792
+ else
1793
+ res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
1794
+ tprintf
1795
+ ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1796
+ word_sync, word_sync / initial_pitch,
1797
+ word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
1798
+ occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
1799
+ }
1800
+
1801
+
1802
/**********************************************************************
 * sort_floats2
 *
 * qsort comparison function for an array of floats (ascending order).
 * arg1 and arg2 point at the two floats to compare. Returns 1 if
 * *arg1 > *arg2, -1 if *arg1 < *arg2, and 0 otherwise (equal values,
 * or a comparison involving NaN).
 **********************************************************************/

int sort_floats2(                   //qsort function
                 const void *arg1,  //ptrs to floats
                 const void *arg2) {
  // Keep the const qualifier of the qsort arguments and compare the
  // values directly instead of testing the sign of their difference.
  const float lhs = *(const float *) arg1;
  const float rhs = *(const float *) arg2;

  if (lhs > rhs)
    return 1;
  if (lhs < rhs)
    return -1;
  return 0;
}
1821
+
1822
+
1823
+ /**********************************************************************
1824
+ * find_repeated_chars
1825
+ *
1826
+ * Find 4 or more adjacent chars which are the same and put them
1827
+ * into words in advance of fixed pitch checking and word generation.
1828
+ **********************************************************************/
1829
+
1830
+ void find_repeated_chars( //search for equal chars
1831
+ TO_BLOCK *block, //block to search
1832
+ BOOL8 testing_on //dbug mode
1833
+ ) {
1834
+ BOOL8 bol; //start of line
1835
+ TO_ROW *row; //current row
1836
+ TO_ROW_IT row_it = block->get_rows ();
1837
+ ROW *real_row; //output row
1838
+ WERD_IT word_it; //new words
1839
+ WERD *word; //new word
1840
+ BLOBNBOX *bblob; //current blob
1841
+ BLOBNBOX *nextblob; //neighbour to compare
1842
+ BLOBNBOX_IT box_it; //iterator
1843
+ BLOBNBOX_IT search_it; //forward search
1844
+ inT32 blobcount; //no of neighbours
1845
+ inT32 matched_blobcount; //no of matches
1846
+ inT32 blobindex; //in row
1847
+ inT32 row_length; //blobs in row
1848
+ inT32 width_change; //max width change
1849
+ inT32 blob_width; //required blob width
1850
+ inT32 space_width; //required gap width
1851
+ inT32 prev_right; //right edge of last blob
1852
+ float rating; //match rating
1853
+ PBLOB *pblob1; //polygonal blob
1854
+ PBLOB *pblob2; //second blob
1855
+ TBOX word_box; //for plotting
1856
+
1857
+ if (row_it.empty ())
1858
+ return; //empty block
1859
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1860
+ row = row_it.data ();
1861
+ box_it.set_to_list (row->blob_list ());
1862
+ row_length = row->blob_list ()->length ();
1863
+ blobindex = 0;
1864
+ word_it.set_to_list (&row->rep_words);
1865
+ bol = TRUE;
1866
+ if (!box_it.empty ()) {
1867
+ real_row = new ROW (row,
1868
+ (inT16) block->kern_size,
1869
+ (inT16) block->space_size);
1870
+ do {
1871
+ bblob = box_it.data ();
1872
+ blobcount = 1;
1873
+ search_it = box_it;
1874
+ search_it.forward ();
1875
+ matched_blobcount = 1;
1876
+ width_change = MAX_INT16;
1877
+ blob_width = 0;
1878
+ space_width = 0;
1879
+ prev_right = bblob->bounding_box ().right ();
1880
+ if (bblob->bounding_box ().height () * 2 < row->xheight
1881
+ && !bblob->joined_to_prev ()
1882
+ && (bblob->blob () != NULL || bblob->cblob () != NULL)) {
1883
+ if (bblob->cblob () != NULL)
1884
+ pblob1 = new PBLOB (bblob->cblob (), row->xheight);
1885
+ else
1886
+ pblob1 = bblob->blob ();
1887
+
1888
+ rating = 0.0f;
1889
+ while (rating < textord_repeat_rating
1890
+ && blobindex + blobcount < row_length
1891
+ && ((nextblob = search_it.data ())->blob () != NULL
1892
+ || nextblob->cblob () != NULL)
1893
+ && nextblob->bounding_box ().height () * 2 <
1894
+ row->xheight) {
1895
+ if (blobcount == 1) {
1896
+ space_width = nextblob->bounding_box ().left ()
1897
+ - bblob->bounding_box ().right ();
1898
+ blob_width = bblob->bounding_box ().width ();
1899
+ width_change =
1900
+ blob_width >
1901
+ space_width ? blob_width : space_width;
1902
+ width_change =
1903
+ (inT32) (width_change *
1904
+ textord_repch_width_variance);
1905
+ if (width_change < 3)
1906
+ width_change = 3;
1907
+ }
1908
+ if (nextblob->bounding_box ().width () >
1909
+ blob_width + width_change
1910
+ || nextblob->bounding_box ().width () <
1911
+ blob_width - width_change
1912
+ || nextblob->bounding_box ().left () - prev_right >
1913
+ space_width + width_change
1914
+ || nextblob->bounding_box ().left () - prev_right <
1915
+ space_width - width_change) {
1916
+ if (testing_on)
1917
+ tprintf
1918
+ ("Repch terminated:bw=%d, sw=%d, wc=%d, pr=%d, nb=(%d,%d)\n",
1919
+ blob_width, space_width, width_change,
1920
+ prev_right, nextblob->bounding_box ().left (),
1921
+ nextblob->bounding_box ().right ());
1922
+ break; //not good enough
1923
+ }
1924
+ if (nextblob->blob () != NULL)
1925
+ rating = compare_blobs (pblob1, real_row,
1926
+ nextblob->blob (), real_row);
1927
+ else {
1928
+ pblob2 =
1929
+ new PBLOB (nextblob->cblob (), row->xheight);
1930
+ rating =
1931
+ compare_blobs(pblob1, real_row, pblob2, real_row);
1932
+ delete pblob2;
1933
+ }
1934
+ if (rating < textord_repeat_rating) {
1935
+ // if (testing_on)
1936
+ // tprintf("Blob at (%d,%d)->(%d,%d) had rating %g\n",
1937
+ // nextblob->bounding_box().left(),
1938
+ // nextblob->bounding_box().bottom(),
1939
+ // nextblob->bounding_box().right(),
1940
+ // nextblob->bounding_box().top(),
1941
+ // rating);
1942
+ blobcount++;
1943
+ search_it.forward ();
1944
+ matched_blobcount++;
1945
+ while (blobindex + blobcount < row_length
1946
+ && (search_it.data ()->joined_to_prev () ||
1947
+ (search_it.data()->blob() == NULL &&
1948
+ search_it.data()->cblob() == NULL))) {
1949
+ search_it.forward ();
1950
+ blobcount++; //suck in joined bits
1951
+ }
1952
+ }
1953
+ prev_right = nextblob->bounding_box ().right ();
1954
+ }
1955
+ if (bblob->cblob () != NULL)
1956
+ delete pblob1;
1957
+
1958
+ if (matched_blobcount >= textord_repeat_threshold) {
1959
+ word =
1960
+ make_real_word (&box_it, blobcount, bol, FALSE, FALSE,
1961
+ 1);
1962
+ #ifndef GRAPHICS_DISABLED
1963
+ if (testing_on) {
1964
+ word_box = word->bounding_box ();
1965
+ tprintf
1966
+ ("Found repeated word of %d blobs (%d matched) from (%d,%d)->(%d,%d)\n",
1967
+ blobcount, matched_blobcount, word_box.left (),
1968
+ word_box.bottom (), word_box.right (),
1969
+ word_box.top ());
1970
+ //perimeter_color_index(to_win, RED);
1971
+ to_win->Pen(255,0,0);
1972
+ //interior_style(to_win, INT_HOLLOW, TRUE);
1973
+ to_win->Rectangle(word_box.left (),
1974
+ word_box.bottom (), word_box.right (),
1975
+ word_box.top ());
1976
+ }
1977
+ #endif
1978
+ word->set_flag (W_REP_CHAR, TRUE);
1979
+ word->set_flag (W_DONT_CHOP, TRUE);
1980
+ word_it.add_after_then_move (word);
1981
+ blobindex += blobcount;
1982
+ }
1983
+ }
1984
+ bol = FALSE;
1985
+ box_it.forward (); //next one
1986
+ blobindex++;
1987
+ }
1988
+ //until all done
1989
+ while (!box_it.at_first ());
1990
+ delete real_row;
1991
+ }
1992
+ }
1993
+ }
1994
+
1995
+
1996
+ /**********************************************************************
1997
+ * plot_fp_word
1998
+ *
1999
+ * Plot a block of words as if fixed pitch.
2000
+ **********************************************************************/
2001
+
2002
+ #ifndef GRAPHICS_DISABLED
2003
+ void plot_fp_word( //draw block of words
2004
+ TO_BLOCK *block, //block to draw
2005
+ float pitch, //pitch to draw with
2006
+ float nonspace //for space threshold
2007
+ ) {
2008
+ TO_ROW *row; //current row
2009
+ TO_ROW_IT row_it = block->get_rows ();
2010
+
2011
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2012
+ row = row_it.data ();
2013
+ row->min_space = (inT32) ((pitch + nonspace) / 2);
2014
+ row->max_nonspace = row->min_space;
2015
+ row->space_threshold = row->min_space;
2016
+ plot_word_decisions (to_win, (inT16) pitch, row);
2017
+ }
2018
+ }
2019
+ #endif