tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612):
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,2019 @@
1
+ /**********************************************************************
2
+ * File: topitch.cpp (Formerly to_pitch.c)
3
+ * Description: Code to determine fixed pitchness and the pitch if fixed.
4
+ * Author: Ray Smith
5
+ * Created: Tue Aug 24 16:57:29 BST 1993
6
+ *
7
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #include "mfcpch.h"
21
+ #ifdef __UNIX__
22
+ #include <assert.h>
23
+ #endif
24
+ #include "stderr.h"
25
+ #include "blobbox.h"
26
+ #include "lmedsq.h"
27
+ #include "statistc.h"
28
+ #include "drawtord.h"
29
+ #include "makerow.h"
30
+ #include "pitsync1.h"
31
+ #include "pithsync.h"
32
+ #include "blobcmpl.h"
33
+ #include "tovars.h"
34
+ #include "wordseg.h"
35
+ #include "topitch.h"
36
+ #include "secname.h"
37
+
38
+ #define EXTERN //empty here, so EXTERN adds no keyword to the variable lines below
39
+
40
+ EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text");
41
+ EXTERN BOOL_VAR (textord_debug_pitch_test, FALSE,
42
+ "Debug on fixed pitch test"); //gates the per-block tprintf output in this file
43
+ EXTERN BOOL_VAR (textord_disable_pitch_test, FALSE,
44
+ "Turn off dp fixed pitch algorithm");
45
+ EXTERN BOOL_VAR (textord_fast_pitch_test, FALSE,
46
+ "Do even faster pitch algorithm");
47
+ EXTERN BOOL_VAR (textord_debug_pitch_metric, FALSE,
48
+ "Write full metric stuff"); //gates the vote dump in fix_row_pitch and the try_doc line
49
+ EXTERN BOOL_VAR (textord_show_row_cuts, FALSE, "Draw row-level cuts");
50
+ EXTERN BOOL_VAR (textord_show_page_cuts, FALSE, "Draw page-level cuts"); //gates plotting in try_doc_fixed
51
+ EXTERN BOOL_VAR (textord_pitch_cheat, FALSE,
52
+ "Use correct answer for fixed/prop"); //fix_row_pitch then takes the answer from text_region()->is_prop()
53
+ EXTERN BOOL_VAR (textord_blockndoc_fixed, FALSE,
54
+ "Attempt whole doc/block fixed pitch"); //try_doc_fixed returns FALSE at once when this is off
55
+ EXTERN double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
56
+ EXTERN double_VAR (textord_balance_factor, 1.0,
57
+ "Ding rate for unbalanced char cells");
58
+ EXTERN double_VAR (textord_repch_width_variance, 0.2,
59
+ "Max width change of gap/blob");
60
+
61
+ #define FIXED_WIDTH_MULTIPLE 5 //NOTE(review): no use visible in the reviewed part of this file
62
+ #define BLOCK_STATS_CLUSTERS 10 //cluster limit passed to STATS::cluster in row_pitch_stats
63
+ #define MAX_ALLOWED_PITCH 100 //max pixel pitch; upper range of the pitch histogram in try_doc_fixed.
64
+
65
+ /**********************************************************************
66
+ * compute_fixed_pitch
67
+ *
68
+ * Decide whether each row is fixed pitch individually.
69
+ * Correlate definite and uncertain results to obtain an individual
70
+ * result for each row in the TO_ROW class.
71
+ **********************************************************************/
72
+
73
+ void compute_fixed_pitch( //determine pitch
74
+ ICOORD page_tr, //top right
75
+ TO_BLOCK_LIST *port_blocks, //input list
76
+ float gradient, //page skew
77
+ FCOORD rotation, //for drawing
78
+ BOOL8 testing_on //correct orientation
79
+ ) {
80
+ TO_BLOCK_IT block_it; //iterator
81
+ TO_BLOCK *block; //current block;
82
+ TO_ROW_IT row_it; //row iterator
83
+ TO_ROW *row; //current row
84
+ int block_index; //block number
85
+ int row_index; //row number
86
+
87
+ #ifndef GRAPHICS_DISABLED
88
+ if (textord_show_initial_words && testing_on) {
89
+ if (to_win == NULL)
90
+ create_to_win(page_tr);
91
+ }
92
+ #endif
93
+
94
+ block_it.set_to_list (port_blocks);
95
+ block_index = 1;
96
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
97
+ block_it.forward ()) {
98
+ block = block_it.data ();
99
+ compute_block_pitch(block, rotation, block_index, testing_on);
100
+ block_index++;
101
+ }
102
+
103
+ if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
104
+ block_index = 1;
105
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
106
+ block_it.forward ()) {
107
+ block = block_it.data ();
108
+ if (!try_block_fixed (block, block_index))
109
+ try_rows_fixed(block, block_index, testing_on);
110
+ block_index++;
111
+ }
112
+ }
113
+
114
+ block_index = 1;
115
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
116
+ block_it.forward ()) {
117
+ block = block_it.data ();
118
+ row_it.set_to_list (block->get_rows ());
119
+ row_index = 1;
120
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
121
+ row = row_it.data ();
122
+ fix_row_pitch(row, block, port_blocks, row_index, block_index);
123
+ row_index++;
124
+ }
125
+ if (testing_on
126
+ && ((textord_debug_pitch_test && block->block->text_region () != NULL)
127
+ || textord_blocksall_fixed || textord_blocksall_prop)) {
128
+ tprintf ("Corr:");
129
+ print_block_counts(block, block_index);
130
+ }
131
+ block_index++;
132
+ }
133
+ #ifndef GRAPHICS_DISABLED
134
+ if (textord_show_initial_words && testing_on) {
135
+ //overlap_picture_ops(TRUE);
136
+ ScrollView::Update();
137
+ }
138
+ #endif
139
+ }
140
+
141
+
142
+ /**********************************************************************
143
+ * fix_row_pitch
144
+ *
145
+ * Get a pitch_decision for this row by voting among similar rows in the
146
+ * block, then similar rows over all the page, or any other rows at all.
147
+ **********************************************************************/
148
+
149
+ void fix_row_pitch( //get some value
150
+ TO_ROW *bad_row, //row to fix
151
+ TO_BLOCK *bad_block, //block of bad_row
152
+ TO_BLOCK_LIST *blocks, //blocks to scan
153
+ inT32 row_target, //number of row
154
+ inT32 block_target //number of block
155
+ ) {
156
+ const char *res_string; //decision on line
157
+ inT16 mid_cuts;
158
+ int block_votes; //votes in block
159
+ int like_votes; //votes over page
160
+ int other_votes; //votes of unlike blocks
161
+ int block_index; //number of block
162
+ int row_index; //number of row
163
+ int maxwidth; //max pitch
164
+ TO_BLOCK_IT block_it = blocks; //block iterator
165
+ TO_ROW_IT row_it;
166
+ TO_BLOCK *block; //current block
167
+ TO_ROW *row; //current row
168
+ float sp_sd; //space deviation
169
+ STATS block_stats; //pitches in block
170
+ STATS like_stats; //pitches in page
171
+
172
+ block_votes = like_votes = other_votes = 0;
173
+ maxwidth = (inT32) ceil (bad_row->xheight * textord_words_maxspace);
174
+ if (bad_row->pitch_decision != PITCH_DEF_FIXED
175
+ && bad_row->pitch_decision != PITCH_DEF_PROP) {
176
+ block_stats.set_range (0, maxwidth);
177
+ like_stats.set_range (0, maxwidth);
178
+ block_index = 1;
179
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
180
+ block_it.forward ()) {
181
+ block = block_it.data ();
182
+ row_index = 1;
183
+ row_it.set_to_list (block->get_rows ());
184
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
185
+ row_it.forward ()) {
186
+ row = row_it.data ();
187
+ if ((bad_row->all_caps
188
+ && row->xheight + row->ascrise
189
+ <
190
+ (bad_row->xheight + bad_row->ascrise) * (1 +
191
+ textord_pitch_rowsimilarity)
192
+ && row->xheight + row->ascrise >
193
+ (bad_row->xheight + bad_row->ascrise) * (1 -
194
+ textord_pitch_rowsimilarity))
195
+ || (!bad_row->all_caps
196
+ && row->xheight <
197
+ bad_row->xheight * (1 + textord_pitch_rowsimilarity)
198
+ && row->xheight >
199
+ bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
200
+ if (block_index == block_target) {
201
+ if (row->pitch_decision == PITCH_DEF_FIXED) {
202
+ block_votes += textord_words_veto_power;
203
+ block_stats.add ((inT32) row->fixed_pitch,
204
+ textord_words_veto_power);
205
+ }
206
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
207
+ || row->pitch_decision == PITCH_CORR_FIXED) {
208
+ block_votes++;
209
+ block_stats.add ((inT32) row->fixed_pitch, 1);
210
+ }
211
+ else if (row->pitch_decision == PITCH_DEF_PROP)
212
+ block_votes -= textord_words_veto_power;
213
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
214
+ || row->pitch_decision == PITCH_CORR_PROP)
215
+ block_votes--;
216
+ }
217
+ else {
218
+ if (row->pitch_decision == PITCH_DEF_FIXED) {
219
+ like_votes += textord_words_veto_power;
220
+ like_stats.add ((inT32) row->fixed_pitch,
221
+ textord_words_veto_power);
222
+ }
223
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
224
+ || row->pitch_decision == PITCH_CORR_FIXED) {
225
+ like_votes++;
226
+ like_stats.add ((inT32) row->fixed_pitch, 1);
227
+ }
228
+ else if (row->pitch_decision == PITCH_DEF_PROP)
229
+ like_votes -= textord_words_veto_power;
230
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
231
+ || row->pitch_decision == PITCH_CORR_PROP)
232
+ like_votes--;
233
+ }
234
+ }
235
+ else {
236
+ if (row->pitch_decision == PITCH_DEF_FIXED)
237
+ other_votes += textord_words_veto_power;
238
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
239
+ || row->pitch_decision == PITCH_CORR_FIXED)
240
+ other_votes++;
241
+ else if (row->pitch_decision == PITCH_DEF_PROP)
242
+ other_votes -= textord_words_veto_power;
243
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
244
+ || row->pitch_decision == PITCH_CORR_PROP)
245
+ other_votes--;
246
+ }
247
+ row_index++;
248
+ }
249
+ block_index++;
250
+ }
251
+ if (block_votes > textord_words_veto_power) {
252
+ bad_row->fixed_pitch = block_stats.ile (0.5);
253
+ bad_row->pitch_decision = PITCH_CORR_FIXED;
254
+ }
255
+ else if (block_votes <= textord_words_veto_power && like_votes > 0) {
256
+ bad_row->fixed_pitch = like_stats.ile (0.5);
257
+ bad_row->pitch_decision = PITCH_CORR_FIXED;
258
+ }
259
+ else {
260
+ bad_row->pitch_decision = PITCH_CORR_PROP;
261
+ #ifndef SECURE_NAMES
262
+ if (block_votes == 0 && like_votes == 0 && other_votes > 0
263
+ && (textord_debug_pitch_test || textord_debug_pitch_metric))
264
+ tprintf
265
+ ("Warning:row %d of block %d set prop with no like rows against trend\n",
266
+ row_target, block_target);
267
+ #endif
268
+ }
269
+ }
270
+ if (textord_debug_pitch_metric) {
271
+ tprintf (":b_votes=%d:l_votes=%d:o_votes=%d",
272
+ block_votes, like_votes, other_votes);
273
+ if (bad_row->pitch_decision == PITCH_CORR_PROP
274
+ || bad_row->pitch_decision == PITCH_DEF_PROP) {
275
+ res_string = bad_block->block->text_region () != NULL ?
276
+ (bad_block->block->text_region ()->
277
+ is_prop ()? "CP" : "WP") : "XP";
278
+ }
279
+ else {
280
+ res_string = bad_block->block->text_region () != NULL ?
281
+ (bad_block->block->text_region ()->
282
+ is_prop ()? "WF" : "CF") : "XF";
283
+ }
284
+ tprintf (":Blk=%d:Row=%d:%c:",
285
+ block_target, row_target,
286
+ bad_block->block->text_region () != NULL ?
287
+ (bad_block->block->text_region ()->
288
+ is_prop ()? 'P' : 'F') : 'X');
289
+ tprintf ("x=%g:asc=%g:corr_res=%s\n", bad_row->xheight,
290
+ bad_row->ascrise, res_string);
291
+ }
292
+ if (textord_pitch_cheat && bad_block->block->text_region () != NULL)
293
+ bad_row->pitch_decision =
294
+ bad_block->block->text_region ()->
295
+ is_prop ()? PITCH_CORR_PROP : PITCH_CORR_FIXED;
296
+ if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
297
+ if (bad_row->fixed_pitch < textord_min_xheight) {
298
+ if (block_votes > 0)
299
+ bad_row->fixed_pitch = block_stats.ile (0.5);
300
+ else if (block_votes == 0 && like_votes > 0)
301
+ bad_row->fixed_pitch = like_stats.ile (0.5);
302
+ else {
303
+ tprintf
304
+ ("Warning:guessing pitch as xheight on row %d, block %d\n",
305
+ row_target, block_target);
306
+ bad_row->fixed_pitch = bad_row->xheight;
307
+ }
308
+ }
309
+ if (bad_row->fixed_pitch < textord_min_xheight)
310
+ bad_row->fixed_pitch = (float) textord_min_xheight;
311
+ bad_row->kern_size = bad_row->fixed_pitch / 4;
312
+ bad_row->min_space = (inT32) (bad_row->fixed_pitch * 0.6);
313
+ bad_row->max_nonspace = (inT32) (bad_row->fixed_pitch * 0.4);
314
+ bad_row->space_threshold =
315
+ (bad_row->min_space + bad_row->max_nonspace) / 2;
316
+ bad_row->space_size = bad_row->fixed_pitch;
317
+ if (bad_row->char_cells.empty ())
318
+ tune_row_pitch (bad_row, &bad_row->projection,
319
+ bad_row->projection_left, bad_row->projection_right,
320
+ (bad_row->fixed_pitch +
321
+ bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
322
+ sp_sd, mid_cuts, &bad_row->char_cells, FALSE);
323
+ }
324
+ else if (bad_row->pitch_decision == PITCH_CORR_PROP
325
+ || bad_row->pitch_decision == PITCH_DEF_PROP) {
326
+ bad_row->fixed_pitch = 0.0f;
327
+ bad_row->char_cells.clear ();
328
+ }
329
+ }
330
+
331
+
332
+ /**********************************************************************
333
+ * compute_block_pitch
334
+ *
335
+ * Decide whether each block is fixed pitch individually.
336
+ **********************************************************************/
337
+
338
+ void compute_block_pitch( //process each block
339
+ TO_BLOCK *block, //input list
340
+ FCOORD rotation, //for drawing
341
+ inT32 block_index, //block number
342
+ BOOL8 testing_on //correct orientation
343
+ ) {
344
+ TBOX block_box; //bounding box
345
+
346
+ block_box = block->block->bounding_box ();
347
+ if (testing_on && textord_debug_pitch_test) {
348
+ tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
349
+ block_index,
350
+ block_box.left (), block_box.bottom (),
351
+ block_box.right (), block_box.top ());
352
+ }
353
+ block->min_space = (inT32) floor (block->xheight
354
+ * textord_words_default_minspace);
355
+ block->max_nonspace = (inT32) ceil (block->xheight
356
+ * textord_words_default_nonspace);
357
+ block->fixed_pitch = 0.0f;
358
+ block->space_size = (float) block->min_space;
359
+ block->kern_size = (float) block->max_nonspace;
360
+ block->pr_nonsp = block->xheight * words_default_prop_nonspace;
361
+ block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;
362
+ if (!block->get_rows ()->empty ()) {
363
+ ASSERT_HOST (block->xheight > 0);
364
+ if (textord_repeat_extraction)
365
+ find_repeated_chars(block, textord_show_initial_words &&testing_on);
366
+ #ifndef GRAPHICS_DISABLED
367
+ if (textord_show_initial_words && testing_on)
368
+ //overlap_picture_ops(TRUE);
369
+ ScrollView::Update();
370
+ #endif
371
+ compute_rows_pitch(block,
372
+ block_index,
373
+ textord_debug_pitch_test &&testing_on);
374
+ }
375
+ }
376
+
377
+
378
+ /**********************************************************************
379
+ * compute_rows_pitch
380
+ *
381
+ * Decide whether each row is fixed pitch individually.
382
+ **********************************************************************/
383
+
384
+ BOOL8 compute_rows_pitch( //find line stats
385
+ TO_BLOCK *block, //block to do
386
+ inT32 block_index, //block number
387
+ BOOL8 testing_on //correct orientation
388
+ ) {
389
+ inT32 maxwidth; //of spaces
390
+ TO_ROW *row; //current row
391
+ inT32 row_index; //row number.
392
+ float lower, upper; //cluster thresholds
393
+ TO_ROW_IT row_it = block->get_rows ();
394
+
395
+ row_index = 1;
396
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
397
+ row = row_it.data ();
398
+ ASSERT_HOST (row->xheight > 0);
399
+ row->compute_vertical_projection ();
400
+ maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
401
+ if (row_pitch_stats (row, maxwidth, testing_on)
402
+ && find_row_pitch (row, maxwidth,
403
+ textord_dotmatrix_gap + 1, block, block_index,
404
+ row_index, testing_on)) {
405
+ if (row->fixed_pitch == 0) {
406
+ lower = row->pr_nonsp;
407
+ upper = row->pr_space;
408
+ row->space_size = upper;
409
+ row->kern_size = lower;
410
+ }
411
+ }
412
+ else {
413
+ row->fixed_pitch = 0.0f; //insufficient data
414
+ row->pitch_decision = PITCH_DUNNO;
415
+ }
416
+ row_index++;
417
+ }
418
+ return FALSE;
419
+ }
420
+
421
+
422
+ /**********************************************************************
423
+ * try_doc_fixed
424
+ *
425
+ * Attempt to call the entire document fixed pitch.
426
+ **********************************************************************/
427
+
428
+ BOOL8 try_doc_fixed( //determine pitch
429
+ ICOORD page_tr, //top right
430
+ TO_BLOCK_LIST *port_blocks, //input list
431
+ float gradient //page skew
432
+ ) {
433
+ inT16 master_x; //uniform shifts
434
+ inT16 pitch; //median pitch.
435
+ int x; //profile coord
436
+ int prop_blocks; //correct counts
437
+ int fixed_blocks;
438
+ int total_row_count; //total in page
439
+ //iterator
440
+ TO_BLOCK_IT block_it = port_blocks;
441
+ TO_BLOCK *block; //current block;
442
+ TO_ROW_IT row_it; //row iterator
443
+ TO_ROW *row; //current row
444
+ inT16 projection_left; //edges
445
+ inT16 projection_right;
446
+ inT16 row_left; //edges of row
447
+ inT16 row_right;
448
+ ICOORDELT_LIST *master_cells; //cells for page
449
+ float master_y; //uniform shifts
450
+ float shift_factor; //page skew correction
451
+ float row_shift; //shift for row
452
+ float final_pitch; //output pitch
453
+ float row_y; //baseline
454
+ STATS projection; //entire page
455
+ STATS pitches (0, MAX_ALLOWED_PITCH);
456
+ //for median
457
+ float sp_sd; //space sd
458
+ inT16 mid_cuts; //no of cheap cuts
459
+ float pitch_sd; //sync rating
460
+
461
+ if (block_it.empty ()
462
+ // || block_it.data()==block_it.data_relative(1)
463
+ || !textord_blockndoc_fixed)
464
+ return FALSE;
465
+ shift_factor = gradient / (gradient * gradient + 1);
466
+ row_it.set_to_list (block_it.data ()->get_rows ());
467
+ master_x = row_it.data ()->projection_left;
468
+ master_y = row_it.data ()->baseline.y (master_x);
469
+ projection_left = MAX_INT16;
470
+ projection_right = -MAX_INT16;
471
+ prop_blocks = 0;
472
+ fixed_blocks = 0;
473
+ total_row_count = 0;
474
+
475
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
476
+ block_it.forward ()) {
477
+ block = block_it.data ();
478
+ if (block->block->text_region () != NULL) {
479
+ if (block->block->text_region ()->is_prop ())
480
+ prop_blocks++;
481
+ else
482
+ fixed_blocks++;
483
+ }
484
+ row_it.set_to_list (block->get_rows ());
485
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
486
+ row = row_it.data ();
487
+ total_row_count++;
488
+ if (row->fixed_pitch > 0)
489
+ pitches.add ((inT32) (row->fixed_pitch), 1);
490
+ //find median
491
+ row_y = row->baseline.y (master_x);
492
+ row_left =
493
+ (inT16) (row->projection_left -
494
+ shift_factor * (master_y - row_y));
495
+ row_right =
496
+ (inT16) (row->projection_right -
497
+ shift_factor * (master_y - row_y));
498
+ if (row_left < projection_left)
499
+ projection_left = row_left;
500
+ if (row_right > projection_right)
501
+ projection_right = row_right;
502
+ }
503
+ }
504
+ if (pitches.get_total () == 0)
505
+ return FALSE;
506
+ projection.set_range (projection_left, projection_right);
507
+
508
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
509
+ block_it.forward ()) {
510
+ block = block_it.data ();
511
+ row_it.set_to_list (block->get_rows ());
512
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
513
+ row = row_it.data ();
514
+ row_y = row->baseline.y (master_x);
515
+ row_left =
516
+ (inT16) (row->projection_left -
517
+ shift_factor * (master_y - row_y));
518
+ for (x = row->projection_left; x < row->projection_right;
519
+ x++, row_left++) {
520
+ projection.add (row_left, row->projection.pile_count (x));
521
+ }
522
+ }
523
+ }
524
+
525
+ row_it.set_to_list (block_it.data ()->get_rows ());
526
+ row = row_it.data ();
527
+ #ifndef GRAPHICS_DISABLED
528
+ if (textord_show_page_cuts && to_win != NULL)
529
+ projection.plot (to_win, projection_left,
530
+ row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
531
+ #endif
532
+ final_pitch = pitches.ile (0.5);
533
+ pitch = (inT16) final_pitch;
534
+ pitch_sd =
535
+ tune_row_pitch (row, &projection, projection_left, projection_right,
536
+ pitch * 0.75, final_pitch, sp_sd, mid_cuts,
537
+ &row->char_cells, FALSE);
538
+
539
+ if (textord_debug_pitch_metric)
540
+ tprintf
541
+ ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
542
+ prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
543
+ pitch_sd / total_row_count, pitch_sd / pitch,
544
+ pitch_sd / total_row_count / pitch);
545
+
546
+ #ifndef GRAPHICS_DISABLED
547
+ if (textord_show_page_cuts && to_win != NULL) {
548
+ master_cells = &row->char_cells;
549
+ for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
550
+ block_it.forward ()) {
551
+ block = block_it.data ();
552
+ row_it.set_to_list (block->get_rows ());
553
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
554
+ row_it.forward ()) {
555
+ row = row_it.data ();
556
+ row_y = row->baseline.y (master_x);
557
+ row_shift = shift_factor * (master_y - row_y);
558
+ plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
559
+ }
560
+ }
561
+ }
562
+ #endif
563
+ row->char_cells.clear ();
564
+ return FALSE;
565
+ }
566
+
567
+
568
+ /**********************************************************************
569
+ * try_block_fixed
570
+ *
571
+ * Try to call the entire block fixed.
+ *
+ * Currently a stub: neither argument is read and the result is always
+ * FALSE, so compute_fixed_pitch always goes on to call try_rows_fixed
+ * for the block.
572
+ **********************************************************************/
573
+
574
+ BOOL8 try_block_fixed( //find line stats
575
+ TO_BLOCK *block, //block to do
576
+ inT32 block_index //block number
577
+ ) {
578
+ return FALSE; //no block-level decision is ever made here
579
+ }
580
+
581
+
582
+ /**********************************************************************
583
+ * try_rows_fixed
584
+ *
585
+ * Decide whether each row is fixed pitch individually.
586
+ **********************************************************************/
587
+
588
+ BOOL8 try_rows_fixed( //find line stats
589
+ TO_BLOCK *block, //block to do
590
+ inT32 block_index, //block number
591
+ BOOL8 testing_on //correct orientation
592
+ ) {
593
+ inT32 maxwidth; //of spaces
594
+ TO_ROW *row; //current row
595
+ inT32 row_index; //row number.
596
+ inT32 def_fixed = 0; //counters
597
+ inT32 def_prop = 0;
598
+ inT32 maybe_fixed = 0;
599
+ inT32 maybe_prop = 0;
600
+ inT32 dunno = 0;
601
+ inT32 corr_fixed = 0;
602
+ inT32 corr_prop = 0;
603
+ float lower, upper; //cluster thresholds
604
+ TO_ROW_IT row_it = block->get_rows ();
605
+
606
+ row_index = 1;
607
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
608
+ row = row_it.data ();
609
+ ASSERT_HOST (row->xheight > 0);
610
+ maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
611
+ if (row->fixed_pitch > 0 && fixed_pitch_row (row, block_index)) {
612
+ if (row->fixed_pitch == 0) {
613
+ lower = row->pr_nonsp;
614
+ upper = row->pr_space;
615
+ row->space_size = upper;
616
+ row->kern_size = lower;
617
+ }
618
+ }
619
+ row_index++;
620
+ }
621
+ count_block_votes(block,
622
+ def_fixed,
623
+ def_prop,
624
+ maybe_fixed,
625
+ maybe_prop,
626
+ corr_fixed,
627
+ corr_prop,
628
+ dunno);
629
+ if (testing_on
630
+ && (textord_debug_pitch_test
631
+ || textord_blocksall_prop || textord_blocksall_fixed)) {
632
+ tprintf ("Initially:");
633
+ print_block_counts(block, block_index);
634
+ }
635
+ if (def_fixed > def_prop * textord_words_veto_power)
636
+ block->pitch_decision = PITCH_DEF_FIXED;
637
+ else if (def_prop > def_fixed * textord_words_veto_power)
638
+ block->pitch_decision = PITCH_DEF_PROP;
639
+ else if (def_fixed > 0 || def_prop > 0)
640
+ block->pitch_decision = PITCH_DUNNO;
641
+ else if (maybe_fixed > maybe_prop * textord_words_veto_power)
642
+ block->pitch_decision = PITCH_MAYBE_FIXED;
643
+ else if (maybe_prop > maybe_fixed * textord_words_veto_power)
644
+ block->pitch_decision = PITCH_MAYBE_PROP;
645
+ else
646
+ block->pitch_decision = PITCH_DUNNO;
647
+ return FALSE;
648
+ }
649
+
650
+
651
+ /**********************************************************************
652
+ * print_block_counts
653
+ *
654
+ * Count up how many rows have what decision and print the results.
655
+ **********************************************************************/
656
+
657
+ void print_block_counts( //find line stats
658
+ TO_BLOCK *block, //block to do
659
+ inT32 block_index //block number
660
+ ) {
661
+ inT32 def_fixed = 0; //counters
662
+ inT32 def_prop = 0;
663
+ inT32 maybe_fixed = 0;
664
+ inT32 maybe_prop = 0;
665
+ inT32 dunno = 0;
666
+ inT32 corr_fixed = 0;
667
+ inT32 corr_prop = 0;
668
+
669
+ count_block_votes(block,
670
+ def_fixed,
671
+ def_prop,
672
+ maybe_fixed,
673
+ maybe_prop,
674
+ corr_fixed,
675
+ corr_prop,
676
+ dunno);
677
+ tprintf ("Block %d has (%d,%d,%d)",
678
+ block_index, def_fixed, maybe_fixed, corr_fixed);
679
+ if ((textord_blocksall_prop
680
+ || (block->block->text_region () != NULL
681
+ && block->block->text_region ()->is_prop ())) && (def_fixed
682
+ || maybe_fixed
683
+ || corr_fixed))
684
+ tprintf (" (Wrongly)");
685
+ tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
686
+ if ((textord_blocksall_fixed
687
+ || (block->block->text_region () != NULL
688
+ && !block->block->text_region ()->is_prop ())) && (def_prop
689
+ || maybe_prop
690
+ || corr_prop))
691
+ tprintf (" (Wrongly)");
692
+ tprintf (" prop, %d dunno\n", dunno);
693
+ }
694
+
695
+
696
+ /**********************************************************************
697
+ * count_block_votes
698
+ *
699
+ * Count the number of rows in the block with each kind of pitch_decision.
700
+ **********************************************************************/
701
+
702
+ void count_block_votes( //find line stats
703
+ TO_BLOCK *block, //block to do
704
+ inT32 &def_fixed, //add to counts
705
+ inT32 &def_prop,
706
+ inT32 &maybe_fixed,
707
+ inT32 &maybe_prop,
708
+ inT32 &corr_fixed,
709
+ inT32 &corr_prop,
710
+ inT32 &dunno) {
711
+ TO_ROW *row; //current row
712
+ TO_ROW_IT row_it = block->get_rows ();
713
+
714
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
715
+ row = row_it.data ();
716
+ switch (row->pitch_decision) {
717
+ case PITCH_DUNNO:
718
+ dunno++;
719
+ break;
720
+ case PITCH_DEF_PROP:
721
+ def_prop++;
722
+ break;
723
+ case PITCH_MAYBE_PROP:
724
+ maybe_prop++;
725
+ break;
726
+ case PITCH_DEF_FIXED:
727
+ def_fixed++;
728
+ break;
729
+ case PITCH_MAYBE_FIXED:
730
+ maybe_fixed++;
731
+ break;
732
+ case PITCH_CORR_PROP:
733
+ corr_prop++;
734
+ break;
735
+ case PITCH_CORR_FIXED:
736
+ corr_fixed++;
737
+ break;
738
+ }
739
+ }
740
+ }
741
+
742
+
743
+ /**********************************************************************
744
+ * row_pitch_stats
745
+ *
746
+ * Decide whether each row is fixed pitch individually.
747
+ **********************************************************************/
748
+
749
+ BOOL8 row_pitch_stats( //find line stats
750
+ TO_ROW *row, //current row
751
+ inT32 maxwidth, //of spaces
752
+ BOOL8 testing_on //correct orientation
753
+ ) {
754
+ BLOBNBOX *blob; //current blob
755
+ int gap_index; //current gap
756
+ inT32 prev_x; //end of prev blob
757
+ inT32 cluster_count; //no of clusters
758
+ inT32 prev_count; //of clusters
759
+ inT32 smooth_factor; //for smoothing stats
760
+ TBOX blob_box; //bounding box
761
+ float lower, upper; //cluster thresholds
762
+ //gap sizes
763
+ float gaps[BLOCK_STATS_CLUSTERS];
764
+ //blobs
765
+ BLOBNBOX_IT blob_it = row->blob_list ();
766
+ STATS gap_stats (0, maxwidth);
767
+ STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
768
+ //clusters
769
+
770
+ smooth_factor =
771
+ (inT32) (row->xheight * textord_wordstats_smooth_factor + 1.5);
772
+ if (!blob_it.empty ()) {
773
+ prev_x = blob_it.data ()->bounding_box ().right ();
774
+ blob_it.forward ();
775
+ while (!blob_it.at_first ()) {
776
+ blob = blob_it.data ();
777
+ if (!blob->joined_to_prev ()) {
778
+ blob_box = blob->bounding_box ();
779
+ if (blob_box.left () - prev_x < maxwidth)
780
+ gap_stats.add (blob_box.left () - prev_x, 1);
781
+ prev_x = blob_box.right ();
782
+ }
783
+ blob_it.forward ();
784
+ }
785
+ }
786
+ if (gap_stats.get_total () == 0) {
787
+ return FALSE;
788
+ }
789
+ cluster_count = 0;
790
+ lower = row->xheight * words_initial_lower;
791
+ upper = row->xheight * words_initial_upper;
792
+ gap_stats.smooth (smooth_factor);
793
+ do {
794
+ prev_count = cluster_count;
795
+ cluster_count = gap_stats.cluster (lower, upper,
796
+ textord_spacesize_ratioprop,
797
+ BLOCK_STATS_CLUSTERS, cluster_stats);
798
+ }
799
+ while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
800
+ if (cluster_count < 1) {
801
+ return FALSE;
802
+ }
803
+ for (gap_index = 0; gap_index < cluster_count; gap_index++)
804
+ gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
805
+ //get medians
806
+ if (testing_on) {
807
+ tprintf ("cluster_count=%d:", cluster_count);
808
+ for (gap_index = 0; gap_index < cluster_count; gap_index++)
809
+ tprintf (" %g(%d)", gaps[gap_index],
810
+ cluster_stats[gap_index + 1].get_total ());
811
+ tprintf ("\n");
812
+ }
813
+ qsort (gaps, cluster_count, sizeof (float), sort_floats2);
814
+
815
+ //Try to find proportional non-space and space for row.
816
+ lower = row->xheight * words_default_prop_nonspace;
817
+ upper = row->xheight * textord_words_min_minspace;
818
+ for (gap_index = 0; gap_index < cluster_count
819
+ && gaps[gap_index] < lower; gap_index++);
820
+ if (gap_index == 0) {
821
+ if (testing_on)
822
+ tprintf ("No clusters below nonspace threshold!!\n");
823
+ if (cluster_count > 1) {
824
+ row->pr_nonsp = gaps[0];
825
+ row->pr_space = gaps[1];
826
+ }
827
+ else {
828
+ row->pr_nonsp = lower;
829
+ row->pr_space = gaps[0];
830
+ }
831
+ }
832
+ else {
833
+ row->pr_nonsp = gaps[gap_index - 1];
834
+ while (gap_index < cluster_count && gaps[gap_index] < upper)
835
+ gap_index++;
836
+ if (gap_index == cluster_count) {
837
+ if (testing_on)
838
+ tprintf ("No clusters above nonspace threshold!!\n");
839
+ row->pr_space = lower * textord_spacesize_ratioprop;
840
+ }
841
+ else
842
+ row->pr_space = gaps[gap_index];
843
+ }
844
+
845
+ //Now try to find the fixed pitch space and non-space.
846
+ upper = row->xheight * words_default_fixed_space;
847
+ for (gap_index = 0; gap_index < cluster_count
848
+ && gaps[gap_index] < upper; gap_index++);
849
+ if (gap_index == 0) {
850
+ if (testing_on)
851
+ tprintf ("No clusters below space threshold!!\n");
852
+ row->fp_nonsp = upper;
853
+ row->fp_space = gaps[0];
854
+ }
855
+ else {
856
+ row->fp_nonsp = gaps[gap_index - 1];
857
+ if (gap_index == cluster_count) {
858
+ if (testing_on)
859
+ tprintf ("No clusters above space threshold!!\n");
860
+ row->fp_space = row->xheight;
861
+ }
862
+ else
863
+ row->fp_space = gaps[gap_index];
864
+ }
865
+ if (testing_on) {
866
+ tprintf
867
+ ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
868
+ row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
869
+ }
870
+ return TRUE; //computed some stats
871
+ }
872
+
873
+
874
+ /**********************************************************************
875
+ * find_row_pitch
876
+ *
877
+ * Check to see if this row could be fixed pitch using the given spacings.
878
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
879
+ * The larger threshold is the word gap threshold.
880
+ **********************************************************************/
881
+
882
+ BOOL8 find_row_pitch( //find lines
883
+ TO_ROW *row, //row to do
884
+ inT32 maxwidth, //max permitted space
885
+ inT32 dm_gap, //ignorable gaps
886
+ TO_BLOCK *block, //block of row
887
+ inT32 block_index, //block_number
888
+ inT32 row_index, //number of row
889
+ BOOL8 testing_on //correct orientation
890
+ ) {
891
+ BOOL8 used_dm_model; //looks lik dot matrix
892
+ float min_space; //estimate threshold
893
+ float non_space; //gap size
894
+ float gap_iqr; //interquartile range
895
+ float pitch_iqr;
896
+ float dm_gap_iqr; //interquartile range
897
+ float dm_pitch_iqr;
898
+ float dm_pitch; //pitch with dm on
899
+ float pitch; //revised estimate
900
+ float initial_pitch; //guess at pitch
901
+ STATS gap_stats (0, maxwidth);
902
+ //centre-centre
903
+ STATS pitch_stats (0, maxwidth);
904
+
905
+ row->fixed_pitch = 0.0f;
906
+ initial_pitch = row->fp_space;
907
+ if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
908
+ initial_pitch = row->xheight;//keep pitch decent
909
+ non_space = row->fp_nonsp;
910
+ if (non_space > initial_pitch)
911
+ non_space = initial_pitch;
912
+ min_space = (initial_pitch + non_space) / 2;
913
+
914
+ if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
915
+ initial_pitch, min_space, TRUE, FALSE, dm_gap)) {
916
+ dm_gap_iqr = 0.0001;
917
+ dm_pitch_iqr = maxwidth * 2.0f;
918
+ dm_pitch = initial_pitch;
919
+ }
920
+ else {
921
+ dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
922
+ dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
923
+ dm_pitch = pitch_stats.ile (0.5);
924
+ }
925
+ gap_stats.clear ();
926
+ pitch_stats.clear ();
927
+ if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
928
+ initial_pitch, min_space, TRUE, FALSE, 0)) {
929
+ gap_iqr = 0.0001;
930
+ pitch_iqr = maxwidth * 3.0f;
931
+ }
932
+ else {
933
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
934
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
935
+ if (testing_on)
936
+ tprintf
937
+ ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
938
+ initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
939
+ initial_pitch = pitch_stats.ile (0.5);
940
+ if (min_space > initial_pitch
941
+ && count_pitch_stats (row, &gap_stats, &pitch_stats,
942
+ initial_pitch, initial_pitch, TRUE, FALSE, 0)) {
943
+ min_space = initial_pitch;
944
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
945
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
946
+ if (testing_on)
947
+ tprintf
948
+ ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
949
+ initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
950
+ initial_pitch = pitch_stats.ile (0.5);
951
+ }
952
+ }
953
+ if (textord_debug_pitch_metric)
954
+ tprintf ("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
955
+ block_index, row_index,
956
+ block->block->text_region () != NULL ?
957
+ (block->block->text_region ()->is_prop ()? 'P' : 'F') : 'X',
958
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
959
+ pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D'
960
+ : (pitch_iqr * dm_gap_iqr <=
961
+ dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
962
+ if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
963
+ row->pitch_decision = PITCH_DUNNO;
964
+ if (textord_debug_pitch_metric)
965
+ tprintf ("\n");
966
+ return FALSE; //insufficient data
967
+ }
968
+ if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
969
+ if (testing_on)
970
+ tprintf
971
+ ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
972
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
973
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
974
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
975
+ pitch = pitch_stats.ile (0.5);
976
+ used_dm_model = FALSE;
977
+ }
978
+ else {
979
+ if (testing_on)
980
+ tprintf
981
+ ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
982
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
983
+ gap_iqr = dm_gap_iqr;
984
+ pitch_iqr = dm_pitch_iqr;
985
+ pitch = dm_pitch;
986
+ used_dm_model = TRUE;
987
+ }
988
+ if (textord_debug_pitch_metric) {
989
+ tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
990
+ pitch_iqr, gap_iqr, pitch);
991
+ tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
992
+ pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
993
+ pitch_iqr < gap_iqr * textord_fpiqr_ratio
994
+ && pitch_iqr < block->xheight * textord_max_pitch_iqr
995
+ && pitch < block->xheight * textord_words_default_maxspace
996
+ ? 'F' : 'P');
997
+ }
998
+ if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
999
+ && pitch_iqr < block->xheight * textord_max_pitch_iqr
1000
+ && pitch < block->xheight * textord_words_default_maxspace)
1001
+ row->pitch_decision = PITCH_MAYBE_FIXED;
1002
+ else
1003
+ row->pitch_decision = PITCH_MAYBE_PROP;
1004
+ row->fixed_pitch = pitch;
1005
+ row->kern_size = gap_stats.ile (0.5);
1006
+ row->min_space = (inT32) (row->fixed_pitch + non_space) / 2;
1007
+ if (row->min_space > row->fixed_pitch)
1008
+ row->min_space = (inT32) row->fixed_pitch;
1009
+ row->max_nonspace = row->min_space;
1010
+ row->space_size = row->fixed_pitch;
1011
+ row->space_threshold = (row->max_nonspace + row->min_space) / 2;
1012
+ row->used_dm_model = used_dm_model;
1013
+ return TRUE;
1014
+ }
1015
+
1016
+
1017
+ /**********************************************************************
1018
+ * fixed_pitch_row
1019
+ *
1020
+ * Check to see if this row could be fixed pitch using the given spacings.
1021
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
1022
+ * The larger threshold is the word gap threshold.
1023
+ **********************************************************************/
1024
+
1025
+ BOOL8 fixed_pitch_row( //find lines
1026
+ TO_ROW *row, //row to do
1027
+ inT32 block_index //block_number
1028
+ ) {
1029
+ const char *res_string; //pitch result
1030
+ inT16 mid_cuts; //no of cheap cuts
1031
+ float non_space; //gap size
1032
+ float pitch_sd; //error on pitch
1033
+ float sp_sd; //space sd
1034
+
1035
+ non_space = row->fp_nonsp;
1036
+ if (non_space > row->fixed_pitch)
1037
+ non_space = row->fixed_pitch;
1038
+ if (textord_all_prop) {
1039
+ // Set the decision to definitely proportional.
1040
+ pitch_sd = textord_words_def_prop * row->fixed_pitch;
1041
+ row->pitch_decision = PITCH_DEF_PROP;
1042
+ } else {
1043
+ pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
1044
+ row->projection_right,
1045
+ (row->fixed_pitch + non_space * 3) / 4,
1046
+ row->fixed_pitch, sp_sd, mid_cuts,
1047
+ &row->char_cells,
1048
+ block_index == textord_debug_block);
1049
+ if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1050
+ && ((pitsync_linear_version & 3) < 3
1051
+ || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
1052
+ || sp_sd > 20
1053
+ || (pitch_sd == 0 && sp_sd > 10))))) {
1054
+ if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1055
+ && !row->all_caps
1056
+ && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1057
+ row->pitch_decision = PITCH_DEF_FIXED;
1058
+ else
1059
+ row->pitch_decision = PITCH_MAYBE_FIXED;
1060
+ }
1061
+ else if ((pitsync_linear_version & 3) < 3
1062
+ || sp_sd > 20
1063
+ || mid_cuts > 0
1064
+ || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
1065
+ if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
1066
+ row->pitch_decision = PITCH_MAYBE_PROP;
1067
+ else
1068
+ row->pitch_decision = PITCH_DEF_PROP;
1069
+ }
1070
+ else
1071
+ row->pitch_decision = PITCH_DUNNO;
1072
+ }
1073
+
1074
+ if (textord_debug_pitch_metric) {
1075
+ res_string = "??";
1076
+ switch (row->pitch_decision) {
1077
+ case PITCH_DEF_PROP:
1078
+ res_string = "DP";
1079
+ break;
1080
+ case PITCH_MAYBE_PROP:
1081
+ res_string = "MP";
1082
+ break;
1083
+ case PITCH_DEF_FIXED:
1084
+ res_string = "DF";
1085
+ break;
1086
+ case PITCH_MAYBE_FIXED:
1087
+ res_string = "MF";
1088
+ default:
1089
+ res_string = "??";
1090
+ }
1091
+ tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
1092
+ pitch_sd / row->fixed_pitch, sp_sd, res_string);
1093
+ }
1094
+ return TRUE;
1095
+ }
1096
+
1097
+
1098
+ /**********************************************************************
1099
+ * count_pitch_stats
1100
+ *
1101
+ * Count up the gap and pitch stats on the block to see if it is fixed pitch.
1102
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
1103
+ * The larger threshold is the word gap threshold.
1104
+ * The return value indicates whether there were any decent values to use.
1105
+ **********************************************************************/
1106
+
1107
+ BOOL8 count_pitch_stats( //find lines
1108
+ TO_ROW *row, //row to do
1109
+ STATS *gap_stats, //blob gaps
1110
+ STATS *pitch_stats, //centre-centre stats
1111
+ float initial_pitch, //guess at pitch
1112
+ float min_space, //estimate space size
1113
+ BOOL8 ignore_outsize, //discard big objects
1114
+ BOOL8 split_outsize, //split big objects
1115
+ inT32 dm_gap //ignorable gaps
1116
+ ) {
1117
+ BOOL8 prev_valid; //not word broken
1118
+ BLOBNBOX *blob; //current blob
1119
+ //blobs
1120
+ BLOBNBOX_IT blob_it = row->blob_list ();
1121
+ inT32 prev_right; //end of prev blob
1122
+ inT32 prev_centre; //centre of previous blob
1123
+ inT32 x_centre; //centre of this blob
1124
+ inT32 blob_width; //width of blob
1125
+ inT32 width_units; //no of widths in blob
1126
+ float width; //blob width
1127
+ TBOX blob_box; //bounding box
1128
+ TBOX joined_box; //of super blob
1129
+
1130
+ gap_stats->clear ();
1131
+ pitch_stats->clear ();
1132
+ if (blob_it.empty ())
1133
+ return FALSE;
1134
+ prev_valid = FALSE;
1135
+ prev_centre = 0;
1136
+ prev_right = 0; //stop complier warning
1137
+ joined_box = blob_it.data ()->bounding_box ();
1138
+ do {
1139
+ blob_it.forward ();
1140
+ blob = blob_it.data ();
1141
+ if (!blob->joined_to_prev ()) {
1142
+ blob_box = blob->bounding_box ();
1143
+ if ((blob_box.left () - joined_box.right () < dm_gap
1144
+ && !blob_it.at_first ())
1145
+ || (blob->cblob () == NULL && blob->blob () == NULL))
1146
+ joined_box += blob_box; //merge blobs
1147
+ else {
1148
+ blob_width = joined_box.width ();
1149
+ if (split_outsize) {
1150
+ width_units =
1151
+ (inT32) floor ((float) blob_width / initial_pitch + 0.5);
1152
+ if (width_units < 1)
1153
+ width_units = 1;
1154
+ width_units--;
1155
+ }
1156
+ else if (ignore_outsize) {
1157
+ width = (float) blob_width / initial_pitch;
1158
+ width_units = width < 1 + words_default_fixed_limit
1159
+ && width > 1 - words_default_fixed_limit ? 0 : -1;
1160
+ }
1161
+ else
1162
+ width_units = 0; //everything in
1163
+ x_centre = (inT32) (joined_box.left ()
1164
+ + (blob_width -
1165
+ width_units * initial_pitch) / 2);
1166
+ if (prev_valid && width_units >= 0) {
1167
+ // if (width_units>0)
1168
+ // {
1169
+ // tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
1170
+ // width_units,blob_width,x_centre,x_centre-prev_centre);
1171
+ // }
1172
+ gap_stats->add (joined_box.left () - prev_right, 1);
1173
+ pitch_stats->add (x_centre - prev_centre, 1);
1174
+ }
1175
+ prev_centre = (inT32) (x_centre + width_units * initial_pitch);
1176
+ prev_right = joined_box.right ();
1177
+ prev_valid = blob_box.left () - joined_box.right () < min_space;
1178
+ prev_valid = prev_valid && width_units >= 0;
1179
+ joined_box = blob_box;
1180
+ }
1181
+ }
1182
+ }
1183
+ while (!blob_it.at_first ());
1184
+ return gap_stats->get_total () >= 3;
1185
+ }
1186
+
1187
+
1188
+ /**********************************************************************
1189
+ * tune_row_pitch
1190
+ *
1191
+ * Use a dp algorithm to fit the character cells and return the sd of
1192
+ * the cell size over the row.
1193
+ **********************************************************************/
1194
+
1195
+ float tune_row_pitch( //find fp cells
1196
+ TO_ROW *row, //row to do
1197
+ STATS *projection, //vertical projection
1198
+ inT16 projection_left, //edge of projection
1199
+ inT16 projection_right, //edge of projection
1200
+ float space_size, //size of blank
1201
+ float &initial_pitch, //guess at pitch
1202
+ float &best_sp_sd, //space sd
1203
+ inT16 &best_mid_cuts, //no of cheap cuts
1204
+ ICOORDELT_LIST *best_cells, //row cells
1205
+ BOOL8 testing_on //inidividual words
1206
+ ) {
1207
+ int pitch_delta; //offset pitch
1208
+ inT16 mid_cuts; //cheap cuts
1209
+ float pitch_sd; //current sd
1210
+ float best_sd; //best result
1211
+ float best_pitch; //pitch for best result
1212
+ float initial_sd; //starting error
1213
+ float sp_sd; //space sd
1214
+ ICOORDELT_LIST test_cells; //row cells
1215
+ ICOORDELT_IT best_it; //start of best list
1216
+
1217
+ if (textord_fast_pitch_test)
1218
+ return tune_row_pitch2 (row, projection, projection_left,
1219
+ projection_right, space_size, initial_pitch,
1220
+ best_sp_sd,
1221
+ //space sd
1222
+ best_mid_cuts, best_cells, testing_on);
1223
+ if (textord_disable_pitch_test) {
1224
+ best_sp_sd = initial_pitch;
1225
+ return initial_pitch;
1226
+ }
1227
+ initial_sd =
1228
+ compute_pitch_sd(row,
1229
+ projection,
1230
+ projection_left,
1231
+ projection_right,
1232
+ space_size,
1233
+ initial_pitch,
1234
+ best_sp_sd,
1235
+ best_mid_cuts,
1236
+ best_cells,
1237
+ testing_on);
1238
+ best_sd = initial_sd;
1239
+ best_pitch = initial_pitch;
1240
+ if (testing_on)
1241
+ tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1242
+ for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1243
+ pitch_sd =
1244
+ compute_pitch_sd (row, projection, projection_left, projection_right,
1245
+ space_size, initial_pitch + pitch_delta, sp_sd,
1246
+ mid_cuts, &test_cells, testing_on);
1247
+ if (testing_on)
1248
+ tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1249
+ pitch_sd);
1250
+ if (pitch_sd < best_sd) {
1251
+ best_sd = pitch_sd;
1252
+ best_mid_cuts = mid_cuts;
1253
+ best_sp_sd = sp_sd;
1254
+ best_pitch = initial_pitch + pitch_delta;
1255
+ best_cells->clear ();
1256
+ best_it.set_to_list (best_cells);
1257
+ best_it.add_list_after (&test_cells);
1258
+ }
1259
+ else
1260
+ test_cells.clear ();
1261
+ if (pitch_sd > initial_sd)
1262
+ break; //getting worse
1263
+ }
1264
+ for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1265
+ pitch_sd =
1266
+ compute_pitch_sd (row, projection, projection_left, projection_right,
1267
+ space_size, initial_pitch - pitch_delta, sp_sd,
1268
+ mid_cuts, &test_cells, testing_on);
1269
+ if (testing_on)
1270
+ tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1271
+ pitch_sd);
1272
+ if (pitch_sd < best_sd) {
1273
+ best_sd = pitch_sd;
1274
+ best_mid_cuts = mid_cuts;
1275
+ best_sp_sd = sp_sd;
1276
+ best_pitch = initial_pitch - pitch_delta;
1277
+ best_cells->clear ();
1278
+ best_it.set_to_list (best_cells);
1279
+ best_it.add_list_after (&test_cells);
1280
+ }
1281
+ else
1282
+ test_cells.clear ();
1283
+ if (pitch_sd > initial_sd)
1284
+ break;
1285
+ }
1286
+ initial_pitch = best_pitch;
1287
+
1288
+ if (textord_debug_pitch_metric)
1289
+ print_pitch_sd(row,
1290
+ projection,
1291
+ projection_left,
1292
+ projection_right,
1293
+ space_size,
1294
+ best_pitch);
1295
+
1296
+ return best_sd;
1297
+ }
1298
+
1299
+
1300
+ /**********************************************************************
1301
+ * tune_row_pitch
1302
+ *
1303
+ * Use a dp algorithm to fit the character cells and return the sd of
1304
+ * the cell size over the row.
1305
+ **********************************************************************/
1306
+
1307
+ float tune_row_pitch2( //find fp cells
1308
+ TO_ROW *row, //row to do
1309
+ STATS *projection, //vertical projection
1310
+ inT16 projection_left, //edge of projection
1311
+ inT16 projection_right, //edge of projection
1312
+ float space_size, //size of blank
1313
+ float &initial_pitch, //guess at pitch
1314
+ float &best_sp_sd, //space sd
1315
+ inT16 &best_mid_cuts, //no of cheap cuts
1316
+ ICOORDELT_LIST *best_cells, //row cells
1317
+ BOOL8 testing_on //inidividual words
1318
+ ) {
1319
+ int pitch_delta; //offset pitch
1320
+ inT16 pixel; //pixel coord
1321
+ inT16 best_pixel; //pixel coord
1322
+ inT16 best_delta; //best pitch
1323
+ inT16 best_pitch; //best pitch
1324
+ inT16 start; //of good range
1325
+ inT16 end; //of good range
1326
+ inT32 best_count; //lowest sum
1327
+ float best_sd; //best result
1328
+ STATS *sum_proj; //summed projection
1329
+
1330
+ best_sp_sd = initial_pitch;
1331
+
1332
+ if (textord_disable_pitch_test) {
1333
+ return initial_pitch;
1334
+ }
1335
+ sum_proj = new STATS[textord_pitch_range * 2 + 1];
1336
+ if (sum_proj == NULL)
1337
+ return initial_pitch;
1338
+ best_pitch = (inT32) initial_pitch;
1339
+
1340
+ for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1341
+ pitch_delta++)
1342
+ sum_proj[textord_pitch_range + pitch_delta].set_range (0,
1343
+ best_pitch +
1344
+ pitch_delta + 1);
1345
+ for (pixel = projection_left; pixel <= projection_right; pixel++) {
1346
+ for (pitch_delta = -textord_pitch_range;
1347
+ pitch_delta <= textord_pitch_range; pitch_delta++)
1348
+ sum_proj[textord_pitch_range +
1349
+ pitch_delta].add ((pixel - projection_left) % (best_pitch +
1350
+ pitch_delta),
1351
+ projection->pile_count (pixel));
1352
+ }
1353
+ best_count = sum_proj[textord_pitch_range].pile_count (0);
1354
+ best_delta = 0;
1355
+ best_pixel = 0;
1356
+ for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1357
+ pitch_delta++) {
1358
+ for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1359
+ if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
1360
+ < best_count) {
1361
+ best_count =
1362
+ sum_proj[textord_pitch_range +
1363
+ pitch_delta].pile_count (pixel);
1364
+ best_delta = pitch_delta;
1365
+ best_pixel = pixel;
1366
+ }
1367
+ }
1368
+ }
1369
+ if (testing_on)
1370
+ tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1371
+ initial_pitch, best_delta, best_count);
1372
+ best_pitch += best_delta;
1373
+ initial_pitch = best_pitch;
1374
+ best_count++;
1375
+ best_count += best_count;
1376
+ for (start = best_pixel - 2; start > best_pixel - best_pitch
1377
+ && sum_proj[textord_pitch_range +
1378
+ best_delta].pile_count (start % best_pitch) <= best_count;
1379
+ start--);
1380
+ for (end = best_pixel + 2;
1381
+ end < best_pixel + best_pitch
1382
+ && sum_proj[textord_pitch_range +
1383
+ best_delta].pile_count (end % best_pitch) <= best_count;
1384
+ end++);
1385
+
1386
+ best_sd =
1387
+ compute_pitch_sd(row,
1388
+ projection,
1389
+ projection_left,
1390
+ projection_right,
1391
+ space_size,
1392
+ initial_pitch,
1393
+ best_sp_sd,
1394
+ best_mid_cuts,
1395
+ best_cells,
1396
+ testing_on,
1397
+ start,
1398
+ end);
1399
+ if (testing_on)
1400
+ tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
1401
+ best_sd);
1402
+
1403
+ if (textord_debug_pitch_metric)
1404
+ print_pitch_sd(row,
1405
+ projection,
1406
+ projection_left,
1407
+ projection_right,
1408
+ space_size,
1409
+ initial_pitch);
1410
+
1411
+ delete[]sum_proj;
1412
+
1413
+ return best_sd;
1414
+ }
1415
+
1416
+
1417
+ /**********************************************************************
1418
+ * compute_pitch_sd
1419
+ *
1420
+ * Use a dp algorithm to fit the character cells and return the sd of
1421
+ * the cell size over the row.
1422
+ **********************************************************************/
1423
+
1424
+ float compute_pitch_sd( //find fp cells
1425
+ TO_ROW *row, //row to do
1426
+ STATS *projection, //vertical projection
1427
+ inT16 projection_left, //edge
1428
+ inT16 projection_right, //edge
1429
+ float space_size, //size of blank
1430
+ float initial_pitch, //guess at pitch
1431
+ float &sp_sd, //space sd
1432
+ inT16 &mid_cuts, //no of free cuts
1433
+ ICOORDELT_LIST *row_cells, //list of chop pts
1434
+ BOOL8 testing_on, //inidividual words
1435
+ inT16 start, //start of good range
1436
+ inT16 end //end of good range
1437
+ ) {
1438
+ inT16 occupation; //no of cells in word.
1439
+ //blobs
1440
+ BLOBNBOX_IT blob_it = row->blob_list ();
1441
+ BLOBNBOX_IT start_it; //start of word
1442
+ BLOBNBOX_IT plot_it; //for plotting
1443
+ inT16 blob_count; //no of blobs
1444
+ TBOX blob_box; //bounding box
1445
+ TBOX prev_box; //of super blob
1446
+ inT32 prev_right; //of word sync
1447
+ int scale_factor; //on scores for big words
1448
+ inT32 sp_count; //spaces
1449
+ FPSEGPT_LIST seg_list; //char cells
1450
+ FPSEGPT_IT seg_it; //iterator
1451
+ inT16 segpos; //position of segment
1452
+ inT16 cellpos; //previous cell boundary
1453
+ //iterator
1454
+ ICOORDELT_IT cell_it = row_cells;
1455
+ ICOORDELT *cell; //new cell
1456
+ double sqsum; //sum of squares
1457
+ double spsum; //of spaces
1458
+ double sp_var; //space error
1459
+ double word_sync; //result for word
1460
+ inT32 total_count; //total blobs
1461
+
1462
+ if ((pitsync_linear_version & 3) > 1) {
1463
+ word_sync = compute_pitch_sd2 (row, projection, projection_left,
1464
+ projection_right, initial_pitch,
1465
+ occupation, mid_cuts, row_cells,
1466
+ testing_on, start, end);
1467
+ sp_sd = occupation;
1468
+ return word_sync;
1469
+ }
1470
+ mid_cuts = 0;
1471
+ cellpos = 0;
1472
+ total_count = 0;
1473
+ sqsum = 0;
1474
+ sp_count = 0;
1475
+ spsum = 0;
1476
+ prev_right = -1;
1477
+ if (blob_it.empty ())
1478
+ return space_size * 10;
1479
+ #ifndef GRAPHICS_DISABLED
1480
+ if (testing_on && to_win > 0) {
1481
+ blob_box = blob_it.data ()->bounding_box ();
1482
+ projection->plot (to_win, projection_left,
1483
+ row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1484
+ }
1485
+ #endif
1486
+ start_it = blob_it;
1487
+ blob_count = 0;
1488
+ blob_box = box_next (&blob_it);//first blob
1489
+ blob_it.mark_cycle_pt ();
1490
+ do {
1491
+ for (; blob_count > 0; blob_count--)
1492
+ box_next(&start_it);
1493
+ do {
1494
+ prev_box = blob_box;
1495
+ blob_count++;
1496
+ blob_box = box_next (&blob_it);
1497
+ }
1498
+ while (!blob_it.cycled_list ()
1499
+ && blob_box.left () - prev_box.right () < space_size);
1500
+ plot_it = start_it;
1501
+ if (pitsync_linear_version & 3)
1502
+ word_sync =
1503
+ check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
1504
+ projection, projection_left, projection_right,
1505
+ row->xheight * textord_projection_scale,
1506
+ occupation, &seg_list, start, end);
1507
+ else
1508
+ word_sync =
1509
+ check_pitch_sync (&start_it, blob_count, (inT16) initial_pitch, 2,
1510
+ projection, &seg_list);
1511
+ if (testing_on) {
1512
+ tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
1513
+ prev_box.right (), prev_box.top (),
1514
+ seg_list.length () - 1, word_sync);
1515
+ seg_it.set_to_list (&seg_list);
1516
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1517
+ seg_it.forward ()) {
1518
+ if (seg_it.data ()->faked)
1519
+ tprintf ("(F)");
1520
+ tprintf ("%d, ", seg_it.data ()->position ());
1521
+ // tprintf("C=%g, s=%g, sq=%g\n",
1522
+ // seg_it.data()->cost_function(),
1523
+ // seg_it.data()->sum(),
1524
+ // seg_it.data()->squares());
1525
+ }
1526
+ tprintf ("\n");
1527
+ }
1528
+ #ifndef GRAPHICS_DISABLED
1529
+ if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
1530
+ plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1531
+ #endif
1532
+ seg_it.set_to_list (&seg_list);
1533
+ if (prev_right >= 0) {
1534
+ sp_var = seg_it.data ()->position () - prev_right;
1535
+ sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1536
+ sp_var *= sp_var;
1537
+ spsum += sp_var;
1538
+ sp_count++;
1539
+ }
1540
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1541
+ segpos = seg_it.data ()->position ();
1542
+ if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1543
+ //big gap
1544
+ while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1545
+ cell = new ICOORDELT (cellpos + (inT16) initial_pitch, 0);
1546
+ cell_it.add_after_then_move (cell);
1547
+ cellpos += (inT16) initial_pitch;
1548
+ }
1549
+ //make new one
1550
+ cell = new ICOORDELT (segpos, 0);
1551
+ cell_it.add_after_then_move (cell);
1552
+ cellpos = segpos;
1553
+ }
1554
+ else if (segpos > cellpos - initial_pitch / 2) {
1555
+ cell = cell_it.data ();
1556
+ //average positions
1557
+ cell->set_x ((cellpos + segpos) / 2);
1558
+ cellpos = cell->x ();
1559
+ }
1560
+ }
1561
+ seg_it.move_to_last ();
1562
+ prev_right = seg_it.data ()->position ();
1563
+ if (textord_pitch_scalebigwords) {
1564
+ scale_factor = (seg_list.length () - 2) / 2;
1565
+ if (scale_factor < 1)
1566
+ scale_factor = 1;
1567
+ }
1568
+ else
1569
+ scale_factor = 1;
1570
+ sqsum += word_sync * scale_factor;
1571
+ total_count += (seg_list.length () - 1) * scale_factor;
1572
+ seg_list.clear ();
1573
+ }
1574
+ while (!blob_it.cycled_list ());
1575
+ sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1576
+ return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1577
+ }
1578
+
1579
+
1580
+ /**********************************************************************
1581
+ * compute_pitch_sd2
1582
+ *
1583
+ * Use a dp algorithm to fit the character cells and return the sd of
1584
+ * the cell size over the row.
1585
+ **********************************************************************/
1586
+
1587
+ float compute_pitch_sd2( //find fp cells
1588
+ TO_ROW *row, //row to do
1589
+ STATS *projection, //vertical projection
1590
+ inT16 projection_left, //edge
1591
+ inT16 projection_right, //edge
1592
+ float initial_pitch, //guess at pitch
1593
+ inT16 &occupation, //no of occupied cells
1594
+ inT16 &mid_cuts, //no of free cuts
1595
+ ICOORDELT_LIST *row_cells, //list of chop pts
1596
+ BOOL8 testing_on, //inidividual words
1597
+ inT16 start, //start of good range
1598
+ inT16 end //end of good range
1599
+ ) {
1600
+ //blobs
1601
+ BLOBNBOX_IT blob_it = row->blob_list ();
1602
+ BLOBNBOX_IT plot_it;
1603
+ inT16 blob_count; //no of blobs
1604
+ TBOX blob_box; //bounding box
1605
+ FPSEGPT_LIST seg_list; //char cells
1606
+ FPSEGPT_IT seg_it; //iterator
1607
+ inT16 segpos; //position of segment
1608
+ //iterator
1609
+ ICOORDELT_IT cell_it = row_cells;
1610
+ ICOORDELT *cell; //new cell
1611
+ double word_sync; //result for word
1612
+
1613
+ mid_cuts = 0;
1614
+ if (blob_it.empty ()) {
1615
+ occupation = 0;
1616
+ return initial_pitch * 10;
1617
+ }
1618
+ #ifndef GRAPHICS_DISABLED
1619
+ if (testing_on && to_win > 0) {
1620
+ projection->plot (to_win, projection_left,
1621
+ row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1622
+ }
1623
+ #endif
1624
+ blob_count = 0;
1625
+ blob_it.mark_cycle_pt ();
1626
+ do {
1627
+ //first blob
1628
+ blob_box = box_next (&blob_it);
1629
+ blob_count++;
1630
+ }
1631
+ while (!blob_it.cycled_list ());
1632
+ plot_it = blob_it;
1633
+ word_sync = check_pitch_sync2 (&blob_it, blob_count, (inT16) initial_pitch,
1634
+ 2, projection, projection_left,
1635
+ projection_right,
1636
+ row->xheight * textord_projection_scale,
1637
+ occupation, &seg_list, start, end);
1638
+ if (testing_on) {
1639
+ tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
1640
+ blob_box.right (), blob_box.top (),
1641
+ seg_list.length () - 1, word_sync);
1642
+ seg_it.set_to_list (&seg_list);
1643
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1644
+ if (seg_it.data ()->faked)
1645
+ tprintf ("(F)");
1646
+ tprintf ("%d, ", seg_it.data ()->position ());
1647
+ // tprintf("C=%g, s=%g, sq=%g\n",
1648
+ // seg_it.data()->cost_function(),
1649
+ // seg_it.data()->sum(),
1650
+ // seg_it.data()->squares());
1651
+ }
1652
+ tprintf ("\n");
1653
+ }
1654
+ #ifndef GRAPHICS_DISABLED
1655
+ if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
1656
+ plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1657
+ #endif
1658
+ seg_it.set_to_list (&seg_list);
1659
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1660
+ segpos = seg_it.data ()->position ();
1661
+ //make new one
1662
+ cell = new ICOORDELT (segpos, 0);
1663
+ cell_it.add_after_then_move (cell);
1664
+ if (seg_it.at_last ())
1665
+ mid_cuts = seg_it.data ()->cheap_cuts ();
1666
+ }
1667
+ seg_list.clear ();
1668
+ return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
1669
+ }
1670
+
1671
+
1672
+ /**********************************************************************
1673
+ * print_pitch_sd
1674
+ *
1675
+ * Use a dp algorithm to fit the character cells and return the sd of
1676
+ * the cell size over the row.
1677
+ **********************************************************************/
1678
+
1679
+ void print_pitch_sd( //find fp cells
1680
+ TO_ROW *row, //row to do
1681
+ STATS *projection, //vertical projection
1682
+ inT16 projection_left, //edges //size of blank
1683
+ inT16 projection_right,
1684
+ float space_size,
1685
+ float initial_pitch //guess at pitch
1686
+ ) {
1687
+ const char *res2; //pitch result
1688
+ inT16 occupation; //used cells
1689
+ float sp_sd; //space sd
1690
+ //blobs
1691
+ BLOBNBOX_IT blob_it = row->blob_list ();
1692
+ BLOBNBOX_IT start_it; //start of word
1693
+ BLOBNBOX_IT row_start; //start of row
1694
+ inT16 blob_count; //no of blobs
1695
+ inT16 total_blob_count; //total blobs in line
1696
+ TBOX blob_box; //bounding box
1697
+ TBOX prev_box; //of super blob
1698
+ inT32 prev_right; //of word sync
1699
+ int scale_factor; //on scores for big words
1700
+ inT32 sp_count; //spaces
1701
+ FPSEGPT_LIST seg_list; //char cells
1702
+ FPSEGPT_IT seg_it; //iterator
1703
+ double sqsum; //sum of squares
1704
+ double spsum; //of spaces
1705
+ double sp_var; //space error
1706
+ double word_sync; //result for word
1707
+ double total_count; //total cuts
1708
+
1709
+ if (blob_it.empty ())
1710
+ return;
1711
+ row_start = blob_it;
1712
+ total_blob_count = 0;
1713
+
1714
+ total_count = 0;
1715
+ sqsum = 0;
1716
+ sp_count = 0;
1717
+ spsum = 0;
1718
+ prev_right = -1;
1719
+ blob_it = row_start;
1720
+ start_it = blob_it;
1721
+ blob_count = 0;
1722
+ blob_box = box_next (&blob_it);//first blob
1723
+ blob_it.mark_cycle_pt ();
1724
+ do {
1725
+ for (; blob_count > 0; blob_count--)
1726
+ box_next(&start_it);
1727
+ do {
1728
+ prev_box = blob_box;
1729
+ blob_count++;
1730
+ blob_box = box_next (&blob_it);
1731
+ }
1732
+ while (!blob_it.cycled_list ()
1733
+ && blob_box.left () - prev_box.right () < space_size);
1734
+ word_sync =
1735
+ check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
1736
+ projection, projection_left, projection_right,
1737
+ row->xheight * textord_projection_scale,
1738
+ occupation, &seg_list, 0, 0);
1739
+ total_blob_count += blob_count;
1740
+ seg_it.set_to_list (&seg_list);
1741
+ if (prev_right >= 0) {
1742
+ sp_var = seg_it.data ()->position () - prev_right;
1743
+ sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1744
+ sp_var *= sp_var;
1745
+ spsum += sp_var;
1746
+ sp_count++;
1747
+ }
1748
+ seg_it.move_to_last ();
1749
+ prev_right = seg_it.data ()->position ();
1750
+ if (textord_pitch_scalebigwords) {
1751
+ scale_factor = (seg_list.length () - 2) / 2;
1752
+ if (scale_factor < 1)
1753
+ scale_factor = 1;
1754
+ }
1755
+ else
1756
+ scale_factor = 1;
1757
+ sqsum += word_sync * scale_factor;
1758
+ total_count += (seg_list.length () - 1) * scale_factor;
1759
+ seg_list.clear ();
1760
+ }
1761
+ while (!blob_it.cycled_list ());
1762
+ sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1763
+ word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1764
+ tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1765
+ word_sync, word_sync / initial_pitch, sp_sd,
1766
+ word_sync < textord_words_pitchsd_threshold * initial_pitch
1767
+ ? 'F' : 'P');
1768
+
1769
+ start_it = row_start;
1770
+ blob_it = row_start;
1771
+ word_sync =
1772
+ check_pitch_sync2 (&blob_it, total_blob_count, (inT16) initial_pitch, 2,
1773
+ projection, projection_left, projection_right,
1774
+ row->xheight * textord_projection_scale, occupation,
1775
+ &seg_list, 0, 0);
1776
+ if (occupation > 1)
1777
+ word_sync /= occupation;
1778
+ word_sync = sqrt (word_sync);
1779
+
1780
+ #ifndef GRAPHICS_DISABLED
1781
+ if (textord_show_row_cuts && to_win != NULL)
1782
+ plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
1783
+ #endif
1784
+ seg_list.clear ();
1785
+ if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
1786
+ if (word_sync < textord_words_def_fixed * initial_pitch
1787
+ && !row->all_caps)
1788
+ res2 = "DF";
1789
+ else
1790
+ res2 = "MF";
1791
+ }
1792
+ else
1793
+ res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
1794
+ tprintf
1795
+ ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1796
+ word_sync, word_sync / initial_pitch,
1797
+ word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
1798
+ occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
1799
+ }
1800
+
1801
+
1802
+ /**********************************************************************
1803
+ * sort_floats
1804
+ *
1805
+ * qsort function to sort 2 floats.
1806
+ **********************************************************************/
1807
+
1808
+ int sort_floats2( //qsort function
1809
+ const void *arg1, //ptrs to floats
1810
+ const void *arg2) {
1811
+ float diff; //difference
1812
+
1813
+ diff = *((float *) arg1) - *((float *) arg2);
1814
+ if (diff > 0)
1815
+ return 1;
1816
+ else if (diff < 0)
1817
+ return -1;
1818
+ else
1819
+ return 0;
1820
+ }
1821
+
1822
+
1823
+ /**********************************************************************
1824
+ * find_repeated_chars
1825
+ *
1826
+ * Find 4 or more adjacent chars which are the same and put them
1827
+ * into words in advance of fixed pitch checking and word generation.
1828
+ **********************************************************************/
1829
+
1830
+ void find_repeated_chars( //search for equal chars
1831
+ TO_BLOCK *block, //block to search
1832
+ BOOL8 testing_on //dbug mode
1833
+ ) {
1834
+ BOOL8 bol; //start of line
1835
+ TO_ROW *row; //current row
1836
+ TO_ROW_IT row_it = block->get_rows ();
1837
+ ROW *real_row; //output row
1838
+ WERD_IT word_it; //new words
1839
+ WERD *word; //new word
1840
+ BLOBNBOX *bblob; //current blob
1841
+ BLOBNBOX *nextblob; //neighbour to compare
1842
+ BLOBNBOX_IT box_it; //iterator
1843
+ BLOBNBOX_IT search_it; //forward search
1844
+ inT32 blobcount; //no of neighbours
1845
+ inT32 matched_blobcount; //no of matches
1846
+ inT32 blobindex; //in row
1847
+ inT32 row_length; //blobs in row
1848
+ inT32 width_change; //max width change
1849
+ inT32 blob_width; //required blob width
1850
+ inT32 space_width; //required gap width
1851
+ inT32 prev_right; //right edge of last blob
1852
+ float rating; //match rating
1853
+ PBLOB *pblob1; //polygonal blob
1854
+ PBLOB *pblob2; //second blob
1855
+ TBOX word_box; //for plotting
1856
+
1857
+ if (row_it.empty ())
1858
+ return; //empty block
1859
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1860
+ row = row_it.data ();
1861
+ box_it.set_to_list (row->blob_list ());
1862
+ row_length = row->blob_list ()->length ();
1863
+ blobindex = 0;
1864
+ word_it.set_to_list (&row->rep_words);
1865
+ bol = TRUE;
1866
+ if (!box_it.empty ()) {
1867
+ real_row = new ROW (row,
1868
+ (inT16) block->kern_size,
1869
+ (inT16) block->space_size);
1870
+ do {
1871
+ bblob = box_it.data ();
1872
+ blobcount = 1;
1873
+ search_it = box_it;
1874
+ search_it.forward ();
1875
+ matched_blobcount = 1;
1876
+ width_change = MAX_INT16;
1877
+ blob_width = 0;
1878
+ space_width = 0;
1879
+ prev_right = bblob->bounding_box ().right ();
1880
+ if (bblob->bounding_box ().height () * 2 < row->xheight
1881
+ && !bblob->joined_to_prev ()
1882
+ && (bblob->blob () != NULL || bblob->cblob () != NULL)) {
1883
+ if (bblob->cblob () != NULL)
1884
+ pblob1 = new PBLOB (bblob->cblob (), row->xheight);
1885
+ else
1886
+ pblob1 = bblob->blob ();
1887
+
1888
+ rating = 0.0f;
1889
+ while (rating < textord_repeat_rating
1890
+ && blobindex + blobcount < row_length
1891
+ && ((nextblob = search_it.data ())->blob () != NULL
1892
+ || nextblob->cblob () != NULL)
1893
+ && nextblob->bounding_box ().height () * 2 <
1894
+ row->xheight) {
1895
+ if (blobcount == 1) {
1896
+ space_width = nextblob->bounding_box ().left ()
1897
+ - bblob->bounding_box ().right ();
1898
+ blob_width = bblob->bounding_box ().width ();
1899
+ width_change =
1900
+ blob_width >
1901
+ space_width ? blob_width : space_width;
1902
+ width_change =
1903
+ (inT32) (width_change *
1904
+ textord_repch_width_variance);
1905
+ if (width_change < 3)
1906
+ width_change = 3;
1907
+ }
1908
+ if (nextblob->bounding_box ().width () >
1909
+ blob_width + width_change
1910
+ || nextblob->bounding_box ().width () <
1911
+ blob_width - width_change
1912
+ || nextblob->bounding_box ().left () - prev_right >
1913
+ space_width + width_change
1914
+ || nextblob->bounding_box ().left () - prev_right <
1915
+ space_width - width_change) {
1916
+ if (testing_on)
1917
+ tprintf
1918
+ ("Repch terminated:bw=%d, sw=%d, wc=%d, pr=%d, nb=(%d,%d)\n",
1919
+ blob_width, space_width, width_change,
1920
+ prev_right, nextblob->bounding_box ().left (),
1921
+ nextblob->bounding_box ().right ());
1922
+ break; //not good enough
1923
+ }
1924
+ if (nextblob->blob () != NULL)
1925
+ rating = compare_blobs (pblob1, real_row,
1926
+ nextblob->blob (), real_row);
1927
+ else {
1928
+ pblob2 =
1929
+ new PBLOB (nextblob->cblob (), row->xheight);
1930
+ rating =
1931
+ compare_blobs(pblob1, real_row, pblob2, real_row);
1932
+ delete pblob2;
1933
+ }
1934
+ if (rating < textord_repeat_rating) {
1935
+ // if (testing_on)
1936
+ // tprintf("Blob at (%d,%d)->(%d,%d) had rating %g\n",
1937
+ // nextblob->bounding_box().left(),
1938
+ // nextblob->bounding_box().bottom(),
1939
+ // nextblob->bounding_box().right(),
1940
+ // nextblob->bounding_box().top(),
1941
+ // rating);
1942
+ blobcount++;
1943
+ search_it.forward ();
1944
+ matched_blobcount++;
1945
+ while (blobindex + blobcount < row_length
1946
+ && (search_it.data ()->joined_to_prev () ||
1947
+ (search_it.data()->blob() == NULL &&
1948
+ search_it.data()->cblob() == NULL))) {
1949
+ search_it.forward ();
1950
+ blobcount++; //suck in joined bits
1951
+ }
1952
+ }
1953
+ prev_right = nextblob->bounding_box ().right ();
1954
+ }
1955
+ if (bblob->cblob () != NULL)
1956
+ delete pblob1;
1957
+
1958
+ if (matched_blobcount >= textord_repeat_threshold) {
1959
+ word =
1960
+ make_real_word (&box_it, blobcount, bol, FALSE, FALSE,
1961
+ 1);
1962
+ #ifndef GRAPHICS_DISABLED
1963
+ if (testing_on) {
1964
+ word_box = word->bounding_box ();
1965
+ tprintf
1966
+ ("Found repeated word of %d blobs (%d matched) from (%d,%d)->(%d,%d)\n",
1967
+ blobcount, matched_blobcount, word_box.left (),
1968
+ word_box.bottom (), word_box.right (),
1969
+ word_box.top ());
1970
+ //perimeter_color_index(to_win, RED);
1971
+ to_win->Pen(255,0,0);
1972
+ //interior_style(to_win, INT_HOLLOW, TRUE);
1973
+ to_win->Rectangle(word_box.left (),
1974
+ word_box.bottom (), word_box.right (),
1975
+ word_box.top ());
1976
+ }
1977
+ #endif
1978
+ word->set_flag (W_REP_CHAR, TRUE);
1979
+ word->set_flag (W_DONT_CHOP, TRUE);
1980
+ word_it.add_after_then_move (word);
1981
+ blobindex += blobcount;
1982
+ }
1983
+ }
1984
+ bol = FALSE;
1985
+ box_it.forward (); //next one
1986
+ blobindex++;
1987
+ }
1988
+ //until all done
1989
+ while (!box_it.at_first ());
1990
+ delete real_row;
1991
+ }
1992
+ }
1993
+ }
1994
+
1995
+
1996
+ /**********************************************************************
1997
+ * plot_fp_word
1998
+ *
1999
+ * Plot a block of words as if fixed pitch.
2000
+ **********************************************************************/
2001
+
2002
+ #ifndef GRAPHICS_DISABLED
2003
+ void plot_fp_word( //draw block of words
2004
+ TO_BLOCK *block, //block to draw
2005
+ float pitch, //pitch to draw with
2006
+ float nonspace //for space threshold
2007
+ ) {
2008
+ TO_ROW *row; //current row
2009
+ TO_ROW_IT row_it = block->get_rows ();
2010
+
2011
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2012
+ row = row_it.data ();
2013
+ row->min_space = (inT32) ((pitch + nonspace) / 2);
2014
+ row->max_nonspace = row->min_space;
2015
+ row->space_threshold = row->min_space;
2016
+ plot_word_decisions (to_win, (inT16) pitch, row);
2017
+ }
2018
+ }
2019
+ #endif