tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612)
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1082 @@
1
+ /**********************************************************************
2
+ * File: adaptions.cpp (Formerly adaptions.c)
3
+ * Description: Functions used to adapt to blobs already confidently
4
+ * identified
5
+ * Author: Chris Newton
6
+ * Created: Thu Oct 7 10:17:28 BST 1993
7
+ *
8
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
9
+ ** Licensed under the Apache License, Version 2.0 (the "License");
10
+ ** you may not use this file except in compliance with the License.
11
+ ** You may obtain a copy of the License at
12
+ ** http://www.apache.org/licenses/LICENSE-2.0
13
+ ** Unless required by applicable law or agreed to in writing, software
14
+ ** distributed under the License is distributed on an "AS IS" BASIS,
15
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ ** See the License for the specific language governing permissions and
17
+ ** limitations under the License.
18
+ *
19
+ **********************************************************************/
20
+
21
+ #include "mfcpch.h"
22
+ #ifdef __UNIX__
23
+ #include <assert.h>
24
+ #endif
25
+ #include <ctype.h>
26
+ #include <string.h>
27
+ #include "tessbox.h"
28
+ #include "tessvars.h"
29
+ #include "memry.h"
30
+ #include "mainblk.h"
31
+ #include "charcut.h"
32
+ #include "imgs.h"
33
+ #include "scaleimg.h"
34
+ #include "reject.h"
35
+ #include "control.h"
36
+ #include "adaptions.h"
37
+ #include "stopper.h"
38
+ #include "charsample.h"
39
+ #include "matmatch.h"
40
+ #include "secname.h"
41
+
42
+ inT32 demo_word = 0; // set to the current word number around the demo-mode match_score call in adapt_to_good_ems, then reset to 0
43
+
44
+ #define WINDOWNAMESIZE 13 /*max size of name */
45
+
46
+ #define EXTERN // defined empty, so the EXTERN prefix on the variables below expands to nothing
47
+
48
+ EXTERN BOOL_VAR (tessedit_reject_ems, FALSE, "Reject all m's");
49
+ EXTERN BOOL_VAR (tessedit_reject_suspect_ems, FALSE, "Reject suspect m's");
50
+
51
+ EXTERN double_VAR (tessedit_cluster_t1, 0.20,
52
+ "t1 threshold for clustering samples"); // scores below t1 count as a match to an existing cluster (cluster_sample, check_wait_list)
53
+ EXTERN double_VAR (tessedit_cluster_t2, 0.40,
54
+ "t2 threshold for clustering samples"); // cluster_sample starts a new cluster when the best score is above t2; otherwise (not below t1) the sample goes to the wait list
55
+ EXTERN double_VAR (tessedit_cluster_t3, 0.12,
56
+ "Extra threshold for clustering samples, only keep a new sample if best score greater than this value"); // a match below t1 is only added to the cluster if the score exceeds t3 or tessedit_mm_use_prototypes is set
57
+ EXTERN double_VAR (tessedit_cluster_accept_fraction, 0.80,
58
+ "Largest fraction of characters in cluster for it to be used for adaption");
59
+ EXTERN INT_VAR (tessedit_cluster_min_size, 3,
60
+ "Smallest number of samples in a cluster for it to be used for adaption");
61
+ EXTERN BOOL_VAR (tessedit_cluster_debug, FALSE,
62
+ "Generate and print debug information for adaption by clustering");
63
+ EXTERN BOOL_VAR (tessedit_use_best_sample, FALSE,
64
+ "Use best sample from cluster when adapting");
65
+ EXTERN BOOL_VAR (tessedit_test_cluster_input, FALSE,
66
+ "Set reject map to enable cluster input to be measured");
67
+
68
+ EXTERN BOOL_VAR (tessedit_matrix_match, TRUE, "Use matrix matcher");
69
+ EXTERN BOOL_VAR (tessedit_mm_use_non_adaption_set, FALSE,
70
+ "Don't try to adapt to characters on this list");
71
+ EXTERN STRING_VAR (tessedit_non_adaption_set, ",.;:'~@*",
72
+ "Characters to be avoided when adapting");
73
+ EXTERN BOOL_VAR (tessedit_mm_adapt_using_prototypes, TRUE,
74
+ "Use prototypes when adapting");
75
+ EXTERN BOOL_VAR (tessedit_mm_use_prototypes, TRUE,
76
+ "Use prototypes as clusters are built");
77
+ EXTERN BOOL_VAR (tessedit_mm_use_rejmap, FALSE,
78
+ "Adapt to characters using reject map");
79
+ EXTERN BOOL_VAR (tessedit_mm_all_rejects, FALSE,
80
+ "Adapt to all characters using, matrix matcher");
81
+ EXTERN BOOL_VAR (tessedit_mm_only_match_same_char, FALSE,
82
+ "Only match samples against clusters for the same character");
83
+ EXTERN BOOL_VAR (tessedit_process_rns, FALSE, "Handle m - rn ambigs");
84
+
85
+ EXTERN BOOL_VAR (tessedit_demo_adaption, FALSE,
86
+ "Display cut images and matrix match for demo purposes");
87
+ EXTERN INT_VAR (tessedit_demo_word1, 62,
88
+ "Word number of first word to display");
89
+ EXTERN INT_VAR (tessedit_demo_word2, 64,
90
+ "Word number of second word to display");
91
+ EXTERN STRING_VAR (tessedit_demo_file, "academe",
92
+ "Name of document containing demo words");
93
+
94
+ /**********************************************************************
95
+  * word_adaptable
96
+  *
97
+  * Decide whether a word may be used for adaption. Each bit of mode,
98
+  * indexed by the MODES enum below, enables one test: the first two bits
99
+  * pick the word flags that qualify it, the remaining bits add vetoes.
100
+  * Returns FALSE when mode is 0, when no selected flag is set, or when an
101
+  * enabled veto fires; otherwise returns the accumulated status.
102
+  **********************************************************************/
103
+ BOOL8 word_adaptable(  //should we adapt?
104
+                      WERD_RES *word,
105
+                      uinT16 mode) {
106
+   enum MODES
107
+   {
108
+     ADAPTABLE_WERD,
109
+     ACCEPTABLE_WERD,
110
+     CHECK_DAWGS,
111
+     CHECK_SPACES,
112
+     CHECK_ONE_ELL_CONFLICT,
113
+     CHECK_AMBIG_WERD
114
+   };
115
+
116
+   BOOL8 status = FALSE;
117
+   BITS16 flags(mode);
118
+
119
+   if (mode == 0)                 // 0: NO adaption
120
+     return FALSE;
121
+
122
+   // Qualifying flags: at least one selected flag must be set.
123
+   if (flags.bit (ADAPTABLE_WERD))
124
+     status |= word->tess_would_adapt;
125
+   if (flags.bit (ACCEPTABLE_WERD))
126
+     status |= word->tess_accepted;
127
+   if (!status)
128
+     return FALSE;                // ignore other checks
129
+
130
+   // Veto: the best choice must come from one of the listed permuters.
131
+   if (flags.bit (CHECK_DAWGS)) {
132
+     if (word->best_choice->permuter () != SYSTEM_DAWG_PERM &&
133
+         word->best_choice->permuter () != FREQ_DAWG_PERM &&
134
+         word->best_choice->permuter () != USER_DAWG_PERM &&
135
+         word->best_choice->permuter () != NUMBER_PERM)
136
+       return FALSE;
137
+   }
138
+
139
+   // Veto: one_ell_conflict reports a problem for the word.
140
+   if (flags.bit (CHECK_ONE_ELL_CONFLICT)) {
141
+     if (one_ell_conflict (word, FALSE))
142
+       return FALSE;
143
+   }
144
+
145
+   // Veto: the best choice text contains a space.
146
+   if (flags.bit (CHECK_SPACES)) {
147
+     if (strchr (word->best_choice->string ().string (), ' ') != NULL)
148
+       return FALSE;
149
+   }
150
+
151
+   // Veto: NoDangerousAmbig does not clear the best choice.
152
+   if (flags.bit (CHECK_AMBIG_WERD)) {
153
+     if (!NoDangerousAmbig(word->best_choice->string().string(),
154
+                           word->best_choice->lengths().string(),
155
+                           NULL))
156
+       return FALSE;
157
+   }
158
+
159
+   return status;
160
+ }
150
+
151
+
152
+ void collect_ems_for_adaption(WERD_RES *word, // Clip every 'm' (and, when tessedit_process_rns is set, every "rn" pair) of word and pass it to cluster_sample
153
+ CHAR_SAMPLES_LIST *char_clusters, // handed straight to cluster_sample
154
+ CHAR_SAMPLE_LIST *chars_waiting) { // handed straight to cluster_sample
155
+ PBLOB_LIST *blobs = word->outword->blob_list ();
156
+ PBLOB_IT blob_it(blobs);
157
+ inT16 i;
158
+ CHAR_SAMPLE *sample;
159
+ PIXROW_LIST *pixrow_list;
160
+ PIXROW_IT pixrow_it;
161
+ IMAGELINE *imlines; // lines of the image
162
+ TBOX pix_box; // box of imlines
163
+ // extent
164
+ WERD copy_outword; // copy to denorm
165
+ PBLOB_IT copy_blob_it;
166
+ OUTLINE_IT copy_outline_it;
167
+ inT32 resolution = page_image.get_res ();
168
+
169
+ if (tessedit_reject_ems || tessedit_reject_suspect_ems)
170
+ return; // Do nothing: no samples are collected when either rejection mode is on
171
+
172
+ if (word->word->bounding_box ().height () > resolution / 3) // skip words taller than a third of page_image.get_res ()
173
+ return;
174
+
175
+ if (tessedit_demo_adaption)
176
+ // Make sure not set
177
+ tessedit_display_mm.set_value (FALSE);
178
+
179
+ if (word_adaptable (word, tessedit_em_adaption_mode) // needs an adaptable word with no rejects that contains 'm' or, with tessedit_process_rns, "rn"
180
+ && word->reject_map.reject_count () == 0
181
+ && (strchr (word->best_choice->string ().string (), 'm') != NULL
182
+ || (tessedit_process_rns
183
+ && strstr (word->best_choice->string ().string (),
184
+ "rn") != NULL))) {
185
+ if (tessedit_process_rns
186
+ && strstr (word->best_choice->string ().string (), "rn") != NULL) {
187
+ copy_outword = *(word->outword); // work on a copy so that blobs can be merged
188
+ copy_blob_it.set_to_list (copy_outword.blob_list ());
189
+ i = 0;
190
+ while (word->best_choice->string ()[i] != '\0') {
191
+ if (word->best_choice->string ()[i] == 'r'
192
+ && word->best_choice->string ()[i + 1] == 'n') {
193
+ copy_outline_it.set_to_list (copy_blob_it.data ()->
194
+ out_list ());
195
+ copy_outline_it.add_list_after (copy_blob_it.
196
+ data_relative (1)->
197
+ out_list ());
198
+ copy_blob_it.forward (); // step onto the blob for the 'n'
199
+ delete (copy_blob_it.extract ()); // remove that blob; its outline list was added to the 'r' blob above
200
+ i++; // the pair uses two characters of the string
201
+ }
202
+ copy_blob_it.forward ();
203
+ i++;
204
+ }
205
+ }
206
+ else
207
+ copy_outword = *(word->outword);
208
+
209
+ copy_outword.baseline_denormalise (&word->denorm);
210
+ char_clip_word(&copy_outword, page_image, pixrow_list, imlines, pix_box); // sets pixrow_list, imlines and pix_box, which are used below and released at the end
211
+ pixrow_it.set_to_list (pixrow_list);
212
+ pixrow_it.move_to_first ();
213
+
214
+ blob_it.move_to_first ();
215
+ for (i = 0;
216
+ word->best_choice->string ()[i] != '\0';
217
+ i++, pixrow_it.forward (), blob_it.forward ()) { // one pixrow and one blob step per loop pass
218
+
219
+ if (word->best_choice->string ()[i] == 'm'
220
+ || (word->best_choice->string ()[i] == 'r'
221
+ && word->best_choice->string ()[i + 1] == 'n')) {
222
+ #ifndef SECURE_NAMES
223
+ if (tessedit_cluster_debug)
224
+ tprintf ("Sample %c for adaption found in %s, index %d\n",
225
+ word->best_choice->string ()[i],
226
+ word->best_choice->string ().string (), i);
227
+ #endif
228
+ if (tessedit_matrix_match) {
229
+ sample = clip_sample (pixrow_it.data (),
230
+ imlines,
231
+ pix_box,
232
+ copy_outword.flag (W_INVERSE),
233
+ word->best_choice->string ()[i]);
234
+
235
+ if (sample == NULL) { //Clip failed
236
+ #ifndef SECURE_NAMES
237
+ tprintf ("Unable to clip sample from %s, index %d\n",
238
+ word->best_choice->string ().string (), i);
239
+ #endif
240
+ if (word->best_choice->string ()[i] == 'r')
241
+ i++; // skip the 'n' of the pair before moving on
242
+
243
+ continue;
244
+ }
245
+ }
246
+ else // NOTE(review): blob_it walks the unmerged word->outword, so after an "rn" pair it appears to lag one blob behind i - confirm
247
+ sample = new CHAR_SAMPLE (blob_it.data (),
248
+ &word->denorm,
249
+ word->best_choice->string ()[i]);
250
+
251
+ cluster_sample(sample, char_clusters, chars_waiting);
252
+
253
+ if (word->best_choice->string ()[i] == 'r')
254
+ i++; // Skip next character
255
+ }
256
+ }
257
+ delete[]imlines; // Free array of imlines
258
+ delete pixrow_list;
259
+ }
260
+ }
261
+
262
+
263
+ /**********************************************************************
264
+  * collect_characters_for_adaption
265
+  *
266
+  * Clip an image sample for each character of word and hand it to
267
+  * cluster_sample(). The word is used when it passes word_adaptable() for
268
+  * tessedit_cluster_adaption_mode and has no rejects, or unconditionally
269
+  * when tessedit_mm_use_rejmap is set. Words taller than a third of the
270
+  * page image resolution value are ignored.
271
+  *
272
+  * With tessedit_test_cluster_input set (and tessedit_mm_use_rejmap clear)
273
+  * no samples are collected; words that fail the test above are marked
274
+  * through rej_word_tess_failure() instead.
275
+  **********************************************************************/
276
+ void collect_characters_for_adaption(WERD_RES *word,
277
+                                      CHAR_SAMPLES_LIST *char_clusters,
278
+                                      CHAR_SAMPLE_LIST *chars_waiting) {
279
+   PBLOB_IT blob_it(word->outword->blob_list ());
280
+   PIXROW_IT pixrow_it;
281
+   PIXROW_LIST *pixrow_list;
282
+   IMAGELINE *imlines;            // lines of the image
283
+   TBOX pix_box;                  // box of imlines extent
284
+   WERD copy_outword;             // copy to denorm
285
+   CHAR_SAMPLE *sample;
286
+   inT16 i;
287
+   inT32 resolution = page_image.get_res ();
288
+
289
+   if (word->word->bounding_box ().height () > resolution / 3)
290
+     return;
291
+
292
+   if (tessedit_demo_adaption)
293
+     tessedit_display_mm.set_value (FALSE);  // Make sure not set
294
+
295
+   const bool usable =
296
+     (word_adaptable (word, tessedit_cluster_adaption_mode)
297
+      && word->reject_map.reject_count () == 0) || tessedit_mm_use_rejmap;
298
+
299
+   if (!usable) {
300
+     if (tessedit_test_cluster_input && !tessedit_mm_use_rejmap)
301
+       word->reject_map.rej_word_tess_failure ();  // Set word to all rejects
302
+     return;
303
+   }
304
+
305
+   if (tessedit_test_cluster_input && !tessedit_mm_use_rejmap)
306
+     return;                      // Reject map set to acceptable
307
+
308
+   /* Collect information about good matches */
309
+   copy_outword = *(word->outword);
310
+   copy_outword.baseline_denormalise (&word->denorm);
311
+   char_clip_word(&copy_outword, page_image, pixrow_list, imlines, pix_box);
312
+   pixrow_it.set_to_list (pixrow_list);
313
+   pixrow_it.move_to_first ();
314
+
315
+   blob_it.move_to_first ();
316
+   for (i = 0;
317
+        word->best_choice->string ()[i] != '\0';
318
+        i++, pixrow_it.forward (), blob_it.forward ()) {
319
+
320
+     // A character on the non-adaption list is skipped, unless the
321
+     // reject map is in use and accepts it.
322
+     if (tessedit_mm_use_non_adaption_set
323
+         && STRING (tessedit_non_adaption_set).contains (word->
324
+                                                         best_choice->
325
+                                                         string ()[i])
326
+         && !(tessedit_mm_use_rejmap && word->reject_map[i].accepted ()))
327
+       continue;
328
+
329
+ #ifndef SECURE_NAMES
330
+     if (tessedit_cluster_debug)
331
+       tprintf ("Sample %c for adaption found in %s, index %d\n",
332
+                word->best_choice->string ()[i],
333
+                word->best_choice->string ().string (), i);
334
+ #endif
335
+     sample = clip_sample (pixrow_it.data (),
336
+                           imlines,
337
+                           pix_box,
338
+                           copy_outword.flag (W_INVERSE),
339
+                           word->best_choice->string ()[i]);
340
+
341
+     if (sample != NULL) {
342
+       cluster_sample(sample, char_clusters, chars_waiting);
343
+     }
344
+ #ifndef SECURE_NAMES
345
+     else {                       // Clip failed
346
+       tprintf ("Unable to clip sample from %s, index %d\n",
347
+                word->best_choice->string ().string (), i);
348
+     }
349
+ #endif
350
+   }
351
+   delete[]imlines;               // Free array of imlines
352
+   delete pixrow_list;
353
+ }
336
+
337
+
338
+ /**********************************************************************
339
+  * cluster_sample
340
+  *
341
+  * File one sample according to its best (lowest) match score against
342
+  * the existing clusters:
343
+  *   - no clusters yet: the sample starts the first cluster;
344
+  *   - score below tessedit_cluster_t1: added to the best cluster (and the
345
+  *     wait list is re-checked) when the score exceeds
346
+  *     tessedit_cluster_t3 or tessedit_mm_use_prototypes is set,
347
+  *     otherwise the sample is dropped;
348
+  *   - score above tessedit_cluster_t2: the sample starts a new cluster;
349
+  *   - otherwise: the sample is appended to chars_waiting.
350
+  *
351
+  * The sample pointer is stored in a cluster or on the wait list in every
352
+  * case except the "dropped" one, where nothing keeps it; it is deleted
353
+  * there so that it does not leak. This assumes match_score() does not
354
+  * retain the pointer. Callers must not use sample after this call.
355
+  **********************************************************************/
356
+ void cluster_sample(CHAR_SAMPLE *sample,
357
+                     CHAR_SAMPLES_LIST *char_clusters,
358
+                     CHAR_SAMPLE_LIST *chars_waiting) {
359
+   CHAR_SAMPLES *best_cluster = NULL;
360
+   CHAR_SAMPLES_IT c_it = char_clusters;
361
+   CHAR_SAMPLE_IT cw_it = chars_waiting;
362
+   float score;
363
+   float best_score = MAX_INT32;
364
+
365
+   if (c_it.empty ()) {
366
+     c_it.add_to_end (new CHAR_SAMPLES (sample));
367
+     return;
368
+   }
369
+
370
+   // Find the cluster with the lowest match score.
371
+   for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) {
372
+     score = c_it.data ()->match_score (sample);
373
+     if (score < best_score) {
374
+       best_score = score;
375
+       best_cluster = c_it.data ();
376
+     }
377
+   }
378
+
379
+   if (tessedit_cluster_debug)
380
+     tprintf ("Sample's best score %f\n", best_score);
381
+
382
+   if (best_score < tessedit_cluster_t1) {
383
+     // best_cluster is non-NULL here: best_score only drops below its
384
+     // MAX_INT32 start value together with an assignment to best_cluster.
385
+     if (best_score > tessedit_cluster_t3 || tessedit_mm_use_prototypes) {
386
+       best_cluster->add_sample (sample);
387
+       check_wait_list(chars_waiting, sample, best_cluster);
388
+ #ifndef SECURE_NAMES
389
+       if (tessedit_cluster_debug)
390
+         tprintf ("Sample added to an existing cluster\n");
391
+ #endif
392
+     }
393
+     else {
394
+ #ifndef SECURE_NAMES
395
+       if (tessedit_cluster_debug)
396
+         tprintf
397
+           ("Sample dropped, good match to an existing cluster\n");
398
+ #endif
399
+       delete sample;             // nothing else owns a dropped sample
400
+     }
401
+   }
402
+   else if (best_score > tessedit_cluster_t2) {
403
+     c_it.add_to_end (new CHAR_SAMPLES (sample));
404
+ #ifndef SECURE_NAMES
405
+     if (tessedit_cluster_debug)
406
+       tprintf ("New cluster created for this sample\n");
407
+ #endif
408
+   }
409
+   else {
410
+     cw_it.add_to_end (sample);
411
+     if (tessedit_cluster_debug)
412
+       tprintf ("Sample added to the wait list\n");
413
+   }
414
+ }
392
+
393
+
394
+ void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting, // Move waiting samples that now match best_cluster into it, repeating while samples keep being moved
395
+ CHAR_SAMPLE *sample, // sample the caller has just put into best_cluster
396
+ CHAR_SAMPLES *best_cluster) { // cluster that receives matching wait-list samples
397
+ CHAR_SAMPLE *wait_sample;
398
+ CHAR_SAMPLE *test_sample = sample; // NOTE(review): reassigned below but never used for matching; the non-prototype branch always scores against sample - confirm intent
399
+ CHAR_SAMPLE_IT cw_it = chars_waiting;
400
+ CHAR_SAMPLE_LIST add_list; //Samples added to best cluster
401
+ CHAR_SAMPLE_IT add_it = &add_list;
402
+ float score;
403
+
404
+ add_list.clear ();
405
+
406
+ if (!cw_it.empty ()) {
407
+ do {
408
+ if (!add_list.empty ()) { // something was queued by an earlier sweep: take one entry and put it into the cluster
409
+ add_it.forward ();
410
+ test_sample = add_it.extract ();
411
+ best_cluster->add_sample (test_sample);
412
+ }
413
+
414
+ for (cw_it.mark_cycle_pt (); // sweep the whole wait list once
415
+ !cw_it.cycled_list (); cw_it.forward ()) {
416
+ wait_sample = cw_it.data ();
417
+ if (tessedit_mm_use_prototypes)
418
+ score = best_cluster->match_score (wait_sample);
419
+ else
420
+ score = sample->match_sample (wait_sample, FALSE);
421
+ if (score < tessedit_cluster_t1) {
422
+ if (score > tessedit_cluster_t3
423
+ || tessedit_mm_use_prototypes) {
424
+ add_it.add_after_stay_put (cw_it.extract ()); // queue it; it joins best_cluster on a later pass of the do-loop
425
+ #ifndef SECURE_NAMES
426
+ if (tessedit_cluster_debug)
427
+ tprintf
428
+ ("Wait sample added to an existing cluster\n");
429
+ #endif
430
+ }
431
+ else { // the sample is not extracted here; only the debug message below is produced
432
+ #ifndef SECURE_NAMES
433
+ if (tessedit_cluster_debug)
434
+ tprintf
435
+ ("Wait sample dropped, good match to an existing cluster\n");
436
+ #endif
437
+ }
438
+ }
439
+ }
440
+ }
441
+ while (!add_list.empty ()); // keep going until every queued sample has been moved into the cluster
442
+ }
443
+ }
444
+
445
+
446
+ /**********************************************************************
447
+  * complete_clustering
448
+  *
449
+  * Empty the wait list and finalise every cluster. Each sample still
450
+  * waiting becomes the first member of a new cluster, and
451
+  * check_wait_list() then gets the chance to move further waiting samples
452
+  * into it. Afterwards every cluster is assigned to a character and,
453
+  * depending on tessedit_use_best_sample and
454
+  * tessedit_mm_adapt_using_prototypes, has its best sample found or its
455
+  * prototype built.
456
+  **********************************************************************/
457
+ void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,
458
+                          CHAR_SAMPLE_LIST *chars_waiting) {
459
+   CHAR_SAMPLES *seeded_cluster;
460
+   CHAR_SAMPLES_IT cluster_it = char_clusters;
461
+   CHAR_SAMPLE_IT waiting_it = chars_waiting;
462
+   CHAR_SAMPLE *seed;
463
+   inT32 total_sample_count = 0;
464
+
465
+   // Turn each remaining waiting sample into the seed of its own cluster.
466
+   for (;;) {
467
+     if (waiting_it.empty ())
468
+       break;
469
+     waiting_it.move_to_first ();
470
+     seed = waiting_it.extract ();
471
+     seeded_cluster = new CHAR_SAMPLES (seed);
472
+     cluster_it.add_to_end (seeded_cluster);
473
+     check_wait_list(chars_waiting, seed, seeded_cluster);
474
+   }
475
+
476
+   // Finalise the clusters; samples are only counted when debugging.
477
+   for (cluster_it.mark_cycle_pt ();
478
+        !cluster_it.cycled_list (); cluster_it.forward ()) {
479
+     cluster_it.data ()->assign_to_char ();
480
+
481
+     if (tessedit_use_best_sample) {
482
+       cluster_it.data ()->find_best_sample ();
483
+     }
484
+     else if (tessedit_mm_adapt_using_prototypes) {
485
+       cluster_it.data ()->build_prototype ();
486
+     }
487
+
488
+     if (tessedit_cluster_debug)
489
+       total_sample_count += cluster_it.data ()->n_samples ();
490
+   }
491
+ #ifndef SECURE_NAMES
492
+   if (tessedit_cluster_debug)
493
+     tprintf ("Clustering completed, %d samples in all\n", total_sample_count);
494
+ #endif
495
+
496
+ #ifndef GRAPHICS_DISABLED
497
+   if (tessedit_demo_adaption)
498
+     display_cluster_prototypes(char_clusters);
499
+ #endif
500
+ }
483
+
484
+
485
+ /**********************************************************************
486
+  * adapt_to_good_ems
487
+  *
488
+  * Re-check the 'm' characters (and "rn" pairs when tessedit_process_rns
489
+  * is set) of a word that is not adaptable or has rejects, by matching
490
+  * each one against the character clusters and recording the verdict in
491
+  * the word's reject map.
492
+  *
493
+  * If tessedit_reject_ems or tessedit_reject_suspect_ems is set the word
494
+  * is handed to reject_all_ems / reject_suspect_ems instead. Samples still
495
+  * on chars_waiting are clustered first through complete_clustering().
496
+  * Words taller than a third of the page image resolution value are
497
+  * ignored.
498
+  *
499
+  * In demo mode the best cluster is matched once more with demo_word set;
500
+  * that replay is skipped when no cluster has an assigned character,
501
+  * because best_cluster is then still NULL.
502
+  *
503
+  * NOTE(review): with tessedit_process_rns FALSE the first early return
504
+  * on the word text can never fire, so words without an 'm' still reach
505
+  * the code after it. Left unchanged; confirm whether that is intended.
506
+  **********************************************************************/
507
+ void adapt_to_good_ems(WERD_RES *word,
508
+                        CHAR_SAMPLES_LIST *char_clusters,
509
+                        CHAR_SAMPLE_LIST *chars_waiting) {
510
+   PBLOB_LIST *blobs = word->outword->blob_list ();
511
+   PBLOB_IT blob_it(blobs);
512
+   inT16 i;
513
+   CHAR_SAMPLE *sample;
514
+   CHAR_SAMPLES_IT c_it = char_clusters;
515
+   CHAR_SAMPLE_IT cw_it = chars_waiting;
516
+   float score;
517
+   float best_score;
518
+   char best_char;
519
+   CHAR_SAMPLES *best_cluster;
520
+   PIXROW_LIST *pixrow_list;
521
+   PIXROW_IT pixrow_it;
522
+   IMAGELINE *imlines;            // lines of the image
523
+   TBOX pix_box;                  // box of imlines extent
524
+   WERD copy_outword;             // copy to denorm
525
+   TBOX b_box;
526
+   PBLOB_IT copy_blob_it;
527
+   OUTLINE_IT copy_outline_it;
528
+   PIXROW *pixrow = NULL;
529
+
530
+   static inT32 word_number = 0;
531
+
532
+ #ifndef GRAPHICS_DISABLED
533
+   ScrollView* demo_win = NULL;
534
+ #endif
535
+
536
+   inT32 resolution = page_image.get_res ();
537
+
538
+   if (word->word->bounding_box ().height () > resolution / 3)
539
+     return;
540
+
541
+   word_number++;
542
+
543
+   if (strchr (word->best_choice->string ().string (), 'm') == NULL
544
+       && (tessedit_process_rns
545
+           && strstr (word->best_choice->string ().string (), "rn") == NULL))
546
+     return;
547
+
548
+   if (tessedit_reject_ems) {
549
+     reject_all_ems(word);
550
+     return;
551
+   }
552
+   if (tessedit_reject_suspect_ems) {
553
+     reject_suspect_ems(word);
554
+     return;
555
+   }
556
+
557
+   if (char_clusters->length () == 0) {
558
+ #ifndef SECURE_NAMES
559
+     if (tessedit_cluster_debug)
560
+       tprintf ("No clusters to use for em adaption\n");
561
+ #endif
562
+     return;
563
+   }
564
+
565
+   // Finish clustering before the clusters are used for matching.
566
+   if (!cw_it.empty ()) {
567
+     complete_clustering(char_clusters, chars_waiting);
568
+     print_em_stats(char_clusters, chars_waiting);
569
+   }
570
+
571
+   if ((!word_adaptable (word, tessedit_em_adaption_mode) ||
572
+        word->reject_map.reject_count () != 0)
573
+       && (strchr (word->best_choice->string ().string (), 'm') != NULL
574
+           || (tessedit_process_rns
575
+               && strstr (word->best_choice->string ().string (),
576
+                          "rn") != NULL))) {
577
+     if (tessedit_process_rns
578
+         && strstr (word->best_choice->string ().string (),
579
+                    "rn") != NULL) {
580
+       // Merge the blobs of every "rn" pair in a copy of the word.
581
+       copy_outword = *(word->outword);
582
+       copy_blob_it.set_to_list (copy_outword.blob_list ());
583
+       i = 0;
584
+       while (word->best_choice->string ()[i] != '\0') {
585
+         if (word->best_choice->string ()[i] == 'r'
586
+             && word->best_choice->string ()[i + 1] == 'n') {
587
+           copy_outline_it.set_to_list (copy_blob_it.data ()->
588
+                                        out_list ());
589
+           copy_outline_it.add_list_after (copy_blob_it.
590
+                                           data_relative (1)->
591
+                                           out_list ());
592
+           copy_blob_it.forward ();
593
+           delete (copy_blob_it.extract ());
594
+           i++;
595
+         }
596
+         copy_blob_it.forward ();
597
+         i++;
598
+       }
599
+     }
600
+     else
601
+       copy_outword = *(word->outword);
602
+
603
+     copy_outword.baseline_denormalise (&word->denorm);
604
+     copy_blob_it.set_to_list (copy_outword.blob_list ());
605
+     char_clip_word(&copy_outword, page_image, pixrow_list, imlines, pix_box);
606
+     pixrow_it.set_to_list (pixrow_list);
607
+     pixrow_it.move_to_first ();
608
+
609
+     // For debugging only
610
+     b_box = copy_outword.bounding_box ();
611
+     pixrow = pixrow_it.data ();
612
+
613
+     blob_it.move_to_first ();
614
+     copy_blob_it.move_to_first ();
615
+     for (i = 0;
616
+          word->best_choice->string ()[i] != '\0';
617
+          i++, pixrow_it.forward (), blob_it.forward (),
618
+          copy_blob_it.forward ()) {
619
+       if ((word->best_choice->string ()[i] == 'm'
620
+            || (word->best_choice->string ()[i] == 'r'
621
+                && word->best_choice->string ()[i + 1] == 'n'))
622
+           && !word->reject_map[i].perm_rejected ()) {
623
+         if (tessedit_cluster_debug)
624
+           tprintf ("Sample %c to check found in %s, index %d\n",
625
+                    word->best_choice->string ()[i],
626
+                    word->best_choice->string ().string (), i);
627
+
628
+         if (tessedit_demo_adaption)
629
+           tprintf
630
+             ("Sample %c to check found in %s (%d), index %d\n",
631
+              word->best_choice->string ()[i],
632
+              word->best_choice->string ().string (), word_number,
633
+              i);
634
+
635
+         if (tessedit_matrix_match) {
636
+           TBOX copy_box = copy_blob_it.data ()->bounding_box ();
637
+
638
+           sample = clip_sample (pixrow_it.data (),
639
+                                 imlines,
640
+                                 pix_box,
641
+                                 copy_outword.flag (W_INVERSE),
642
+                                 word->best_choice->string ()[i]);
643
+
644
+           //Clip failed
645
+           if (sample == NULL) {
646
+             tprintf
647
+               ("Unable to clip sample from %s, index %d\n",
648
+                word->best_choice->string ().string (), i);
649
+ #ifndef SECURE_NAMES
650
+             if (tessedit_cluster_debug)
651
+               tprintf ("Sample rejected (no sample)\n");
652
+ #endif
653
+             word->reject_map[i].setrej_mm_reject ();
654
+             if (word->best_choice->string ()[i] == 'r') {
655
+               word->reject_map[i + 1].setrej_mm_reject ();
656
+               i++;
657
+             }
658
+             continue;
659
+           }
660
+         }
661
+         else
662
+           sample = new CHAR_SAMPLE (blob_it.data (),
663
+                                     &word->denorm,
664
+                                     word->best_choice->
665
+                                     string ()[i]);
666
+
667
+         // Find the best matching cluster that has a character assigned.
668
+         best_score = MAX_INT32;
669
+         best_char = '\0';
670
+         best_cluster = NULL;
671
+
672
+         for (c_it.mark_cycle_pt ();
673
+              !c_it.cycled_list (); c_it.forward ()) {
674
+           if (c_it.data ()->character () != '\0') {
675
+             score = c_it.data ()->match_score (sample);
676
+             if (score < best_score) {
677
+               best_cluster = c_it.data ();
678
+               best_score = score;
679
+               best_char = c_it.data ()->character ();
680
+             }
681
+           }
682
+         }
683
+
684
+         if (best_score > tessedit_cluster_t1) {
685
+ #ifndef SECURE_NAMES
686
+           if (tessedit_cluster_debug)
687
+             tprintf ("Sample rejected (score %f)\n", best_score);
688
+           if (tessedit_demo_adaption)
689
+             tprintf ("Sample rejected (score %f)\n", best_score);
690
+ #endif
691
+           word->reject_map[i].setrej_mm_reject ();
692
+           if (word->best_choice->string ()[i] == 'r')
693
+             word->reject_map[i + 1].setrej_mm_reject ();
694
+         }
695
+         else {
696
+           if (word->best_choice->string ()[i] == best_char) {
697
+ #ifndef SECURE_NAMES
698
+             if (tessedit_cluster_debug)
699
+               tprintf ("Sample accepted (score %f)\n",
700
+                        best_score);
701
+             if (tessedit_demo_adaption)
702
+               tprintf ("Sample accepted (score %f)\n",
703
+                        best_score);
704
+ #endif
705
+             word->reject_map[i].setrej_mm_accept ();
706
+             if (word->best_choice->string ()[i] == 'r')
707
+               word->reject_map[i + 1].setrej_mm_accept ();
708
+           }
709
+           else {
710
+ #ifndef SECURE_NAMES
711
+             if (tessedit_cluster_debug)
712
+               tprintf ("Sample rejected (char %c, score %f)\n",
713
+                        best_char, best_score);
714
+             if (tessedit_demo_adaption)
715
+               tprintf ("Sample rejected (char %c, score %f)\n",
716
+                        best_char, best_score);
717
+ #endif
718
+             word->reject_map[i].setrej_mm_reject ();
719
+             if (word->best_choice->string ()[i] == 'r')
720
+               word->reject_map[i + 1].setrej_mm_reject ();
721
+           }
722
+         }
723
+
724
+         if (tessedit_demo_adaption) {
725
+           if (strcmp (imagebasename.string (),
726
+                       tessedit_demo_file.string ()) != 0
727
+               || word_number == tessedit_demo_word1
728
+               || word_number == tessedit_demo_word2) {
729
+ #ifndef GRAPHICS_DISABLED
730
+             demo_win =
731
+               display_clip_image(&copy_outword,
732
+                                  page_image,
733
+                                  pixrow_list,
734
+                                  pix_box);
735
+ #endif
736
+             // best_cluster is still NULL when no cluster has a character
737
+             // assigned, so only replay the match when one was found.
738
+             if (best_cluster != NULL) {
739
+               demo_word = word_number;
740
+               best_cluster->match_score (sample);
741
+               demo_word = 0;
742
+             }
743
+           }
744
+         }
745
+         if (word->best_choice->string ()[i] == 'r')
746
+           i++;                   // Skip next character
747
+       }
748
+     }
749
+     delete[]imlines;             // Free array of imlines
750
+     delete pixrow_list;
751
+   }
752
+ }
722
+
723
+
724
+ void adapt_to_good_samples(WERD_RES *word,
725
+ CHAR_SAMPLES_LIST *char_clusters,
726
+ CHAR_SAMPLE_LIST *chars_waiting) {
727
+ PBLOB_LIST *blobs = word->outword->blob_list ();
728
+ PBLOB_IT blob_it(blobs);
729
+ inT16 i;
730
+ CHAR_SAMPLE *sample;
731
+ CHAR_SAMPLES_IT c_it = char_clusters;
732
+ CHAR_SAMPLE_IT cw_it = chars_waiting;
733
+ float score;
734
+ float best_score;
735
+ char best_char;
736
+ CHAR_SAMPLES *best_cluster;
737
+ PIXROW_LIST *pixrow_list;
738
+ PIXROW_IT pixrow_it;
739
+ IMAGELINE *imlines; // lines of the image
740
+ TBOX pix_box; // box of imlines
741
+ // extent
742
+ WERD copy_outword; // copy to denorm
743
+ TBOX b_box;
744
+ PBLOB_IT copy_blob_it;
745
+ PIXROW *pixrow = NULL;
746
+
747
+ static inT32 word_number = 0;
748
+
749
+ #ifndef GRAPHICS_DISABLED
750
+ ScrollView* demo_win = NULL;
751
+ #endif
752
+
753
+ inT32 resolution = page_image.get_res ();
754
+
755
+ word_number++;
756
+
757
+ if (tessedit_test_cluster_input)
758
+ return;
759
+
760
+ if (word->word->bounding_box ().height () > resolution / 3)
761
+ return;
762
+
763
+ if (char_clusters->length () == 0) {
764
+ #ifndef SECURE_NAMES
765
+ if (tessedit_cluster_debug)
766
+ tprintf ("No clusters to use for adaption\n");
767
+ #endif
768
+ return;
769
+ }
770
+
771
+ if (!cw_it.empty ()) {
772
+ complete_clustering(char_clusters, chars_waiting);
773
+ print_em_stats(char_clusters, chars_waiting);
774
+ }
775
+
776
+ if ((!word_adaptable (word, tessedit_cluster_adaption_mode)
777
+ && word->reject_map.reject_count () != 0) || tessedit_mm_use_rejmap) {
778
+ if (tessedit_cluster_debug) {
779
+ tprintf ("\nChecking: \"%s\" MAP ",
780
+ word->best_choice->string ().string ());
781
+ word->reject_map.print (debug_fp);
782
+ tprintf ("\n");
783
+ }
784
+
785
+ copy_outword = *(word->outword);
786
+ copy_outword.baseline_denormalise (&word->denorm);
787
+ copy_blob_it.set_to_list (copy_outword.blob_list ());
788
+ char_clip_word(&copy_outword, page_image, pixrow_list, imlines, pix_box);
789
+ pixrow_it.set_to_list (pixrow_list);
790
+ pixrow_it.move_to_first ();
791
+
792
+ // For debugging only
793
+ b_box = copy_outword.bounding_box ();
794
+ pixrow = pixrow_it.data ();
795
+
796
+ blob_it.move_to_first ();
797
+ copy_blob_it.move_to_first ();
798
+ for (i = 0;
799
+ word->best_choice->string ()[i] != '\0';
800
+ i++, pixrow_it.forward (), blob_it.forward (),
801
+ copy_blob_it.forward ()) {
802
+ if (word->reject_map[i].recoverable ()
803
+ || (tessedit_mm_all_rejects && word->reject_map[i].rejected ())) {
804
+ TBOX copy_box = copy_blob_it.data ()->bounding_box ();
805
+
806
+ if (tessedit_cluster_debug)
807
+ tprintf ("Sample %c to check found in %s, index %d\n",
808
+ word->best_choice->string ()[i],
809
+ word->best_choice->string ().string (), i);
810
+
811
+ if (tessedit_demo_adaption)
812
+ tprintf ("Sample %c to check found in %s (%d), index %d\n",
813
+ word->best_choice->string ()[i],
814
+ word->best_choice->string ().string (),
815
+ word_number, i);
816
+
817
+ sample = clip_sample (pixrow_it.data (),
818
+ imlines,
819
+ pix_box,
820
+ copy_outword.flag (W_INVERSE),
821
+ word->best_choice->string ()[i]);
822
+
823
+ if (sample == NULL) { //Clip failed
824
+ tprintf ("Unable to clip sample from %s, index %d\n",
825
+ word->best_choice->string ().string (), i);
826
+ #ifndef SECURE_NAMES
827
+ if (tessedit_cluster_debug)
828
+ tprintf ("Sample rejected (no sample)\n");
829
+ #endif
830
+ word->reject_map[i].setrej_mm_reject ();
831
+
832
+ continue;
833
+ }
834
+
835
+ best_score = MAX_INT32;
836
+ best_char = '\0';
837
+ best_cluster = NULL;
838
+
839
+ for (c_it.mark_cycle_pt ();
840
+ !c_it.cycled_list (); c_it.forward ()) {
841
+ if (c_it.data ()->character () != '\0') {
842
+ score = c_it.data ()->match_score (sample);
843
+ if (score < best_score) {
844
+ best_cluster = c_it.data ();
845
+ best_score = score;
846
+ best_char = c_it.data ()->character ();
847
+ }
848
+ }
849
+ }
850
+
851
+ if (best_score > tessedit_cluster_t1) {
852
+ #ifndef SECURE_NAMES
853
+ if (tessedit_cluster_debug)
854
+ tprintf ("Sample rejected (score %f)\n", best_score);
855
+ if (tessedit_demo_adaption)
856
+ tprintf ("Sample rejected (score %f)\n", best_score);
857
+ #endif
858
+ word->reject_map[i].setrej_mm_reject ();
859
+ }
860
+ else {
861
+ if (word->best_choice->string ()[i] == best_char) {
862
+ #ifndef SECURE_NAMES
863
+ if (tessedit_cluster_debug)
864
+ tprintf ("Sample accepted (score %f)\n", best_score);
865
+ if (tessedit_demo_adaption)
866
+ tprintf ("Sample accepted (score %f)\n", best_score);
867
+ #endif
868
+ if (tessedit_test_adaption)
869
+ word->reject_map[i].setrej_minimal_rej_accept ();
870
+ else
871
+ word->reject_map[i].setrej_mm_accept ();
872
+ }
873
+ else {
874
+ #ifndef SECURE_NAMES
875
+ if (tessedit_cluster_debug)
876
+ tprintf ("Sample rejected (char %c, score %f)\n",
877
+ best_char, best_score);
878
+ if (tessedit_demo_adaption)
879
+ tprintf ("Sample rejected (char %c, score %f)\n",
880
+ best_char, best_score);
881
+ #endif
882
+ word->reject_map[i].setrej_mm_reject ();
883
+ }
884
+ }
885
+
886
+ if (tessedit_demo_adaption) {
887
+ if (strcmp (imagebasename.string (),
888
+ tessedit_demo_file.string ()) != 0
889
+ || word_number == tessedit_demo_word1
890
+ || word_number == tessedit_demo_word2) {
891
+ #ifndef GRAPHICS_DISABLED
892
+ demo_win =
893
+ display_clip_image(&copy_outword,
894
+ page_image,
895
+ pixrow_list,
896
+ pix_box);
897
+ #endif
898
+ demo_word = word_number;
899
+ best_cluster->match_score (sample);
900
+ demo_word = 0;
901
+ }
902
+ }
903
+ }
904
+ }
905
+ delete[]imlines; // Free array of imlines
906
+ delete pixrow_list;
907
+
908
+ if (tessedit_cluster_debug) {
909
+ tprintf ("\nFinal: \"%s\" MAP ",
910
+ word->best_choice->string ().string ());
911
+ word->reject_map.print (debug_fp);
912
+ tprintf ("\n");
913
+ }
914
+ }
915
+ }
916
+
917
+
918
+ void print_em_stats(CHAR_SAMPLES_LIST *char_clusters,
919
+ CHAR_SAMPLE_LIST *chars_waiting) {
920
+ CHAR_SAMPLES_IT c_it = char_clusters;
921
+
922
+ if (!tessedit_cluster_debug)
923
+ return;
924
+ #ifndef SECURE_NAMES
925
+ tprintf ("There are %d clusters and %d samples waiting\n",
926
+ char_clusters->length (), chars_waiting->length ());
927
+
928
+ for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ())
929
+ c_it.data ()->print (debug_fp);
930
+ #endif
931
+ tprintf ("\n");
932
+ }
933
+
934
+
935
+ CHAR_SAMPLE *clip_sample( //lines of the image
936
+ PIXROW *pixrow,
937
+ IMAGELINE *imlines,
938
+ TBOX pix_box, //box of imlines extent
939
+ BOOL8 white_on_black,
940
+ char c) {
941
+ TBOX b_box = pixrow->bounding_box ();
942
+ float baseline_pos = 0;
943
+ inT32 resolution = page_image.get_res ();
944
+
945
+ if (!b_box.null_box ()) {
946
+ ASSERT_HOST (b_box.width () < page_image.get_xsize () &&
947
+ b_box.height () < page_image.get_ysize ());
948
+
949
+ if (b_box.width () > resolution || b_box.height () > resolution) {
950
+ tprintf ("clip sample: sample too big (%d x %d)\n",
951
+ b_box.width (), b_box.height ());
952
+
953
+ return NULL;
954
+ }
955
+
956
+ IMAGE *image = new (IMAGE);
957
+ if (image->create (b_box.width (), b_box.height (), 1) == -1) {
958
+ tprintf ("clip sample: create image failed (%d x %d)\n",
959
+ b_box.width (), b_box.height ());
960
+
961
+ delete image;
962
+ return NULL;
963
+ }
964
+
965
+ if (!white_on_black)
966
+ invert_image(image); // Set background to white
967
+ pixrow->char_clip_image (imlines, pix_box, NULL, *image, baseline_pos);
968
+ if (white_on_black)
969
+ invert_image(image); //invert white on black for scaling &NN
970
+ return new CHAR_SAMPLE (image, c);
971
+ }
972
+ else
973
+ return NULL;
974
+ }
975
+
976
+
977
+ #ifndef GRAPHICS_DISABLED
978
+ void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters) {
979
+ inT16 proto_number = 0;
980
+ CHAR_SAMPLES_IT c_it = char_clusters;
981
+ char title[WINDOWNAMESIZE];
982
+
983
+ for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) {
984
+ proto_number++;
985
+
986
+ #ifndef SECURE_NAMES
987
+ tprintf ("Displaying proto number %d\n", proto_number);
988
+ #endif
989
+
990
+ if (c_it.data ()->prototype () != NULL) {
991
+ sprintf (title, "Proto - %d", proto_number);
992
+ display_image (c_it.data ()->prototype ()->make_image (),
993
+ title, (proto_number - 1) * 400, 0, FALSE);
994
+ }
995
+ }
996
+ }
997
+ #endif
998
+
999
+ // *********************************************************************
1000
+ // Simplistic routines to test the effect of rejecting ems and fullstops
1001
+ // *********************************************************************
1002
+
1003
+ void reject_all_ems(WERD_RES *word) {
1004
+ inT16 i;
1005
+
1006
+ for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {
1007
+ if (word->best_choice->string ()[i] == 'm')
1008
+ // reject all ems
1009
+ word->reject_map[i].setrej_mm_reject ();
1010
+ }
1011
+ }
1012
+
1013
+
1014
+ void reject_all_fullstops(WERD_RES *word) {
1015
+ inT16 i;
1016
+
1017
+ for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {
1018
+ if (word->best_choice->string ()[i] == '.')
1019
+ // reject all fullstops
1020
+ word->reject_map[i].setrej_mm_reject ();
1021
+ }
1022
+ }
1023
+
1024
+
1025
+ void reject_suspect_ems(WERD_RES *word) {
1026
+ inT16 i;
1027
+
1028
+ if (!word_adaptable (word, tessedit_cluster_adaption_mode))
1029
+ for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {
1030
+ if (word->best_choice->string ()[i] == 'm' && suspect_em (word, i))
1031
+ // reject all ems
1032
+ word->reject_map[i].setrej_mm_reject ();
1033
+ }
1034
+ }
1035
+
1036
+
1037
+ void reject_suspect_fullstops(WERD_RES *word) {
1038
+ inT16 i;
1039
+
1040
+ for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {
1041
+ if (word->best_choice->string ()[i] == '.'
1042
+ && suspect_fullstop (word, i))
1043
+ // reject all commas
1044
+ word->reject_map[i].setrej_mm_reject ();
1045
+ }
1046
+ }
1047
+
1048
+
1049
+ BOOL8 suspect_em(WERD_RES *word, inT16 index) {
1050
+ PBLOB_LIST *blobs = word->outword->blob_list ();
1051
+ PBLOB_IT blob_it(blobs);
1052
+ inT16 j;
1053
+
1054
+ for (j = 0; j < index; j++)
1055
+ blob_it.forward ();
1056
+
1057
+ return (blob_it.data ()->out_list ()->length () != 1);
1058
+ }
1059
+
1060
+
1061
+ BOOL8 suspect_fullstop(WERD_RES *word, inT16 i) {
1062
+ float aspect_ratio;
1063
+ PBLOB_LIST *blobs = word->outword->blob_list ();
1064
+ PBLOB_IT blob_it(blobs);
1065
+ inT16 j;
1066
+ TBOX box;
1067
+ inT16 width;
1068
+ inT16 height;
1069
+
1070
+ for (j = 0; j < i; j++)
1071
+ blob_it.forward ();
1072
+
1073
+ box = blob_it.data ()->bounding_box ();
1074
+
1075
+ width = box.width ();
1076
+ height = box.height ();
1077
+
1078
+ aspect_ratio = ((width > height) ? ((float) width) / height :
1079
+ ((float) height) / width);
1080
+
1081
+ return (aspect_ratio > tessed_fullstop_aspect_ratio);
1082
+ }