tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612)
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,2 @@
1
+
2
+ EXTRA_DIST = README counttestset.sh reorgdata.sh runalltests.sh runtestset.sh reports/1995.bus.3B.sum reports/1995.doe3.3B.sum reports/1995.mag.3B.sum reports/1995.news.3B.sum reports/2.03.summary reports/2.04.summary
@@ -0,0 +1,312 @@
1
+ # Makefile.in generated by automake 1.10.1 from Makefile.am.
2
+ # @configure_input@
3
+
4
+ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
5
+ # 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
6
+ # This Makefile.in is free software; the Free Software Foundation
7
+ # gives unlimited permission to copy and/or distribute it,
8
+ # with or without modifications, as long as this notice is preserved.
9
+
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY, to the extent permitted by law; without
12
+ # even the implied warranty of MERCHANTABILITY or FITNESS FOR A
13
+ # PARTICULAR PURPOSE.
14
+
15
+ @SET_MAKE@
16
+ VPATH = @srcdir@
17
+ pkgdatadir = $(datadir)/@PACKAGE@
18
+ pkglibdir = $(libdir)/@PACKAGE@
19
+ pkgincludedir = $(includedir)/@PACKAGE@
20
+ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
21
+ install_sh_DATA = $(install_sh) -c -m 644
22
+ install_sh_PROGRAM = $(install_sh) -c
23
+ install_sh_SCRIPT = $(install_sh) -c
24
+ INSTALL_HEADER = $(INSTALL_DATA)
25
+ transform = $(program_transform_name)
26
+ NORMAL_INSTALL = :
27
+ PRE_INSTALL = :
28
+ POST_INSTALL = :
29
+ NORMAL_UNINSTALL = :
30
+ PRE_UNINSTALL = :
31
+ POST_UNINSTALL = :
32
+ build_triplet = @build@
33
+ host_triplet = @host@
34
+ subdir = testing
35
+ DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
36
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
37
+ am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
38
+ $(top_srcdir)/configure.ac
39
+ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
40
+ $(ACLOCAL_M4)
41
+ mkinstalldirs = $(SHELL) $(top_srcdir)/config/mkinstalldirs
42
+ CONFIG_HEADER = $(top_builddir)/config_auto.h
43
+ CONFIG_CLEAN_FILES =
44
+ SOURCES =
45
+ DIST_SOURCES =
46
+ DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
47
+ ACLOCAL = @ACLOCAL@
48
+ AMTAR = @AMTAR@
49
+ AUTOCONF = @AUTOCONF@
50
+ AUTOHEADER = @AUTOHEADER@
51
+ AUTOMAKE = @AUTOMAKE@
52
+ AWK = @AWK@
53
+ CC = @CC@
54
+ CCDEPMODE = @CCDEPMODE@
55
+ CFLAGS = @CFLAGS@
56
+ CPP = @CPP@
57
+ CPPFLAGS = @CPPFLAGS@
58
+ CXX = @CXX@
59
+ CXXCPP = @CXXCPP@
60
+ CXXDEPMODE = @CXXDEPMODE@
61
+ CXXFLAGS = @CXXFLAGS@
62
+ CYGPATH_W = @CYGPATH_W@
63
+ DEFS = @DEFS@
64
+ DEPDIR = @DEPDIR@
65
+ ECHO_C = @ECHO_C@
66
+ ECHO_N = @ECHO_N@
67
+ ECHO_T = @ECHO_T@
68
+ EGREP = @EGREP@
69
+ EXEEXT = @EXEEXT@
70
+ GREP = @GREP@
71
+ INSTALL = @INSTALL@
72
+ INSTALL_DATA = @INSTALL_DATA@
73
+ INSTALL_PROGRAM = @INSTALL_PROGRAM@
74
+ INSTALL_SCRIPT = @INSTALL_SCRIPT@
75
+ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
76
+ LDFLAGS = @LDFLAGS@
77
+ LIBOBJS = @LIBOBJS@
78
+ LIBS = @LIBS@
79
+ LIBTIFF_CFLAGS = @LIBTIFF_CFLAGS@
80
+ LIBTIFF_LIBS = @LIBTIFF_LIBS@
81
+ LTLIBOBJS = @LTLIBOBJS@
82
+ MAINT = @MAINT@
83
+ MAKEINFO = @MAKEINFO@
84
+ MKDIR_P = @MKDIR_P@
85
+ OBJEXT = @OBJEXT@
86
+ PACKAGE = @PACKAGE@
87
+ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
88
+ PACKAGE_DATE = @PACKAGE_DATE@
89
+ PACKAGE_NAME = @PACKAGE_NAME@
90
+ PACKAGE_STRING = @PACKAGE_STRING@
91
+ PACKAGE_TARNAME = @PACKAGE_TARNAME@
92
+ PACKAGE_VERSION = @PACKAGE_VERSION@
93
+ PACKAGE_YEAR = @PACKAGE_YEAR@
94
+ PATH_SEPARATOR = @PATH_SEPARATOR@
95
+ RANLIB = @RANLIB@
96
+ SET_MAKE = @SET_MAKE@
97
+ SHELL = @SHELL@
98
+ STRIP = @STRIP@
99
+ VERSION = @VERSION@
100
+ abs_builddir = @abs_builddir@
101
+ abs_srcdir = @abs_srcdir@
102
+ abs_top_builddir = @abs_top_builddir@
103
+ abs_top_srcdir = @abs_top_srcdir@
104
+ ac_ct_CC = @ac_ct_CC@
105
+ ac_ct_CXX = @ac_ct_CXX@
106
+ am__include = @am__include@
107
+ am__leading_dot = @am__leading_dot@
108
+ am__quote = @am__quote@
109
+ am__tar = @am__tar@
110
+ am__untar = @am__untar@
111
+ bindir = @bindir@
112
+ build = @build@
113
+ build_alias = @build_alias@
114
+ build_cpu = @build_cpu@
115
+ build_os = @build_os@
116
+ build_vendor = @build_vendor@
117
+ builddir = @builddir@
118
+ datadir = @datadir@
119
+ datarootdir = @datarootdir@
120
+ docdir = @docdir@
121
+ dvidir = @dvidir@
122
+ exec_prefix = @exec_prefix@
123
+ host = @host@
124
+ host_alias = @host_alias@
125
+ host_cpu = @host_cpu@
126
+ host_os = @host_os@
127
+ host_vendor = @host_vendor@
128
+ htmldir = @htmldir@
129
+ includedir = @includedir@
130
+ infodir = @infodir@
131
+ install_sh = @install_sh@
132
+ libdir = @libdir@
133
+ libexecdir = @libexecdir@
134
+ localedir = @localedir@
135
+ localstatedir = @localstatedir@
136
+ mandir = @mandir@
137
+ mkdir_p = @mkdir_p@
138
+ oldincludedir = @oldincludedir@
139
+ pdfdir = @pdfdir@
140
+ prefix = @prefix@
141
+ program_transform_name = @program_transform_name@
142
+ psdir = @psdir@
143
+ sbindir = @sbindir@
144
+ sharedstatedir = @sharedstatedir@
145
+ srcdir = @srcdir@
146
+ sysconfdir = @sysconfdir@
147
+ target_alias = @target_alias@
148
+ top_builddir = @top_builddir@
149
+ top_srcdir = @top_srcdir@
150
+ EXTRA_DIST = README counttestset.sh reorgdata.sh runalltests.sh runtestset.sh reports/1995.bus.3B.sum reports/1995.doe3.3B.sum reports/1995.mag.3B.sum reports/1995.news.3B.sum reports/2.03.summary reports/2.04.summary
151
+ all: all-am
152
+
153
+ .SUFFIXES:
154
+ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
155
+ @for dep in $?; do \
156
+ case '$(am__configure_deps)' in \
157
+ *$$dep*) \
158
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
159
+ && exit 0; \
160
+ exit 1;; \
161
+ esac; \
162
+ done; \
163
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu testing/Makefile'; \
164
+ cd $(top_srcdir) && \
165
+ $(AUTOMAKE) --gnu testing/Makefile
166
+ .PRECIOUS: Makefile
167
+ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
168
+ @case '$?' in \
169
+ *config.status*) \
170
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
171
+ *) \
172
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
173
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
174
+ esac;
175
+
176
+ $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
177
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
178
+
179
+ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
180
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
181
+ $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
182
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
183
+ tags: TAGS
184
+ TAGS:
185
+
186
+ ctags: CTAGS
187
+ CTAGS:
188
+
189
+
190
+ distdir: $(DISTFILES)
191
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
192
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
193
+ list='$(DISTFILES)'; \
194
+ dist_files=`for file in $$list; do echo $$file; done | \
195
+ sed -e "s|^$$srcdirstrip/||;t" \
196
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
197
+ case $$dist_files in \
198
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
199
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
200
+ sort -u` ;; \
201
+ esac; \
202
+ for file in $$dist_files; do \
203
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
204
+ if test -d $$d/$$file; then \
205
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
206
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
207
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
208
+ fi; \
209
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
210
+ else \
211
+ test -f $(distdir)/$$file \
212
+ || cp -p $$d/$$file $(distdir)/$$file \
213
+ || exit 1; \
214
+ fi; \
215
+ done
216
+ check-am: all-am
217
+ check: check-am
218
+ all-am: Makefile
219
+ installdirs:
220
+ install: install-am
221
+ install-exec: install-exec-am
222
+ install-data: install-data-am
223
+ uninstall: uninstall-am
224
+
225
+ install-am: all-am
226
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
227
+
228
+ installcheck: installcheck-am
229
+ install-strip:
230
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
231
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
232
+ `test -z '$(STRIP)' || \
233
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
234
+ mostlyclean-generic:
235
+
236
+ clean-generic:
237
+
238
+ distclean-generic:
239
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
240
+
241
+ maintainer-clean-generic:
242
+ @echo "This command is intended for maintainers to use"
243
+ @echo "it deletes files that may require special tools to rebuild."
244
+ clean: clean-am
245
+
246
+ clean-am: clean-generic mostlyclean-am
247
+
248
+ distclean: distclean-am
249
+ -rm -f Makefile
250
+ distclean-am: clean-am distclean-generic
251
+
252
+ dvi: dvi-am
253
+
254
+ dvi-am:
255
+
256
+ html: html-am
257
+
258
+ info: info-am
259
+
260
+ info-am:
261
+
262
+ install-data-am:
263
+
264
+ install-dvi: install-dvi-am
265
+
266
+ install-exec-am:
267
+
268
+ install-html: install-html-am
269
+
270
+ install-info: install-info-am
271
+
272
+ install-man:
273
+
274
+ install-pdf: install-pdf-am
275
+
276
+ install-ps: install-ps-am
277
+
278
+ installcheck-am:
279
+
280
+ maintainer-clean: maintainer-clean-am
281
+ -rm -f Makefile
282
+ maintainer-clean-am: distclean-am maintainer-clean-generic
283
+
284
+ mostlyclean: mostlyclean-am
285
+
286
+ mostlyclean-am: mostlyclean-generic
287
+
288
+ pdf: pdf-am
289
+
290
+ pdf-am:
291
+
292
+ ps: ps-am
293
+
294
+ ps-am:
295
+
296
+ uninstall-am:
297
+
298
+ .MAKE: install-am install-strip
299
+
300
+ .PHONY: all all-am check check-am clean clean-generic distclean \
301
+ distclean-generic distdir dvi dvi-am html html-am info info-am \
302
+ install install-am install-data install-data-am install-dvi \
303
+ install-dvi-am install-exec install-exec-am install-html \
304
+ install-html-am install-info install-info-am install-man \
305
+ install-pdf install-pdf-am install-ps install-ps-am \
306
+ install-strip installcheck installcheck-am installdirs \
307
+ maintainer-clean maintainer-clean-generic mostlyclean \
308
+ mostlyclean-generic pdf pdf-am ps ps-am uninstall uninstall-am
309
+
310
+ # Tell versions [3.59,3.63) of GNU make to not export all variables.
311
+ # Otherwise a system limit (for SysV at least) may be exceeded.
312
+ .NOEXPORT:
@@ -0,0 +1,43 @@
1
+ How to run UNLV tests.
2
+
3
+ The scripts in this directory make it possible to duplicate the tests
4
+ published in the Fourth Annual Test of OCR Accuracy.
5
+ See http://www.isri.unlv.edu/downloads/AT-1995.pdf
6
+ but first you have to get the tools and data from UNLV:
7
+
8
+ Step 1: To download the images, go to
9
+ http://www.isri.unlv.edu/ISRI/OCRtk
10
+ and get 3b.tgz, Bb.tgz, Mb.tgz and Nb.tgz.
11
+
12
+ Step 2: extract the files. It doesn't really matter where
13
+ in your filesystem you put them, but they must go under a common
14
+ root so you have directories 3, B, M and N in, for example,
15
+ /users/me/ISRI-OCRtk.
16
+
17
+ Step 3: Reorganize the files.
18
+ The lack of tif extensions on the images is inconvenient, so there
19
+ is a script to reorganize the data to match the rest of the test
20
+ scripts.
21
+ cd to /users/me/ISRI-OCRtk or wherever 3, B, M and N ended up and run
22
+ /blah/blah/tesseract-ocr/testing/reorgdata.sh 3B
23
+ This makes directories doe3.3B, bus.3B, mag.3B and news.3B.
24
+ You can now get rid of 3, B, M, and N unless you want to get some of the
25
+ other scanning resolutions out of them.
26
+
27
+ Step 4: Download the ISRI toolkit from:
28
+ http://www.isri.unlv.edu/downloads/ftk-1.0.tgz
29
+
30
+ Step 5: If they work for you, use the binaries directly from the bin
31
+ directory and put them in tesseract-ocr/testing/unlv
32
+ otherwise build the tools for yourself and put them there.
33
+
34
+ Step 6: cd back to your main tesseract-ocr dir and build tesseract.
35
+
36
+ Step 7: run testing/runalltests.sh with the root data dir and testname:
37
+ testing/runalltests.sh /users/me/ISRI-OCRtk tess2.0
38
+ and go to the gym, have lunch etc.
39
+
40
+ Step 8: There should be a file
41
+ testing/reports/tess2.0.summary that contains the final summarized accuracy
42
+ report and comparison with the 1995 results.
43
+
@@ -0,0 +1,61 @@
1
+ #!/bin/bash
2
+ # File: counttestset.sh
3
+ # Description: Script to count the errors on a single UNLV set.
4
+ # Author: Ray Smith
5
+ # Created: Wed Jun 13 11:58:01 PDT 2007
6
+ #
7
+ # (C) Copyright 2007, Google Inc.
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ if [ $# -ne 1 ]
19
+ then
20
+ echo "Usage:$0 pagesfile"
21
+ exit 1
22
+ fi
23
+ if [ ! -d ccmain ]
24
+ then
25
+ echo "Run $0 from the tesseract-ocr root directory!"
26
+ exit 1
27
+ fi
28
+ if [ ! -r testing/unlv/accuracy ]
29
+ then
30
+ echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
31
+ exit 1
32
+ fi
33
+ pages=$1
34
+
35
+ imdir=${pages%/pages}
36
+ setname=${imdir##*/}
37
+ resdir=testing/results/$setname
38
+ mkdir -p testing/reports
39
+ echo "Counting on set $setname in directory $imdir to $resdir"
40
+ accfiles=""
41
+ wafiles=""
42
+ while read page dir
43
+ do
44
+ if [ "$dir" ]
45
+ then
46
+ srcdir="$imdir/$dir"
47
+ else
48
+ srcdir="$imdir"
49
+ fi
50
+ # echo "$srcdir/$page.tif"
51
+ # Count character errors.
52
+ testing/unlv/accuracy $srcdir/$page.txt $resdir/$page.txt $resdir/$page.acc
53
+ accfiles="$accfiles $resdir/$page.acc"
54
+ # Count word errors.
55
+ testing/unlv/wordacc $srcdir/$page.txt $resdir/$page.txt $resdir/$page.wa
56
+ wafiles="$wafiles $resdir/$page.wa"
57
+ done <$pages
58
+ testing/unlv/accsum $accfiles >testing/reports/$setname.characc
59
+ testing/unlv/wordaccsum $wafiles >testing/reports/$setname.wordacc
60
+
61
+
@@ -0,0 +1,44 @@
1
+ #!/bin/bash
2
+
3
+ if [ $# -ne 1 ]
4
+ then
5
+ echo "Usage:$0 scantype"
6
+ echo "UNLV data comes in several scan types:"
7
+ echo "3B=300 dpi binary"
8
+ echo "3A=adaptive thresholded 300 dpi"
9
+ echo "3G=300 dpi grey"
10
+ echo "4B=400dpi binary"
11
+ echo "2B=200dpi binary"
12
+ echo "For now we only use 3B"
13
+ exit 1
14
+ fi
15
+ ext="$1"
16
+ #Run from the directory holding the downloaded set dirs; output dirs are created alongside them.
17
+ #There are several test sets without meaningful names, so rename
18
+ #them with something a bit more meaningful.
19
+ #Each s is oldname/newname
20
+ for s in 3/doe3 B/bus M/mag N/news L/legal R/rep S/spn Z/zset
21
+ do
22
+ old="${s%/*}"
23
+ #If this set was downloaded then process it.
24
+ if [ -r "$old/PAGES" ]
25
+ then
26
+ new="${s#*/}.$ext"
27
+ mkdir -p "$new"
28
+ echo "Set $old -> $new"
29
+ #The pages file had - instead of _ so fix it and add the extension.
30
+ for page in $(cat "$old/PAGES")
31
+ do
32
+ echo "${page%-*}_${page#*-}.$ext"
33
+ done >"$new/pages"
34
+ for f in $(cat "$new/pages")
35
+ do
36
+ #Put a tif extension on the tif files.
37
+ cp "$old/${old}_B/$f" "$new/$f.tif"
38
+ #Put a uzn extension on the zone files.
39
+ cp "$old/${old}_B/${f}Z" "$new/$f.uzn"
40
+ #Cat all the truth files together and put into a single txt file.
41
+ cat "$old/${old}_GT/${f%."$ext"}".Z* >"$new/$f.txt"
42
+ done
43
+ fi
44
+ done
@@ -0,0 +1 @@
1
+ 1995 bus.3B 5959 98.14% 0.00% 1631 96.83% 0.00% 1293 95.73% 0.00%
@@ -0,0 +1 @@
1
+ 1995 doe3.3B 36349 97.52% 0.00% 7826 96.34% 0.00% 7042 94.87% 0.00%
@@ -0,0 +1 @@
1
+ 1995 mag.3B 15043 97.74% 0.00% 4566 96.01% 0.00% 3379 94.99% 0.00%
@@ -0,0 +1 @@
1
+ 1995 news.3B 6432 98.69% 0.00% 1946 97.68% 0.00% 1502 96.94% 0.00%
@@ -0,0 +1,9 @@
1
+ 1995 bus.3B 5959 98.14% 0.00% 1631 96.83% 0.00% 1293 95.73% 0.00%
2
+ 1995 doe3.3B 36349 97.52% 0.00% 7826 96.34% 0.00% 7042 94.87% 0.00%
3
+ 1995 mag.3B 15043 97.74% 0.00% 4566 96.01% 0.00% 3379 94.99% 0.00%
4
+ 1995 news.3B 6432 98.69% 0.00% 1946 97.68% 0.00% 1502 96.94% 0.00%
5
+ 2.03 bus.3B 6422 97.99% 7.77% 1750 96.60% 7.30% 1361 95.51 5.26%
6
+ 2.03 doe3.3B 29520 97.98% -18.79% 7966 96.27% 1.79% 6764 95.07 -3.95%
7
+ 2.03 mag.3B 14568 97.81% -3.16% 4288 96.25% -6.09% 3054 95.47 -9.62%
8
+ 2.03 news.3B 7655 98.44% 19.01% 1730 97.94% -11.10% 1208 97.54 -19.57%
9
+ 2.03 Total 58165 - -8.81% 15734 - -1.47% 12387 - -6.27%
@@ -0,0 +1,9 @@
1
+ 1995 bus.3B 5959 98.14% 0.00% 1631 96.83% 0.00% 1293 95.73% 0.00%
2
+ 1995 doe3.3B 36349 97.52% 0.00% 7826 96.34% 0.00% 7042 94.87% 0.00%
3
+ 1995 mag.3B 15043 97.74% 0.00% 4566 96.01% 0.00% 3379 94.99% 0.00%
4
+ 1995 news.3B 6432 98.69% 0.00% 1946 97.68% 0.00% 1502 96.94% 0.00%
5
+ 2.04 bus.3B 6422 97.99% 7.77% 1750 96.60% 7.30% 1361 95.51 5.26%
6
+ 2.04 doe3.3B 29514 97.98% -18.80% 7963 96.27% 1.75% 6762 95.07 -3.98%
7
+ 2.04 mag.3B 14568 97.81% -3.16% 4289 96.25% -6.07% 3053 95.47 -9.65%
8
+ 2.04 news.3B 7655 98.44% 19.01% 1730 97.94% -11.10% 1208 97.54 -19.57%
9
+ 2.04 Total 58159 - -8.82% 15732 - -1.48% 12384 - -6.30%
@@ -0,0 +1,110 @@
1
+ #!/bin/bash
2
+ # File: runalltests.sh
3
+ # Description: Script to run a set of UNLV test sets.
4
+ # Author: Ray Smith
5
+ # Created: Thu Jun 14 08:21:01 PDT 2007
6
+ #
7
+ # (C) Copyright 2007, Google Inc.
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ if [ $# -ne 2 ]
19
+ then
20
+ echo "Usage:$0 unlv-data-dir version-id"
21
+ exit 1
22
+ fi
23
+ if [ ! -d ccmain ]
24
+ then
25
+ echo "Run $0 from the tesseract-ocr root directory!"
26
+ exit 1
27
+ fi
28
+ if [ ! -r ccmain/tesseract ] && [ ! -r tesseract.exe ]
29
+ then
30
+ echo "Please build tesseract before running $0"
31
+ exit 1
32
+ fi
33
+ if [ ! -r testing/unlv/accuracy ] && [ ! -r testing/unlv/accuracy.exe ]
34
+ then
35
+ echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
36
+ exit 1
37
+ fi
38
+
39
+ #deltapc new old prints the %change from old to new with 2 decimals; prints 0.00 when old is 0 or empty.
40
+ deltapc() {
41
+ awk -v new="$1" -v old="$2" 'BEGIN {
42
+ printf("%.2f", (old+0 == 0) ? 0 : 100.0*(new-old)/old);
43
+ }'
44
+ }
45
+
46
+ imdir="$1"
47
+ vid="$2"
48
+ bindir="${0%/*}"
49
+ if [ "$bindir" = "$0" ]
50
+ then
51
+ bindir="./"
52
+ fi
53
+ rdir=testing/reports
54
+ testsets="bus.3B doe3.3B mag.3B news.3B"
55
+ # Running totals of the new and the 1995 baseline errors (character, word, nonstop word).
56
+ totalerrs=0
57
+ totalwerrs=0
58
+ totalnswerrs=0
59
+ totalolderrs=0
60
+ totaloldwerrs=0
61
+ totaloldnswerrs=0
62
+ for set in $testsets
63
+ do
64
+ if [ -r "$imdir/$set/pages" ]
65
+ then
66
+ # Run tesseract on all the pages.
67
+ "$bindir/runtestset.sh" "$imdir/$set/pages"
68
+ # Count the errors on all the pages.
69
+ "$bindir/counttestset.sh" "$imdir/$set/pages"
70
+ # Get the old character word and nonstop word errors.
71
+ olderrs=$(cut -f3 "$rdir/1995.$set.sum")
72
+ oldwerrs=$(cut -f6 "$rdir/1995.$set.sum")
73
+ oldnswerrs=$(cut -f9 "$rdir/1995.$set.sum")
74
+ # Get the new character word and nonstop word errors and accuracy.
75
+ cherrs=$(head -n 4 "$rdir/$set.characc" |tail -n 1 |cut -c1-9 |
76
+ tr -d '[:blank:]')
77
+ chacc=$(head -n 5 "$rdir/$set.characc" |tail -n 1 |cut -c1-9 |
78
+ tr -d '[:blank:]')
79
+ wderrs=$(head -n 4 "$rdir/$set.wordacc" |tail -n 1 |cut -c1-9 |
80
+ tr -d '[:blank:]')
81
+ wdacc=$(head -n 5 "$rdir/$set.wordacc" |tail -n 1 |cut -c1-9 |
82
+ tr -d '[:blank:]')
83
+ nswderrs=$(grep Total "$rdir/$set.wordacc" |head -n 2 |tail -n 1 |
84
+ cut -c10-17 |tr -d '[:blank:]')
85
+ nswdacc=$(grep Total "$rdir/$set.wordacc" |head -n 2 |tail -n 1 |
86
+ cut -c19-26 |tr -d '[:blank:]')
87
+ # Compute the percent change.
88
+ chdelta=$(deltapc "$cherrs" "$olderrs")
89
+ wdelta=$(deltapc "$wderrs" "$oldwerrs")
90
+ nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")
91
+ sumfile="$rdir/$vid.$set.sum"
92
+ echo "$vid $set $cherrs $chacc $chdelta% $wderrs $wdacc\
93
+ $wdelta% $nswderrs $nswdacc $nswdelta%" >"$sumfile"
94
+ # Sum totals over all the testsets.
95
+ let totalerrs=totalerrs+cherrs
96
+ let totalwerrs=totalwerrs+wderrs
97
+ let totalnswerrs=totalnswerrs+nswderrs
98
+ let totalolderrs=totalolderrs+olderrs
99
+ let totaloldwerrs=totaloldwerrs+oldwerrs
100
+ let totaloldnswerrs=totaloldnswerrs+oldnswerrs
101
+ fi
102
+ done
103
+ # Compute grand total percent change.
104
+ chdelta=$(deltapc "$totalerrs" "$totalolderrs")
105
+ wdelta=$(deltapc "$totalwerrs" "$totaloldwerrs")
106
+ nswdelta=$(deltapc "$totalnswerrs" "$totaloldnswerrs")
107
+ tfile="$rdir/$vid.total.sum"
108
+ echo "$vid Total $totalerrs - $chdelta% $totalwerrs\
109
+ - $wdelta% $totalnswerrs - $nswdelta%" >"$tfile"
110
+ cat "$rdir"/1995.*.sum "$rdir/$vid".*.sum >"$rdir/$vid.summary"
@@ -0,0 +1,61 @@
1
+ #!/bin/bash
2
+ # File: runtestset.sh
3
+ # Description: Script to run tesseract on a single UNLV set.
4
+ # Author: Ray Smith
5
+ # Created: Wed Jun 13 10:13:01 PDT 2007
6
+ #
7
+ # (C) Copyright 2007, Google Inc.
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ if [ $# -ne 1 ]
19
+ then
20
+ echo "Usage:$0 pagesfile"
21
+ exit 1
22
+ fi
23
+ if [ ! -d ccmain ]
24
+ then
25
+ echo "Run $0 from the tesseract-ocr root directory!"
26
+ exit 1
27
+ fi
28
+ if [ ! -r ccmain/tesseract ]
29
+ then
30
+ if [ ! -r tesseract.exe ]
31
+ then
32
+ echo "Please build tesseract before running $0"
33
+ exit 1
34
+ else
35
+ tess="./tesseract.exe"
36
+ fi
37
+ else
38
+ tess="ccmain/tesseract"
39
+ export TESSDATA_PREFIX="$PWD/"
40
+ fi
41
+
42
+ pages="$1"
43
+ # Derive the image directory, the set name and the results directory from the pages path.
44
+ imdir="${pages%/pages}"
45
+ setname="${imdir##*/}"
46
+ resdir="testing/results/$setname"
47
+ echo "Testing on set $setname in directory $imdir to $resdir"
48
+ mkdir -p "$resdir"
49
+ while read -r page dir
50
+ do
51
+ # A pages file may be a list of files with subdirs or maybe just
52
+ # a plain list of files so accommodate both.
53
+ if [ "$dir" ]
54
+ then
55
+ srcdir="$imdir/$dir"
56
+ else
57
+ srcdir="$imdir"
58
+ fi
59
+ # echo "$srcdir/$page.tif"
60
+ "$tess" "$srcdir/$page.tif" "$resdir/$page" nobatch unlv
61
+ done <"$pages"
@@ -0,0 +1,20 @@
1
+ SUBDIRS =
2
+ AM_CPPFLAGS = \
3
+ -I$(top_srcdir)/ccstruct -I$(top_srcdir)/ccutil \
4
+ -I$(top_srcdir)/image -I$(top_srcdir)/viewer \
5
+ -I$(top_srcdir)/pageseg
6
+ # Public headers of this directory, installed into $(includedir).
7
+ include_HEADERS = \
8
+ blkocc.h blobcmpl.h drawedg.h drawtord.h edgblob.h \
9
+ edgloop.h fpchop.h gap_map.h makerow.h oldbasel.h \
10
+ pithsync.h pitsync1.h scanedg.h sortflts.h tessout.h \
11
+ topitch.h tordmain.h tospace.h tovars.h underlin.h \
12
+ wordseg.h
13
+ # Static library built from the sources listed below and installed into $(libdir).
14
+ lib_LIBRARIES = libtesseract_textord.a
15
+ libtesseract_textord_a_SOURCES = \
16
+ blkocc.cpp drawedg.cpp drawtord.cpp edgblob.cpp \
17
+ edgloop.cpp fpchop.cpp gap_map.cpp makerow.cpp oldbasel.cpp \
18
+ pithsync.cpp pitsync1.cpp scanedg.cpp sortflts.cpp \
19
+ topitch.cpp tordmain.cpp tospace.cpp tovars.cpp underlin.cpp \
20
+ wordseg.cpp