tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,855 @@
1
+ /******************************************************************************
2
+ ** Filename: cnTraining.cpp
3
+ ** Purpose: Generates a normproto and pffmtable.
4
+ ** Author: Dan Johnson
5
+ ** Revisment: Christy Russon
6
+ ** History: Fri Aug 18 08:53:50 1989, DSJ, Created.
7
+ ** 5/25/90, DSJ, Adapted to multiple feature types.
8
+ ** Tuesday, May 17, 1998 Changes made to make feature specific and
9
+ ** simplify structures. First step in simplifying training process.
10
+ **
11
+ ** (c) Copyright Hewlett-Packard Company, 1988.
12
+ ** Licensed under the Apache License, Version 2.0 (the "License");
13
+ ** you may not use this file except in compliance with the License.
14
+ ** You may obtain a copy of the License at
15
+ ** http://www.apache.org/licenses/LICENSE-2.0
16
+ ** Unless required by applicable law or agreed to in writing, software
17
+ ** distributed under the License is distributed on an "AS IS" BASIS,
18
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ ** See the License for the specific language governing permissions and
20
+ ** limitations under the License.
21
+ ******************************************************************************/
22
+
23
+
24
+ /**----------------------------------------------------------------------------
25
+ Include Files and Type Defines
26
+ ----------------------------------------------------------------------------**/
27
+ #include "oldlist.h"
28
+ #include "efio.h"
29
+ #include "emalloc.h"
30
+ #include "featdefs.h"
31
+ #include "tessopt.h"
32
+ #include "ocrfeatures.h"
33
+ #include "general.h"
34
+ #include "clusttool.h"
35
+ #include "cluster.h"
36
+ #include "name2char.h"
37
+ #include <string.h>
38
+ #include <stdio.h>
39
+ #include <math.h>
40
+ #include "unichar.h"
41
+
42
/* Size of the FontName buffer and of the output Filename buffers. */
#define MAXNAMESIZE           80
/* Initial value of MaxNumSamples (overridable with -N). */
#define MAX_NUM_SAMPLES       10000
/* Short name of the feature type this program clusters. */
#define PROGRAM_FEATURE_TYPE  "cn"
/* Half-width of the random offset added to every feature parameter
   while the training samples are read. */
#define MINSD                 (1.0f / 64.0f)

/* cjn: fixes link problem */
int row_number;
48
+
49
+ typedef struct
50
+ {
51
+ char *Label;
52
+ int SampleCount;
53
+ LIST List;
54
+ }
55
+ LABELEDLISTNODE, *LABELEDLIST;
56
+
57
/* Rounds x to the nearest multiple of frag.  Both arguments are
   parenthesized so that compound expressions (e.g. a + b) expand with the
   intended precedence.  frag is still evaluated more than once, so do not
   pass expressions with side effects. */
#define round(x,frag) (floor((x) / (frag) + .5) * (frag))
58
+
59
+ /**----------------------------------------------------------------------------
60
+ Public Function Prototypes
61
+ ----------------------------------------------------------------------------**/
62
+ int main (
63
+ int argc,
64
+ char **argv);
65
+
66
+ /**----------------------------------------------------------------------------
67
+ Private Function Prototypes
68
+ ----------------------------------------------------------------------------**/
69
+ void ParseArguments(
70
+ int argc,
71
+ char **argv);
72
+
73
+ char *GetNextFilename ();
74
+
75
+ void ReadTrainingSamples (
76
+ FILE *File,
77
+ LIST* TrainingSamples);
78
+
79
+ LABELEDLIST FindList (
80
+ LIST List,
81
+ char *Label);
82
+
83
+ LABELEDLIST NewLabeledList (
84
+ char *Label);
85
+
86
+ void WriteTrainingSamples (
87
+ char *Directory,
88
+ LIST CharList);
89
+
90
+ void WriteNormProtos (
91
+ char *Directory,
92
+ LIST LabeledProtoList,
93
+ CLUSTERER *Clusterer);
94
+
95
+ void FreeTrainingSamples (
96
+ LIST CharList);
97
+
98
+ void FreeNormProtoList (
99
+ LIST CharList);
100
+
101
+ void FreeLabeledList (
102
+ LABELEDLIST LabeledList);
103
+
104
+ CLUSTERER *SetUpForClustering(
105
+ LABELEDLIST CharSample);
106
+ /*
107
+ PARAMDESC *ConvertToPARAMDESC(
108
+ PARAM_DESC* Param_Desc,
109
+ int N);
110
+ */
111
+ void AddToNormProtosList(
112
+ LIST* NormProtoList,
113
+ LIST ProtoList,
114
+ char* CharName);
115
+
116
+ void WriteProtos(
117
+ FILE *File,
118
+ uinT16 N,
119
+ LIST ProtoList,
120
+ BOOL8 WriteSigProtos,
121
+ BOOL8 WriteInsigProtos);
122
+
123
+ int NumberOfProtos(
124
+ LIST ProtoList,
125
+ BOOL8 CountSigProtos,
126
+ BOOL8 CountInsigProtos);
127
+
128
+ /**----------------------------------------------------------------------------
129
+ Global Data Definitions and Declarations
130
+ ----------------------------------------------------------------------------**/
131
+ static char FontName[MAXNAMESIZE];
132
+ /* globals used for parsing command line arguments */
133
+ static char *Directory = NULL;
134
+ static int MaxNumSamples = MAX_NUM_SAMPLES;
135
+ static int Argc;
136
+ static char **Argv;
137
+
138
+ /* globals used to control what information is saved in the output file */
139
+ static BOOL8 ShowAllSamples = FALSE;
140
+ static BOOL8 ShowSignificantProtos = TRUE;
141
+ static BOOL8 ShowInsignificantProtos = FALSE;
142
+
143
+ /* global variable to hold configuration parameters to control clustering */
144
+ //-M 0.025 -B 0.05 -I 0.8 -C 1e-3
145
+ static CLUSTERCONFIG Config =
146
+ {
147
+ elliptical, 0.025, 0.05, 0.8, 1e-3, 0
148
+ };
149
+
150
+ static FLOAT32 RoundingAccuracy = 0.0;
151
+
152
+ /**----------------------------------------------------------------------------
153
+ Public Code
154
+ ----------------------------------------------------------------------------**/
155
+ /*---------------------------------------------------------------------------*/
156
+ int main (
157
+ int argc,
158
+ char **argv)
159
+
160
+ /*
161
+ ** Parameters:
162
+ ** argc number of command line arguments
163
+ ** argv array of command line arguments
164
+ ** Globals: none
165
+ ** Operation:
166
+ ** This program reads in a text file consisting of feature
167
+ ** samples from a training page in the following format:
168
+ **
169
+ ** FontName CharName NumberOfFeatureTypes(N)
170
+ ** FeatureTypeName1 NumberOfFeatures(M)
171
+ ** Feature1
172
+ ** ...
173
+ ** FeatureM
174
+ ** FeatureTypeName2 NumberOfFeatures(M)
175
+ ** Feature1
176
+ ** ...
177
+ ** FeatureM
178
+ ** ...
179
+ ** FeatureTypeNameN NumberOfFeatures(M)
180
+ ** Feature1
181
+ ** ...
182
+ ** FeatureM
183
+ ** FontName CharName ...
184
+ **
185
+ ** It then appends these samples into a separate file for each
186
+ ** character. The name of the file is
187
+ **
188
+ ** DirectoryName/FontName/CharName.FeatureTypeName
189
+ **
190
+ ** The DirectoryName can be specified via a command
191
+ ** line argument. If not specified, it defaults to the
192
+ ** current directory. The format of the resulting files is:
193
+ **
194
+ ** NumberOfFeatures(M)
195
+ ** Feature1
196
+ ** ...
197
+ ** FeatureM
198
+ ** NumberOfFeatures(M)
199
+ ** ...
200
+ **
201
+ ** The output files each have a header which describes the
202
+ ** type of feature which the file contains. This header is
203
+ ** in the format required by the clusterer. A command line
204
+ ** argument can also be used to specify that only the first
205
+ ** N samples of each class should be used.
206
+ ** Return: none
207
+ ** Exceptions: none
208
+ ** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
209
+ */
210
+
211
+ {
212
+ char *PageName;
213
+ FILE *TrainingPage;
214
+ LIST CharList = NIL;
215
+ CLUSTERER *Clusterer = NULL;
216
+ LIST ProtoList = NIL;
217
+ LIST NormProtoList = NIL;
218
+ LIST pCharList;
219
+ LABELEDLIST CharSample;
220
+
221
+ ParseArguments (argc, argv);
222
+ while ((PageName = GetNextFilename()) != NULL)
223
+ {
224
+ printf ("Reading %s ...\n", PageName);
225
+ TrainingPage = Efopen (PageName, "r");
226
+ ReadTrainingSamples (TrainingPage, &CharList);
227
+ fclose (TrainingPage);
228
+ //WriteTrainingSamples (Directory, CharList);
229
+ }
230
+ printf("Clustering ...\n");
231
+ pCharList = CharList;
232
+ iterate(pCharList)
233
+ {
234
+ //Cluster
235
+ CharSample = (LABELEDLIST) first_node (pCharList);
236
+ //printf ("\nClustering %s ...", CharSample->Label);
237
+ Clusterer = SetUpForClustering(CharSample);
238
+ float SavedMinSamples = Config.MinSamples;
239
+ Config.MagicSamples = CharSample->SampleCount;
240
+ while (Config.MinSamples > 0.001) {
241
+ ProtoList = ClusterSamples(Clusterer, &Config);
242
+ if (NumberOfProtos(ProtoList, 1, 0) > 0)
243
+ break;
244
+ else {
245
+ Config.MinSamples *= 0.95;
246
+ printf("0 significant protos for %s."
247
+ " Retrying clustering with MinSamples = %f%%\n",
248
+ CharSample->Label, Config.MinSamples);
249
+ }
250
+ }
251
+ Config.MinSamples = SavedMinSamples;
252
+ AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
253
+ }
254
+ FreeTrainingSamples (CharList);
255
+ WriteNormProtos (Directory, NormProtoList, Clusterer);
256
+ FreeClusterer(Clusterer);
257
+ FreeProtoList(&ProtoList);
258
+ FreeNormProtoList(NormProtoList);
259
+ printf ("\n");
260
+ return 0;
261
+ } // main
262
+
263
+
264
+ /**----------------------------------------------------------------------------
265
+ Private Code
266
+ ----------------------------------------------------------------------------**/
267
+ /*---------------------------------------------------------------------------*/
268
+ void ParseArguments(
269
+ int argc,
270
+ char **argv)
271
+
272
+ /*
273
+ ** Parameters:
274
+ ** argc number of command line arguments to parse
275
+ ** argv command line arguments
276
+ ** Globals:
277
+ ** ShowAllSamples flag controlling samples display
278
+ ** ShowSignificantProtos flag controlling proto display
279
+ ** ShowInsignificantProtos flag controlling proto display
280
+ ** Config current clustering parameters
281
+ ** tessoptarg, tessoptind defined by tessopt sys call
282
+ ** Argc, Argv global copies of argc and argv
283
+ ** Operation:
284
+ ** This routine parses the command line arguments that were
285
+ ** passed to the program. The legal arguments are:
286
+ ** -d "turn off display of samples"
287
+ ** -p "turn off significant protos"
288
+ ** -n "turn off insignificant proto"
289
+ ** -S [ spherical | elliptical | mixed | automatic ]
290
+ ** -M MinSamples "min samples per prototype (%)"
291
+ ** -B MaxIllegal "max illegal chars per cluster (%)"
292
+ ** -I Independence "0 to 1"
293
+ ** -C Confidence "1e-200 to 1.0"
294
+ ** -D Directory
295
+ ** -N MaxNumSamples
296
+ ** -R RoundingAccuracy
297
+ ** Return: none
298
+ ** Exceptions: Illegal options terminate the program.
299
+ ** History: 7/24/89, DSJ, Created.
300
+ */
301
+
302
+ {
303
+ int Option;
304
+ int ParametersRead;
305
+ BOOL8 Error;
306
+
307
+ Error = FALSE;
308
+ Argc = argc;
309
+ Argv = argv;
310
+ while (( Option = tessopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
311
+ {
312
+ switch ( Option )
313
+ {
314
+ case 'n':
315
+ sscanf(tessoptarg,"%d", &ParametersRead);
316
+ ShowInsignificantProtos = ParametersRead;
317
+ break;
318
+ case 'p':
319
+ sscanf(tessoptarg,"%d", &ParametersRead);
320
+ ShowSignificantProtos = ParametersRead;
321
+ break;
322
+ case 'd':
323
+ ShowAllSamples = FALSE;
324
+ break;
325
+ case 'C':
326
+ ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
327
+ if ( ParametersRead != 1 ) Error = TRUE;
328
+ else if ( Config.Confidence > 1 ) Config.Confidence = 1;
329
+ else if ( Config.Confidence < 0 ) Config.Confidence = 0;
330
+ break;
331
+ case 'I':
332
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
333
+ if ( ParametersRead != 1 ) Error = TRUE;
334
+ else if ( Config.Independence > 1 ) Config.Independence = 1;
335
+ else if ( Config.Independence < 0 ) Config.Independence = 0;
336
+ break;
337
+ case 'M':
338
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
339
+ if ( ParametersRead != 1 ) Error = TRUE;
340
+ else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
341
+ else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
342
+ break;
343
+ case 'B':
344
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
345
+ if ( ParametersRead != 1 ) Error = TRUE;
346
+ else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
347
+ else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
348
+ break;
349
+ case 'R':
350
+ ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
351
+ if ( ParametersRead != 1 ) Error = TRUE;
352
+ else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
353
+ else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
354
+ break;
355
+ case 'S':
356
+ switch ( tessoptarg[0] )
357
+ {
358
+ case 's': Config.ProtoStyle = spherical; break;
359
+ case 'e': Config.ProtoStyle = elliptical; break;
360
+ case 'm': Config.ProtoStyle = mixed; break;
361
+ case 'a': Config.ProtoStyle = automatic; break;
362
+ default: Error = TRUE;
363
+ }
364
+ break;
365
+ case 'D':
366
+ Directory = tessoptarg;
367
+ break;
368
+ case 'N':
369
+ if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
370
+ MaxNumSamples <= 0)
371
+ Error = TRUE;
372
+ break;
373
+ case '?':
374
+ Error = TRUE;
375
+ break;
376
+ }
377
+ if ( Error )
378
+ {
379
+ fprintf (stderr, "usage: %s [-D] [-P] [-N]\n", argv[0] );
380
+ fprintf (stderr, "\t[-S ProtoStyle]\n");
381
+ fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
382
+ fprintf (stderr, "\t[-d directory] [-n MaxNumSamples] [ TrainingPage ... ]\n");
383
+ exit (2);
384
+ }
385
+ }
386
+ } /* ParseArguments */
387
+
388
+ /*---------------------------------------------------------------------------*/
389
+ char *GetNextFilename ()
390
+ /*
391
+ ** Parameters: none
392
+ ** Globals:
393
+ ** tessoptind defined by tessopt sys call
394
+ ** Argc, Argv global copies of argc and argv
395
+ ** Operation:
396
+ ** This routine returns the next command line argument. If
397
+ ** there are no remaining command line arguments, it returns
398
+ ** NULL. This routine should only be called after all option
399
+ ** arguments have been parsed and removed with ParseArguments.
400
+ ** Return: Next command line argument or NULL.
401
+ ** Exceptions: none
402
+ ** History: Fri Aug 18 09:34:12 1989, DSJ, Created.
403
+ */
404
+
405
+ {
406
+ if (tessoptind < Argc)
407
+ return (Argv [tessoptind++]);
408
+ else
409
+ return (NULL);
410
+
411
+ } /* GetNextFilename */
412
+
413
+ /*---------------------------------------------------------------------------*/
414
+ void ReadTrainingSamples (
415
+ FILE *File,
416
+ LIST* TrainingSamples)
417
+
418
+ /*
419
+ ** Parameters:
420
+ ** File open text file to read samples from
421
+ ** Globals: none
422
+ ** Operation:
423
+ ** This routine reads training samples from a file and
424
+ ** places them into a data structure which organizes the
425
+ ** samples by FontName and CharName. It then returns this
426
+ ** data structure.
427
+ ** Return: none
428
+ ** Exceptions: none
429
+ ** History: Fri Aug 18 13:11:39 1989, DSJ, Created.
430
+ ** Tue May 17 1998 simplifications to structure, illiminated
431
+ ** font, and feature specification levels of structure.
432
+ */
433
+
434
+ {
435
+ char unichar[UNICHAR_LEN + 1];
436
+ LABELEDLIST CharSample;
437
+ FEATURE_SET FeatureSamples;
438
+ CHAR_DESC CharDesc;
439
+ int Type, i;
440
+
441
+ while (fscanf (File, "%s %s", FontName, unichar) == 2) {
442
+ CharSample = FindList (*TrainingSamples, unichar);
443
+ if (CharSample == NULL) {
444
+ CharSample = NewLabeledList (unichar);
445
+ *TrainingSamples = push (*TrainingSamples, CharSample);
446
+ }
447
+ CharDesc = ReadCharDescription (File);
448
+ Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
449
+ FeatureSamples = CharDesc->FeatureSets[Type];
450
+ for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
451
+ FEATURE f = FeatureSamples->Features[feature];
452
+ for (int dim =0; dim < f->Type->NumParams; ++dim)
453
+ f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
454
+ }
455
+ CharSample->List = push (CharSample->List, FeatureSamples);
456
+ CharSample->SampleCount++;
457
+ for (i = 0; i < CharDesc->NumFeatureSets; i++)
458
+ if (Type != i)
459
+ FreeFeatureSet(CharDesc->FeatureSets[i]);
460
+ free (CharDesc);
461
+ }
462
+ } // ReadTrainingSamples
463
+
464
+ /*---------------------------------------------------------------------------*/
465
+ LABELEDLIST FindList (
466
+ LIST List,
467
+ char *Label)
468
+
469
+ /*
470
+ ** Parameters:
471
+ ** List list to search
472
+ ** Label label to search for
473
+ ** Globals: none
474
+ ** Operation:
475
+ ** This routine searches thru a list of labeled lists to find
476
+ ** a list with the specified label. If a matching labeled list
477
+ ** cannot be found, NULL is returned.
478
+ ** Return: Labeled list with the specified Label or NULL.
479
+ ** Exceptions: none
480
+ ** History: Fri Aug 18 15:57:41 1989, DSJ, Created.
481
+ */
482
+
483
+ {
484
+ LABELEDLIST LabeledList;
485
+
486
+ iterate (List)
487
+ {
488
+ LabeledList = (LABELEDLIST) first_node (List);
489
+ if (strcmp (LabeledList->Label, Label) == 0)
490
+ return (LabeledList);
491
+ }
492
+ return (NULL);
493
+
494
+ } /* FindList */
495
+
496
+ /*---------------------------------------------------------------------------*/
497
+ LABELEDLIST NewLabeledList (
498
+ char *Label)
499
+
500
+ /*
501
+ ** Parameters:
502
+ ** Label label for new list
503
+ ** Globals: none
504
+ ** Operation:
505
+ ** This routine allocates a new, empty labeled list and gives
506
+ ** it the specified label.
507
+ ** Return: New, empty labeled list.
508
+ ** Exceptions: none
509
+ ** History: Fri Aug 18 16:08:46 1989, DSJ, Created.
510
+ */
511
+
512
+ {
513
+ LABELEDLIST LabeledList;
514
+
515
+ LabeledList = (LABELEDLIST) (char*)Emalloc (sizeof (LABELEDLISTNODE));
516
+ LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
517
+ strcpy (LabeledList->Label, Label);
518
+ LabeledList->List = NIL;
519
+ LabeledList->SampleCount = 0;
520
+ return (LabeledList);
521
+
522
+ } /* NewLabeledList */
523
+
524
+ /*---------------------------------------------------------------------------*/
525
+ void WriteTrainingSamples (
526
+ char *Directory,
527
+ LIST CharList)
528
+
529
+ /*
530
+ ** Parameters:
531
+ ** Directory directory to place sample files into
532
+ ** FontList list of fonts used in the training samples
533
+ ** Globals:
534
+ ** MaxNumSamples max number of samples per class to write
535
+ ** Operation:
536
+ ** This routine writes the specified samples into files which
537
+ ** are organized according to the font name and character name
538
+ ** of the samples.
539
+ ** Return: none
540
+ ** Exceptions: none
541
+ ** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
542
+ */
543
+
544
+ {
545
+ LABELEDLIST CharSample;
546
+ FEATURE_SET FeatureSet;
547
+ LIST FeatureList;
548
+ FILE *File;
549
+ char Filename[MAXNAMESIZE];
550
+ int NumSamples;
551
+
552
+ iterate (CharList) // iterate thru all of the fonts
553
+ {
554
+ CharSample = (LABELEDLIST) first_node (CharList);
555
+
556
+ // construct the full pathname for the current samples file
557
+ strcpy (Filename, "");
558
+ if (Directory != NULL)
559
+ {
560
+ strcat (Filename, Directory);
561
+ strcat (Filename, "/");
562
+ }
563
+ strcat (Filename, "Merged");
564
+ strcat (Filename, "/");
565
+ strcat (Filename, CharSample->Label);
566
+ strcat (Filename, ".");
567
+ strcat (Filename, PROGRAM_FEATURE_TYPE);
568
+ printf ("\nWriting %s ...", Filename);
569
+
570
+ /* if file does not exist, create a new one with an appropriate
571
+ header; otherwise append samples to the existing file */
572
+ File = fopen (Filename, "r");
573
+ if (File == NULL)
574
+ {
575
+ File = Efopen (Filename, "w");
576
+ WriteOldParamDesc
577
+ (File, FeatureDefs.FeatureDesc[ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)]);
578
+ }
579
+ else
580
+ {
581
+ fclose (File);
582
+ File = Efopen (Filename, "a");
583
+ }
584
+
585
+ // append samples onto the file
586
+ FeatureList = CharSample->List;
587
+ NumSamples = 0;
588
+ iterate (FeatureList)
589
+ {
590
+ //if (NumSamples >= MaxNumSamples) break;
591
+
592
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
593
+ WriteFeatureSet (File, FeatureSet);
594
+ NumSamples++;
595
+ }
596
+ fclose (File);
597
+ }
598
+ } /* WriteTrainingSamples */
599
+
600
+
601
+ /*----------------------------------------------------------------------------*/
602
+ void WriteNormProtos (
603
+ char *Directory,
604
+ LIST LabeledProtoList,
605
+ CLUSTERER *Clusterer)
606
+
607
+ /*
608
+ ** Parameters:
609
+ ** Directory directory to place sample files into
610
+ ** Globals:
611
+ ** MaxNumSamples max number of samples per class to write
612
+ ** Operation:
613
+ ** This routine writes the specified samples into files which
614
+ ** are organized according to the font name and character name
615
+ ** of the samples.
616
+ ** Return: none
617
+ ** Exceptions: none
618
+ ** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
619
+ */
620
+
621
+ {
622
+ FILE *File;
623
+ char Filename[MAXNAMESIZE];
624
+ LABELEDLIST LabeledProto;
625
+ int N;
626
+
627
+ strcpy (Filename, "");
628
+ if (Directory != NULL)
629
+ {
630
+ strcat (Filename, Directory);
631
+ strcat (Filename, "/");
632
+ }
633
+ strcat (Filename, "normproto");
634
+ printf ("\nWriting %s ...", Filename);
635
+ File = Efopen (Filename, "w");
636
+ fprintf(File,"%0d\n",Clusterer->SampleSize);
637
+ WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc);
638
+ iterate(LabeledProtoList)
639
+ {
640
+ LabeledProto = (LABELEDLIST) first_node (LabeledProtoList);
641
+ N = NumberOfProtos(LabeledProto->List,
642
+ ShowSignificantProtos, ShowInsignificantProtos);
643
+ if (N < 1) {
644
+ printf ("\nError! Not enough protos for %s: %d protos"
645
+ " (%d significant protos"
646
+ ", %d insignificant protos)\n",
647
+ LabeledProto->Label, N,
648
+ NumberOfProtos(LabeledProto->List, 1, 0),
649
+ NumberOfProtos(LabeledProto->List, 0, 1));
650
+ exit(1);
651
+ }
652
+ fprintf(File, "\n%s %d\n", LabeledProto->Label, N);
653
+ WriteProtos(File, Clusterer->SampleSize, LabeledProto->List,
654
+ ShowSignificantProtos, ShowInsignificantProtos);
655
+ }
656
+ fclose (File);
657
+
658
+ } // WriteNormProtos
659
+
660
+ /*---------------------------------------------------------------------------*/
661
+ void FreeTrainingSamples (
662
+ LIST CharList)
663
+
664
+ /*
665
+ ** Parameters:
666
+ ** FontList list of all fonts in document
667
+ ** Globals: none
668
+ ** Operation:
669
+ ** This routine deallocates all of the space allocated to
670
+ ** the specified list of training samples.
671
+ ** Return: none
672
+ ** Exceptions: none
673
+ ** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
674
+ */
675
+
676
+ {
677
+ LABELEDLIST CharSample;
678
+ FEATURE_SET FeatureSet;
679
+ LIST FeatureList;
680
+
681
+
682
+ printf ("\nFreeTrainingSamples...");
683
+ iterate (CharList) /* iterate thru all of the fonts */
684
+ {
685
+ CharSample = (LABELEDLIST) first_node (CharList);
686
+ FeatureList = CharSample->List;
687
+ iterate (FeatureList) /* iterate thru all of the classes */
688
+ {
689
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
690
+ FreeFeatureSet (FeatureSet);
691
+ }
692
+ FreeLabeledList (CharSample);
693
+ }
694
+ destroy (CharList);
695
+
696
+ } /* FreeTrainingSamples */
697
+
698
+ /*-------------------------------------------------------------------------*/
699
+ void FreeNormProtoList (
700
+ LIST CharList)
701
+
702
+ {
703
+ LABELEDLIST CharSample;
704
+
705
+ iterate (CharList) /* iterate thru all of the fonts */
706
+ {
707
+ CharSample = (LABELEDLIST) first_node (CharList);
708
+ FreeLabeledList (CharSample);
709
+ }
710
+ destroy (CharList);
711
+
712
+ } // FreeNormProtoList
713
+
714
+ /*---------------------------------------------------------------------------*/
715
+ void FreeLabeledList (
716
+ LABELEDLIST LabeledList)
717
+
718
+ /*
719
+ ** Parameters:
720
+ ** LabeledList labeled list to be freed
721
+ ** Globals: none
722
+ ** Operation:
723
+ ** This routine deallocates all of the memory consumed by
724
+ ** a labeled list. It does not free any memory which may be
725
+ ** consumed by the items in the list.
726
+ ** Return: none
727
+ ** Exceptions: none
728
+ ** History: Fri Aug 18 17:52:45 1989, DSJ, Created.
729
+ */
730
+
731
+ {
732
+ destroy (LabeledList->List);
733
+ free (LabeledList->Label);
734
+ free (LabeledList);
735
+
736
+ } /* FreeLabeledList */
737
+
738
+ /*---------------------------------------------------------------------------*/
739
+ CLUSTERER *SetUpForClustering(
740
+ LABELEDLIST CharSample)
741
+
742
+ /*
743
+ ** Parameters:
744
+ ** CharSample: LABELEDLIST that holds all the feature information for a
745
+ ** given character.
746
+ ** Globals:
747
+ ** None
748
+ ** Operation:
749
+ ** This routine reads samples from a LABELEDLIST and enters
750
+ ** those samples into a clusterer data structure. This
751
+ ** data structure is then returned to the caller.
752
+ ** Return:
753
+ ** Pointer to new clusterer data structure.
754
+ ** Exceptions:
755
+ ** None
756
+ ** History:
757
+ ** 8/16/89, DSJ, Created.
758
+ */
759
+
760
+ {
761
+ uinT16 N;
762
+ int i, j;
763
+ FLOAT32 *Sample = NULL;
764
+ CLUSTERER *Clusterer;
765
+ inT32 CharID;
766
+ LIST FeatureList = NULL;
767
+ FEATURE_SET FeatureSet = NULL;
768
+ FEATURE_DESC FeatureDesc = NULL;
769
+ // PARAM_DESC* ParamDesc;
770
+
771
+ FeatureDesc = FeatureDefs.FeatureDesc[ShortNameToFeatureType(PROGRAM_FEATURE_TYPE)];
772
+ N = FeatureDesc->NumParams;
773
+ //ParamDesc = ConvertToPARAMDESC(FeatureDesc->ParamDesc, N);
774
+ Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc);
775
+ // free(ParamDesc);
776
+
777
+ FeatureList = CharSample->List;
778
+ CharID = 0;
779
+ iterate(FeatureList)
780
+ {
781
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
782
+ for (i=0; i < FeatureSet->MaxNumFeatures; i++)
783
+ {
784
+ if (Sample == NULL)
785
+ Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
786
+ for (j=0; j < N; j++)
787
+ if (RoundingAccuracy != 0.0)
788
+ Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy);
789
+ else
790
+ Sample[j] = FeatureSet->Features[i]->Params[j];
791
+ MakeSample (Clusterer, Sample, CharID);
792
+ }
793
+ CharID++;
794
+ }
795
+ if ( Sample != NULL ) free( Sample );
796
+ return( Clusterer );
797
+
798
+ } /* SetUpForClustering */
799
+
800
+ /*---------------------------------------------------------------------------*/
801
+ void AddToNormProtosList(
802
+ LIST* NormProtoList,
803
+ LIST ProtoList,
804
+ char* CharName)
805
+ {
806
+ PROTOTYPE* Proto;
807
+ LABELEDLIST LabeledProtoList;
808
+
809
+ LabeledProtoList = NewLabeledList(CharName);
810
+ iterate(ProtoList)
811
+ {
812
+ Proto = (PROTOTYPE *) first_node (ProtoList);
813
+ LabeledProtoList->List = push(LabeledProtoList->List, Proto);
814
+ }
815
+ *NormProtoList = push(*NormProtoList, LabeledProtoList);
816
+ }
817
+
818
+ /*-------------------------------------------------------------------------*/
819
+ void WriteProtos(
820
+ FILE *File,
821
+ uinT16 N,
822
+ LIST ProtoList,
823
+ BOOL8 WriteSigProtos,
824
+ BOOL8 WriteInsigProtos)
825
+ {
826
+ PROTOTYPE *Proto;
827
+
828
+ // write prototypes
829
+ iterate(ProtoList)
830
+ {
831
+ Proto = (PROTOTYPE *) first_node ( ProtoList );
832
+ if (( Proto->Significant && WriteSigProtos ) ||
833
+ ( ! Proto->Significant && WriteInsigProtos ) )
834
+ WritePrototype( File, N, Proto );
835
+ }
836
+ } // WriteProtos
837
+
838
+ /*---------------------------------------------------------------------------*/
839
+ int NumberOfProtos(
840
+ LIST ProtoList,
841
+ BOOL8 CountSigProtos,
842
+ BOOL8 CountInsigProtos)
843
+ {
844
+ int N = 0;
845
+ PROTOTYPE *Proto;
846
+
847
+ iterate(ProtoList)
848
+ {
849
+ Proto = (PROTOTYPE *) first_node ( ProtoList );
850
+ if (( Proto->Significant && CountSigProtos ) ||
851
+ ( ! Proto->Significant && CountInsigProtos ) )
852
+ N++;
853
+ }
854
+ return(N);
855
+ }