tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1341 @@
1
+ /******************************************************************************
2
+ ** Filename: mfTraining.c
3
+ ** Purpose: Separates training pages into files for each character.
4
+ ** Strips from files only the features and their parameters of
5
+ the feature type mf.
6
+ ** Author: Dan Johnson
7
+ ** Revisment: Christy Russon
8
+ ** Environment: HPUX 6.5
9
+ ** Library: HPUX 6.5
10
+ ** History: Fri Aug 18 08:53:50 1989, DSJ, Created.
11
+ ** 5/25/90, DSJ, Adapted to multiple feature types.
12
+ ** Tuesday, May 17, 1998 Changes made to make feature specific and
13
+ ** simplify structures. First step in simplifying training process.
14
+ **
15
+ ** (c) Copyright Hewlett-Packard Company, 1988.
16
+ ** Licensed under the Apache License, Version 2.0 (the "License");
17
+ ** you may not use this file except in compliance with the License.
18
+ ** You may obtain a copy of the License at
19
+ ** http://www.apache.org/licenses/LICENSE-2.0
20
+ ** Unless required by applicable law or agreed to in writing, software
21
+ ** distributed under the License is distributed on an "AS IS" BASIS,
22
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23
+ ** See the License for the specific language governing permissions and
24
+ ** limitations under the License.
25
+ ******************************************************************************/
26
+ /**----------------------------------------------------------------------------
27
+ Include Files and Type Defines
28
+ ----------------------------------------------------------------------------**/
29
+ #include "oldlist.h"
30
+ #include "efio.h"
31
+ #include "emalloc.h"
32
+ #include "featdefs.h"
33
+ #include "tessopt.h"
34
+ #include "ocrfeatures.h"
35
+ #include "mf.h"
36
+ #include "general.h"
37
+ #include "clusttool.h"
38
+ #include "cluster.h"
39
+ #include "protos.h"
40
+ #include "minmax.h"
41
+ #include "debug.h"
42
+ #include "tprintf.h"
43
+ #include "const.h"
44
+ #include "mergenf.h"
45
+ #include "name2char.h"
46
+ #include "intproto.h"
47
+ #include "variables.h"
48
+ #include "freelist.h"
49
+ #include "efio.h"
50
+ #include "danerror.h"
51
+ #include "globals.h"
52
+
53
+ #include <string.h>
54
+ #include <stdio.h>
55
+ #define _USE_MATH_DEFINES
56
+ #include <math.h>
57
+ #ifdef WIN32
58
+ #ifndef M_PI
59
+ #define M_PI 3.14159265358979323846
60
+ #endif
61
+ #endif
62
+
63
+ #define MAXNAMESIZE 80
64
+ #define MAX_NUM_SAMPLES 10000
65
+ #define PROGRAM_FEATURE_TYPE "mf"
66
+ #define MINSD (1.0f / 128.0f)
67
+ #define MINSD_ANGLE (1.0f / 64.0f)
68
+
69
+ int row_number; /* cjn: fixes link problem */
70
+
71
+ typedef struct
72
+ {
73
+ char *Label;
74
+ int SampleCount;
75
+ LIST List;
76
+ }
77
+ LABELEDLISTNODE, *LABELEDLIST;
78
+
79
+ typedef struct
80
+ {
81
+ char* Label;
82
+ int NumMerged[MAX_NUM_PROTOS];
83
+ CLASS_TYPE Class;
84
+ }MERGE_CLASS_NODE;
85
+ typedef MERGE_CLASS_NODE* MERGE_CLASS;
86
+
87
/* Rounds x to the nearest multiple of frag.  Every argument and the whole
   expansion are parenthesized so that expression arguments such as
   round(a + b, c + d) are evaluated as written. */
#define round(x, frag) (floor((x) / (frag) + .5) * (frag))
88
+
89
+ /**----------------------------------------------------------------------------
90
+ Public Function Prototypes
91
+ ----------------------------------------------------------------------------**/
92
+ int main (
93
+ int argc,
94
+ char **argv);
95
+
96
+ /**----------------------------------------------------------------------------
97
+ Private Function Prototypes
98
+ ----------------------------------------------------------------------------**/
99
+ void ParseArguments(
100
+ int argc,
101
+ char **argv);
102
+
103
+ char *GetNextFilename ();
104
+
105
+ LIST ReadTrainingSamples (
106
+ FILE *File);
107
+
108
+ LABELEDLIST FindList (
109
+ LIST List,
110
+ char *Label);
111
+
112
+ MERGE_CLASS FindClass (
113
+ LIST List,
114
+ char *Label);
115
+
116
+ LABELEDLIST NewLabeledList (
117
+ char *Label);
118
+
119
+ MERGE_CLASS NewLabeledClass (
120
+ char *Label);
121
+
122
+ void WriteTrainingSamples (
123
+ char *Directory,
124
+ LIST CharList);
125
+
126
+ void WriteClusteredTrainingSamples (
127
+ char *Directory,
128
+ LIST ProtoList,
129
+ CLUSTERER *Clusterer,
130
+ LABELEDLIST CharSample);
131
+ /**/
132
+ void WriteMergedTrainingSamples(
133
+ char *Directory,
134
+ LIST ClassList);
135
+
136
+ void WriteMicrofeat(
137
+ char *Directory,
138
+ LIST ClassList);
139
+
140
+ void WriteProtos(
141
+ FILE* File,
142
+ MERGE_CLASS MergeClass);
143
+
144
+ void WriteConfigs(
145
+ FILE* File,
146
+ CLASS_TYPE Class);
147
+
148
+ void FreeTrainingSamples (
149
+ LIST CharList);
150
+
151
+ void FreeLabeledClassList (
152
+ LIST ClassList);
153
+
154
+ void FreeLabeledList (
155
+ LABELEDLIST LabeledList);
156
+
157
+ CLUSTERER *SetUpForClustering(
158
+ LABELEDLIST CharSample);
159
+ /*
160
+ PARAMDESC *ConvertToPARAMDESC(
161
+ PARAM_DESC* Param_Desc,
162
+ int N);
163
+ */
164
+ void MergeInsignificantProtos(LIST ProtoList, const char* label,
165
+ CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
166
+
167
+ LIST RemoveInsignificantProtos(
168
+ LIST ProtoList,
169
+ BOOL8 KeepSigProtos,
170
+ BOOL8 KeepInsigProtos,
171
+ int N);
172
+
173
+ void CleanUpUnusedData(
174
+ LIST ProtoList);
175
+
176
+ void Normalize (
177
+ float *Values);
178
+
179
+ void SetUpForFloat2Int(
180
+ LIST LabeledClassList);
181
+
182
+ void WritePFFMTable(INT_TEMPLATES Templates, const char* filename);
183
+
184
+ //--------------Global Data Definitions and Declarations--------------
185
+ static char FontName[MAXNAMESIZE];
186
+ // globals used for parsing command line arguments
187
+ static char *Directory = NULL;
188
+ static int MaxNumSamples = MAX_NUM_SAMPLES;
189
+ static int Argc;
190
+ static char **Argv;
191
+
192
+ // globals used to control what information is saved in the output file
193
+ static BOOL8 ShowAllSamples = FALSE;
194
+ static BOOL8 ShowSignificantProtos = TRUE;
195
+ static BOOL8 ShowInsignificantProtos = FALSE;
196
+
197
+ // global variable to hold configuration parameters to control clustering
198
+ // -M 0.625 -B 0.05 -I 1.0 -C 1e-6.
199
+ static CLUSTERCONFIG Config =
200
+ { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 };
201
+
202
+ static FLOAT32 RoundingAccuracy = 0.0f;
203
+
204
+ // The unicharset used during mftraining
205
+ static UNICHARSET unicharset_mftraining;
206
+
207
+ const char* test_ch = "";
208
+
209
+ /*----------------------------------------------------------------------------
210
+ Public Code
211
+ -----------------------------------------------------------------------------*/
212
+ void DisplayProtoList(const char* ch, LIST protolist) {
213
+ void* window = c_create_window("Char samples", 50, 200,
214
+ 520, 520, -130.0, 130.0, -130.0, 130.0);
215
+ LIST proto = protolist;
216
+ iterate(proto) {
217
+ PROTOTYPE* prototype = reinterpret_cast<PROTOTYPE *>(first_node(proto));
218
+ if (prototype->Significant)
219
+ c_line_color_index(window, Green);
220
+ else if (prototype->NumSamples == 0)
221
+ c_line_color_index(window, Blue);
222
+ else if (prototype->Merged)
223
+ c_line_color_index(window, Magenta);
224
+ else
225
+ c_line_color_index(window, Red);
226
+ float x = CenterX(prototype->Mean);
227
+ float y = CenterY(prototype->Mean);
228
+ double angle = OrientationOf(prototype->Mean) * 2 * M_PI;
229
+ float dx = static_cast<float>(LengthOf(prototype->Mean) * cos(angle) / 2);
230
+ float dy = static_cast<float>(LengthOf(prototype->Mean) * sin(angle) / 2);
231
+ c_move(window, (x - dx) * 256, (y - dy) * 256);
232
+ c_draw(window, (x + dx) * 256, (y + dy) * 256);
233
+ if (prototype->Significant)
234
+ tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n",
235
+ x, y, dx, dy, prototype->NumSamples);
236
+ else if (prototype->NumSamples > 0 && !prototype->Merged)
237
+ tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n",
238
+ x, y, dx, dy, prototype->NumSamples);
239
+ }
240
+ c_make_current(window);
241
+ }
242
+
243
+ /*---------------------------------------------------------------------------*/
244
+ int main (int argc, char **argv) {
245
+ /*
246
+ ** Parameters:
247
+ ** argc number of command line arguments
248
+ ** argv array of command line arguments
249
+ ** Globals: none
250
+ ** Operation:
251
+ ** This program reads in a text file consisting of feature
252
+ ** samples from a training page in the following format:
253
+ **
254
+ ** FontName CharName NumberOfFeatureTypes(N)
255
+ ** FeatureTypeName1 NumberOfFeatures(M)
256
+ ** Feature1
257
+ ** ...
258
+ ** FeatureM
259
+ ** FeatureTypeName2 NumberOfFeatures(M)
260
+ ** Feature1
261
+ ** ...
262
+ ** FeatureM
263
+ ** ...
264
+ ** FeatureTypeNameN NumberOfFeatures(M)
265
+ ** Feature1
266
+ ** ...
267
+ ** FeatureM
268
+ ** FontName CharName ...
269
+ **
270
+ ** The result of this program is a binary inttemp file used by
271
+ ** the OCR engine.
272
+ ** Return: none
273
+ ** Exceptions: none
274
+ ** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
275
+ ** Mon May 18 1998, Christy Russson, Revistion started.
276
+ */
277
+ char *PageName;
278
+ FILE *TrainingPage;
279
+ FILE *OutFile;
280
+ LIST CharList;
281
+ CLUSTERER *Clusterer = NULL;
282
+ LIST ProtoList = NIL;
283
+ LABELEDLIST CharSample;
284
+ PROTOTYPE *Prototype;
285
+ LIST ClassList = NIL;
286
+ int Cid, Pid;
287
+ PROTO Proto;
288
+ PROTO_STRUCT DummyProto;
289
+ BIT_VECTOR Config2;
290
+ MERGE_CLASS MergeClass;
291
+ INT_TEMPLATES IntTemplates;
292
+ LIST pCharList, pProtoList;
293
+ char Filename[MAXNAMESIZE];
294
+
295
+ // Clean the unichar set
296
+ unicharset_mftraining.clear();
297
+ // Space character needed to represent NIL classification
298
+ unicharset_mftraining.unichar_insert(" ");
299
+
300
+ ParseArguments (argc, argv);
301
+ InitFastTrainerVars ();
302
+ InitSubfeatureVars ();
303
+ while ((PageName = GetNextFilename()) != NULL) {
304
+ printf ("Reading %s ...\n", PageName);
305
+ TrainingPage = Efopen (PageName, "r");
306
+ CharList = ReadTrainingSamples (TrainingPage);
307
+ fclose (TrainingPage);
308
+ //WriteTrainingSamples (Directory, CharList);
309
+ pCharList = CharList;
310
+ iterate(pCharList) {
311
+ //Cluster
312
+ CharSample = (LABELEDLIST) first_node (pCharList);
313
+ // printf ("\nClustering %s ...", CharSample->Label);
314
+ Clusterer = SetUpForClustering(CharSample);
315
+ Config.MagicSamples = CharSample->SampleCount;
316
+ ProtoList = ClusterSamples(Clusterer, &Config);
317
+ CleanUpUnusedData(ProtoList);
318
+
319
+ //Merge
320
+ MergeInsignificantProtos(ProtoList, CharSample->Label,
321
+ Clusterer, &Config);
322
+ if (strcmp(test_ch, CharSample->Label) == 0)
323
+ DisplayProtoList(test_ch, ProtoList);
324
+ ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
325
+ ShowInsignificantProtos,
326
+ Clusterer->SampleSize);
327
+ FreeClusterer(Clusterer);
328
+ MergeClass = FindClass (ClassList, CharSample->Label);
329
+ if (MergeClass == NULL) {
330
+ MergeClass = NewLabeledClass (CharSample->Label);
331
+ ClassList = push (ClassList, MergeClass);
332
+ }
333
+ Cid = AddConfigToClass(MergeClass->Class);
334
+ pProtoList = ProtoList;
335
+ iterate (pProtoList) {
336
+ Prototype = (PROTOTYPE *) first_node (pProtoList);
337
+
338
+ // see if proto can be approximated by existing proto
339
+ Pid = FindClosestExistingProto(MergeClass->Class,
340
+ MergeClass->NumMerged, Prototype);
341
+ if (Pid == NO_PROTO) {
342
+ Pid = AddProtoToClass (MergeClass->Class);
343
+ Proto = ProtoIn (MergeClass->Class, Pid);
344
+ MakeNewFromOld (Proto, Prototype);
345
+ MergeClass->NumMerged[Pid] = 1;
346
+ }
347
+ else {
348
+ MakeNewFromOld (&DummyProto, Prototype);
349
+ ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
350
+ (FLOAT32) MergeClass->NumMerged[Pid], 1.0,
351
+ ProtoIn (MergeClass->Class, Pid));
352
+ MergeClass->NumMerged[Pid] ++;
353
+ }
354
+ Config2 = MergeClass->Class->Configurations[Cid];
355
+ AddProtoToConfig (Pid, Config2);
356
+ }
357
+ FreeProtoList (&ProtoList);
358
+ }
359
+ FreeTrainingSamples (CharList);
360
+ }
361
+ //WriteMergedTrainingSamples(Directory,ClassList);
362
+ WriteMicrofeat(Directory, ClassList);
363
+ InitIntProtoVars ();
364
+ InitPrototypes ();
365
+ SetUpForFloat2Int(ClassList);
366
+ IntTemplates = CreateIntTemplates(TrainingData, unicharset_mftraining);
367
+ strcpy (Filename, "");
368
+ if (Directory != NULL) {
369
+ strcat (Filename, Directory);
370
+ strcat (Filename, "/");
371
+ }
372
+ strcat (Filename, "inttemp");
373
+ #ifdef __UNIX__
374
+ OutFile = Efopen (Filename, "w");
375
+ #else
376
+ OutFile = Efopen (Filename, "wb");
377
+ #endif
378
+ WriteIntTemplates(OutFile, IntTemplates, unicharset_mftraining);
379
+ fclose (OutFile);
380
+ strcpy (Filename, "");
381
+ if (Directory != NULL) {
382
+ strcat (Filename, Directory);
383
+ strcat (Filename, "/");
384
+ }
385
+ strcat (Filename, "pffmtable");
386
+ // Now create pffmtable.
387
+ WritePFFMTable(IntTemplates, Filename);
388
+ printf ("Done!\n"); /**/
389
+ FreeLabeledClassList (ClassList);
390
+ return 0;
391
+ } /* main */
392
+
393
+
394
+ /**----------------------------------------------------------------------------
395
+ Private Code
396
+ ----------------------------------------------------------------------------**/
397
+ /*---------------------------------------------------------------------------*/
398
+ void ParseArguments(
399
+ int argc,
400
+ char **argv)
401
+
402
+ /*
403
+ ** Parameters:
404
+ ** argc number of command line arguments to parse
405
+ ** argv command line arguments
406
+ ** Globals:
407
+ ** ShowAllSamples flag controlling samples display
408
+ ** ShowSignificantProtos flag controlling proto display
409
+ ** ShowInsignificantProtos flag controlling proto display
410
+ ** Config current clustering parameters
411
+ ** tessoptarg, tessoptind defined by tessopt sys call
412
+ ** Argc, Argv global copies of argc and argv
413
+ ** Operation:
414
+ ** This routine parses the command line arguments that were
415
+ ** passed to the program. The legal arguments are:
416
+ ** -d "turn off display of samples"
417
+ ** -p "turn off significant protos"
418
+ ** -n "turn off insignificant proto"
419
+ ** -S [ spherical | elliptical | mixed | automatic ]
420
+ ** -M MinSamples "min samples per prototype (%)"
421
+ ** -B MaxIllegal "max illegal chars per cluster (%)"
422
+ ** -I Independence "0 to 1"
423
+ ** -C Confidence "1e-200 to 1.0"
424
+ ** -D Directory
425
+ ** -N MaxNumSamples
426
+ ** -R RoundingAccuracy
427
+ ** Return: none
428
+ ** Exceptions: Illegal options terminate the program.
429
+ ** History: 7/24/89, DSJ, Created.
430
+ */
431
+
432
+ {
433
+ int Option;
434
+ int ParametersRead;
435
+ BOOL8 Error;
436
+
437
+ Error = FALSE;
438
+ Argc = argc;
439
+ Argv = argv;
440
+ while (( Option = tessopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
441
+ {
442
+ switch ( Option )
443
+ {
444
+ case 'n':
445
+ ShowInsignificantProtos = FALSE;
446
+ break;
447
+ case 'p':
448
+ ShowSignificantProtos = FALSE;
449
+ break;
450
+ case 'd':
451
+ ShowAllSamples = FALSE;
452
+ break;
453
+ case 'C':
454
+ ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
455
+ if ( ParametersRead != 1 ) Error = TRUE;
456
+ else if ( Config.Confidence > 1 ) Config.Confidence = 1;
457
+ else if ( Config.Confidence < 0 ) Config.Confidence = 0;
458
+ break;
459
+ case 'I':
460
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
461
+ if ( ParametersRead != 1 ) Error = TRUE;
462
+ else if ( Config.Independence > 1 ) Config.Independence = 1;
463
+ else if ( Config.Independence < 0 ) Config.Independence = 0;
464
+ break;
465
+ case 'M':
466
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
467
+ if ( ParametersRead != 1 ) Error = TRUE;
468
+ else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
469
+ else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
470
+ break;
471
+ case 'B':
472
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
473
+ if ( ParametersRead != 1 ) Error = TRUE;
474
+ else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
475
+ else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
476
+ break;
477
+ case 'R':
478
+ ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
479
+ if ( ParametersRead != 1 ) Error = TRUE;
480
+ else if ( RoundingAccuracy > 0.01f ) RoundingAccuracy = 0.01f;
481
+ else if ( RoundingAccuracy < 0.0f ) RoundingAccuracy = 0.0f;
482
+ break;
483
+ case 'S':
484
+ switch ( tessoptarg[0] )
485
+ {
486
+ case 's': Config.ProtoStyle = spherical; break;
487
+ case 'e': Config.ProtoStyle = elliptical; break;
488
+ case 'm': Config.ProtoStyle = mixed; break;
489
+ case 'a': Config.ProtoStyle = automatic; break;
490
+ default: Error = TRUE;
491
+ }
492
+ break;
493
+ case 'D':
494
+ Directory = tessoptarg;
495
+ break;
496
+ case 'N':
497
+ if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
498
+ MaxNumSamples <= 0)
499
+ Error = TRUE;
500
+ break;
501
+ case '?':
502
+ Error = TRUE;
503
+ break;
504
+ }
505
+ if ( Error )
506
+ {
507
+ fprintf (stderr, "usage: %s [-D] [-P] [-N]\n", argv[0] );
508
+ fprintf (stderr, "\t[-S ProtoStyle]\n");
509
+ fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
510
+ fprintf (stderr, "\t[-d directory] [-n MaxNumSamples] [ TrainingPage ... ]\n");
511
+ exit (2);
512
+ }
513
+ }
514
+ } // ParseArguments
515
+
516
+ /*---------------------------------------------------------------------------*/
517
+ char *GetNextFilename ()
518
+ /*
519
+ ** Parameters: none
520
+ ** Globals:
521
+ ** tessoptind defined by tessopt sys call
522
+ ** Argc, Argv global copies of argc and argv
523
+ ** Operation:
524
+ ** This routine returns the next command line argument. If
525
+ ** there are no remaining command line arguments, it returns
526
+ ** NULL. This routine should only be called after all option
527
+ ** arguments have been parsed and removed with ParseArguments.
528
+ ** Return: Next command line argument or NULL.
529
+ ** Exceptions: none
530
+ ** History: Fri Aug 18 09:34:12 1989, DSJ, Created.
531
+ */
532
+
533
+ {
534
+ if (tessoptind < Argc)
535
+ return (Argv [tessoptind++]);
536
+ else
537
+ return (NULL);
538
+
539
+ } /* GetNextFilename */
540
+
541
+ /*---------------------------------------------------------------------------*/
542
+ LIST ReadTrainingSamples (
543
+ FILE *File)
544
+
545
+ /*
546
+ ** Parameters:
547
+ ** File open text file to read samples from
548
+ ** Globals: none
549
+ ** Operation:
550
+ ** This routine reads training samples from a file and
551
+ ** places them into a data structure which organizes the
552
+ ** samples by FontName and CharName. It then returns this
553
+ ** data structure.
554
+ ** Return: none
555
+ ** Exceptions: none
556
+ ** History: Fri Aug 18 13:11:39 1989, DSJ, Created.
557
+ ** Tue May 17 1998 simplifications to structure, illiminated
558
+ ** font, and feature specification levels of structure.
559
+ */
560
+
561
+ {
562
+ char unichar[UNICHAR_LEN + 1];
563
+ LABELEDLIST CharSample;
564
+ FEATURE_SET FeatureSamples;
565
+ LIST TrainingSamples = NIL;
566
+ CHAR_DESC CharDesc;
567
+ int Type, i;
568
+
569
+ while (fscanf (File, "%s %s", FontName, unichar) == 2) {
570
+ if (!unicharset_mftraining.contains_unichar(unichar)) {
571
+ unicharset_mftraining.unichar_insert(unichar);
572
+ if (unicharset_mftraining.size() > MAX_NUM_CLASSES) {
573
+ cprintf("Error: Size of unicharset of mftraining is "
574
+ "greater than MAX_NUM_CLASSES\n");
575
+ exit(1);
576
+ }
577
+ }
578
+ CharSample = FindList (TrainingSamples, unichar);
579
+ if (CharSample == NULL) {
580
+ CharSample = NewLabeledList (unichar);
581
+ TrainingSamples = push (TrainingSamples, CharSample);
582
+ }
583
+ CharDesc = ReadCharDescription (File);
584
+ Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
585
+ FeatureSamples = CharDesc->FeatureSets[Type];
586
+ for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
587
+ FEATURE f = FeatureSamples->Features[feature];
588
+ for (int dim =0; dim < f->Type->NumParams; ++dim)
589
+ f->Params[dim] += dim == MFDirection ?
590
+ UniformRandomNumber(-MINSD_ANGLE, MINSD_ANGLE) :
591
+ UniformRandomNumber(-MINSD, MINSD);
592
+ }
593
+ CharSample->List = push (CharSample->List, FeatureSamples);
594
+ CharSample->SampleCount++;
595
+ for (i = 0; i < CharDesc->NumFeatureSets; i++)
596
+ if (Type != i)
597
+ FreeFeatureSet(CharDesc->FeatureSets[i]);
598
+ free (CharDesc);
599
+ }
600
+ return (TrainingSamples);
601
+
602
+ } /* ReadTrainingSamples */
603
+
604
+ /*---------------------------------------------------------------------------*/
605
+ LABELEDLIST FindList (
606
+ LIST List,
607
+ char *Label)
608
+
609
+ /*
610
+ ** Parameters:
611
+ ** List list to search
612
+ ** Label label to search for
613
+ ** Globals: none
614
+ ** Operation:
615
+ ** This routine searches thru a list of labeled lists to find
616
+ ** a list with the specified label. If a matching labeled list
617
+ ** cannot be found, NULL is returned.
618
+ ** Return: Labeled list with the specified Label or NULL.
619
+ ** Exceptions: none
620
+ ** History: Fri Aug 18 15:57:41 1989, DSJ, Created.
621
+ */
622
+
623
+ {
624
+ LABELEDLIST LabeledList;
625
+
626
+ iterate (List)
627
+ {
628
+ LabeledList = (LABELEDLIST) first_node (List);
629
+ if (strcmp (LabeledList->Label, Label) == 0)
630
+ return (LabeledList);
631
+ }
632
+ return (NULL);
633
+
634
+ } /* FindList */
635
+
636
+ /*----------------------------------------------------------------------------*/
637
+ MERGE_CLASS FindClass (
638
+ LIST List,
639
+ char *Label)
640
+ {
641
+ MERGE_CLASS MergeClass;
642
+
643
+ iterate (List)
644
+ {
645
+ MergeClass = (MERGE_CLASS) first_node (List);
646
+ if (strcmp (MergeClass->Label, Label) == 0)
647
+ return (MergeClass);
648
+ }
649
+ return (NULL);
650
+
651
+ } /* FindClass */
652
+
653
+ /*---------------------------------------------------------------------------*/
654
+ LABELEDLIST NewLabeledList (
655
+ char *Label)
656
+
657
+ /*
658
+ ** Parameters:
659
+ ** Label label for new list
660
+ ** Globals: none
661
+ ** Operation:
662
+ ** This routine allocates a new, empty labeled list and gives
663
+ ** it the specified label.
664
+ ** Return: New, empty labeled list.
665
+ ** Exceptions: none
666
+ ** History: Fri Aug 18 16:08:46 1989, DSJ, Created.
667
+ */
668
+
669
+ {
670
+ LABELEDLIST LabeledList;
671
+
672
+ LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
673
+ LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
674
+ strcpy (LabeledList->Label, Label);
675
+ LabeledList->List = NIL;
676
+ LabeledList->SampleCount = 0;
677
+ return (LabeledList);
678
+
679
+ } /* NewLabeledList */
680
+
681
+ /*---------------------------------------------------------------------------*/
682
+ MERGE_CLASS NewLabeledClass (
683
+ char *Label)
684
+ {
685
+ MERGE_CLASS MergeClass;
686
+
687
+ MergeClass = (MERGE_CLASS) Emalloc (sizeof (MERGE_CLASS_NODE));
688
+ MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
689
+ strcpy (MergeClass->Label, Label);
690
+ MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
691
+ return (MergeClass);
692
+
693
+ } /* NewLabeledClass */
694
+
695
+ /*---------------------------------------------------------------------------*/
696
+ void WriteTrainingSamples (
697
+ char *Directory,
698
+ LIST CharList)
699
+
700
+ /*
701
+ ** Parameters:
702
+ ** Directory directory to place sample files into
703
+ ** FontList list of fonts used in the training samples
704
+ ** Globals:
705
+ ** MaxNumSamples max number of samples per class to write
706
+ ** Operation:
707
+ ** This routine writes the specified samples into files which
708
+ ** are organized according to the font name and character name
709
+ ** of the samples.
710
+ ** Return: none
711
+ ** Exceptions: none
712
+ ** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
713
+ */
714
+
715
+ {
716
+ LABELEDLIST CharSample;
717
+ FEATURE_SET FeatureSet;
718
+ LIST FeatureList;
719
+ FILE *File;
720
+ char Filename[MAXNAMESIZE];
721
+ int NumSamples;
722
+
723
+ iterate (CharList) // iterate thru all of the fonts
724
+ {
725
+ CharSample = (LABELEDLIST) first_node (CharList);
726
+
727
+ // construct the full pathname for the current samples file
728
+ strcpy (Filename, "");
729
+ if (Directory != NULL)
730
+ {
731
+ strcat (Filename, Directory);
732
+ strcat (Filename, "/");
733
+ }
734
+ strcat (Filename, FontName);
735
+ strcat (Filename, "/");
736
+ strcat (Filename, CharSample->Label);
737
+ strcat (Filename, ".");
738
+ strcat (Filename, PROGRAM_FEATURE_TYPE);
739
+ printf ("\nWriting %s ...", Filename);
740
+
741
+ /* if file does not exist, create a new one with an appropriate
742
+ header; otherwise append samples to the existing file */
743
+ File = fopen (Filename, "r");
744
+ if (File == NULL)
745
+ {
746
+ File = Efopen (Filename, "w");
747
+ WriteOldParamDesc
748
+ (File, FeatureDefs.FeatureDesc[ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)]);
749
+ }
750
+ else
751
+ {
752
+ fclose (File);
753
+ File = Efopen (Filename, "a");
754
+ }
755
+
756
+ // append samples onto the file
757
+ FeatureList = CharSample->List;
758
+ NumSamples = 0;
759
+ iterate (FeatureList)
760
+ {
761
+ if (NumSamples >= MaxNumSamples) break;
762
+
763
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
764
+ WriteFeatureSet (File, FeatureSet);
765
+ NumSamples++;
766
+ }
767
+ fclose (File);
768
+ }
769
+ } /* WriteTrainingSamples */
770
+
771
+
772
+ /*----------------------------------------------------------------------------*/
773
+ void WriteClusteredTrainingSamples (
774
+ char *Directory,
775
+ LIST ProtoList,
776
+ CLUSTERER *Clusterer,
777
+ LABELEDLIST CharSample)
778
+
779
+ /*
780
+ ** Parameters:
781
+ ** Directory directory to place sample files into
782
+ ** Globals:
783
+ ** MaxNumSamples max number of samples per class to write
784
+ ** Operation:
785
+ ** This routine writes the specified samples into files which
786
+ ** are organized according to the font name and character name
787
+ ** of the samples.
788
+ ** Return: none
789
+ ** Exceptions: none
790
+ ** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
791
+ */
792
+
793
+ {
794
+ FILE *File;
795
+ char Filename[MAXNAMESIZE];
796
+
797
+ strcpy (Filename, "");
798
+ if (Directory != NULL)
799
+ {
800
+ strcat (Filename, Directory);
801
+ strcat (Filename, "/");
802
+ }
803
+ strcat (Filename, FontName);
804
+ strcat (Filename, "/");
805
+ strcat (Filename, CharSample->Label);
806
+ strcat (Filename, ".");
807
+ strcat (Filename, PROGRAM_FEATURE_TYPE);
808
+ strcat (Filename, ".p");
809
+ printf ("\nWriting %s ...", Filename);
810
+ File = Efopen (Filename, "w");
811
+ WriteProtoList(File, Clusterer->SampleSize, Clusterer->ParamDesc,
812
+ ProtoList, ShowSignificantProtos, ShowInsignificantProtos);
813
+ fclose (File);
814
+
815
+ } /* WriteClusteredTrainingSamples */
816
+
817
+ /*---------------------------------------------------------------------------*/
818
+ void WriteMergedTrainingSamples(
819
+ char *Directory,
820
+ LIST ClassList)
821
+
822
+ {
823
+ FILE *File;
824
+ char Filename[MAXNAMESIZE];
825
+ MERGE_CLASS MergeClass;
826
+
827
+ iterate (ClassList)
828
+ {
829
+ MergeClass = (MERGE_CLASS) first_node (ClassList);
830
+ strcpy (Filename, "");
831
+ if (Directory != NULL)
832
+ {
833
+ strcat (Filename, Directory);
834
+ strcat (Filename, "/");
835
+ }
836
+ strcat (Filename, "Merged/");
837
+ strcat (Filename, MergeClass->Label);
838
+ strcat (Filename, PROTO_SUFFIX);
839
+ printf ("\nWriting Merged %s ...", Filename);
840
+ File = Efopen (Filename, "w");
841
+ WriteOldProtoFile (File, MergeClass->Class);
842
+ fclose (File);
843
+
844
+ strcpy (Filename, "");
845
+ if (Directory != NULL)
846
+ {
847
+ strcat (Filename, Directory);
848
+ strcat (Filename, "/");
849
+ }
850
+ strcat (Filename, "Merged/");
851
+ strcat (Filename, MergeClass->Label);
852
+ strcat (Filename, CONFIG_SUFFIX);
853
+ printf ("\nWriting Merged %s ...", Filename);
854
+ File = Efopen (Filename, "w");
855
+ WriteOldConfigFile (File, MergeClass->Class);
856
+ fclose (File);
857
+ }
858
+
859
+ } // WriteMergedTrainingSamples
860
+
861
+ /*--------------------------------------------------------------------------*/
862
+ void WriteMicrofeat(
863
+ char *Directory,
864
+ LIST ClassList)
865
+
866
+ {
867
+ FILE *File;
868
+ char Filename[MAXNAMESIZE];
869
+ MERGE_CLASS MergeClass;
870
+
871
+ strcpy (Filename, "");
872
+ if (Directory != NULL)
873
+ {
874
+ strcat (Filename, Directory);
875
+ strcat (Filename, "/");
876
+ }
877
+ strcat (Filename, "Microfeat");
878
+ File = Efopen (Filename, "w");
879
+ printf ("\nWriting Merged %s ...", Filename);
880
+ iterate(ClassList)
881
+ {
882
+ MergeClass = (MERGE_CLASS) first_node (ClassList);
883
+ WriteProtos(File, MergeClass);
884
+ WriteConfigs(File, MergeClass->Class);
885
+ }
886
+ fclose (File);
887
+ } // WriteMicrofeat
888
+
889
+ /*---------------------------------------------------------------------------*/
890
+ void WriteProtos(
891
+ FILE* File,
892
+ MERGE_CLASS MergeClass)
893
+ {
894
+ float Values[3];
895
+ int i;
896
+ PROTO Proto;
897
+
898
+ fprintf(File, "%s\n", MergeClass->Label);
899
+ fprintf(File, "%d\n", MergeClass->Class->NumProtos);
900
+ for(i=0; i < (MergeClass->Class)->NumProtos; i++)
901
+ {
902
+ Proto = ProtoIn(MergeClass->Class,i);
903
+ fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", Proto->X, Proto->Y,
904
+ Proto->Length, Proto->Angle);
905
+ Values[0] = Proto->X;
906
+ Values[1] = Proto->Y;
907
+ Values[2] = Proto->Angle;
908
+ Normalize(Values);
909
+ fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]);
910
+ }
911
+ } // WriteProtos
912
+
913
+ /*----------------------------------------------------------------------------*/
914
+ void WriteConfigs(
915
+ FILE* File,
916
+ CLASS_TYPE Class)
917
+ {
918
+ BIT_VECTOR Config;
919
+ int i, j, WordsPerConfig;
920
+
921
+ WordsPerConfig = WordsInVectorOfSize(Class->NumProtos);
922
+ fprintf(File, "%d %d\n", Class->NumConfigs, WordsPerConfig);
923
+ for(i=0; i < Class->NumConfigs; i++)
924
+ {
925
+ Config = Class->Configurations[i];
926
+ for(j=0; j < WordsPerConfig; j++)
927
+ fprintf(File, "%08x ", Config[j]);
928
+ fprintf(File, "\n");
929
+ }
930
+ fprintf(File, "\n");
931
+ } // WriteConfigs
932
+
933
+ /*---------------------------------------------------------------------------*/
934
+ void FreeTrainingSamples (
935
+ LIST CharList)
936
+
937
+ /*
938
+ ** Parameters:
939
+ ** FontList list of all fonts in document
940
+ ** Globals: none
941
+ ** Operation:
942
+ ** This routine deallocates all of the space allocated to
943
+ ** the specified list of training samples.
944
+ ** Return: none
945
+ ** Exceptions: none
946
+ ** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
947
+ */
948
+
949
+ {
950
+ LABELEDLIST CharSample;
951
+ FEATURE_SET FeatureSet;
952
+ LIST FeatureList;
953
+
954
+
955
+ // printf ("FreeTrainingSamples...\n");
956
+ iterate (CharList) /* iterate thru all of the fonts */
957
+ {
958
+ CharSample = (LABELEDLIST) first_node (CharList);
959
+ FeatureList = CharSample->List;
960
+ iterate (FeatureList) /* iterate thru all of the classes */
961
+ {
962
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
963
+ FreeFeatureSet (FeatureSet);
964
+ }
965
+ FreeLabeledList (CharSample);
966
+ }
967
+ destroy (CharList);
968
+
969
+ } /* FreeTrainingSamples */
970
+
971
+ /*-----------------------------------------------------------------------------*/
972
+ void FreeLabeledClassList (
973
+ LIST ClassList)
974
+
975
+ /*
976
+ ** Parameters:
977
+ ** FontList list of all fonts in document
978
+ ** Globals: none
979
+ ** Operation:
980
+ ** This routine deallocates all of the space allocated to
981
+ ** the specified list of training samples.
982
+ ** Return: none
983
+ ** Exceptions: none
984
+ ** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
985
+ */
986
+
987
+ {
988
+ MERGE_CLASS MergeClass;
989
+
990
+ iterate (ClassList) /* iterate thru all of the fonts */
991
+ {
992
+ MergeClass = (MERGE_CLASS) first_node (ClassList);
993
+ free (MergeClass->Label);
994
+ FreeClass(MergeClass->Class);
995
+ free (MergeClass);
996
+ }
997
+ destroy (ClassList);
998
+
999
+ } /* FreeLabeledClassList */
1000
+
1001
+ /*---------------------------------------------------------------------------*/
1002
+ void FreeLabeledList (
1003
+ LABELEDLIST LabeledList)
1004
+
1005
+ /*
1006
+ ** Parameters:
1007
+ ** LabeledList labeled list to be freed
1008
+ ** Globals: none
1009
+ ** Operation:
1010
+ ** This routine deallocates all of the memory consumed by
1011
+ ** a labeled list. It does not free any memory which may be
1012
+ ** consumed by the items in the list.
1013
+ ** Return: none
1014
+ ** Exceptions: none
1015
+ ** History: Fri Aug 18 17:52:45 1989, DSJ, Created.
1016
+ */
1017
+
1018
+ {
1019
+ destroy (LabeledList->List);
1020
+ free (LabeledList->Label);
1021
+ free (LabeledList);
1022
+
1023
+ } /* FreeLabeledList */
1024
+
1025
+ /*---------------------------------------------------------------------------*/
1026
+ CLUSTERER *SetUpForClustering(
1027
+ LABELEDLIST CharSample)
1028
+
1029
+ /*
1030
+ ** Parameters:
1031
+ ** CharSample: LABELEDLIST that holds all the feature information for a
1032
+ ** given character.
1033
+ ** Globals:
1034
+ ** None
1035
+ ** Operation:
1036
+ ** This routine reads samples from a LABELEDLIST and enters
1037
+ ** those samples into a clusterer data structure. This
1038
+ ** data structure is then returned to the caller.
1039
+ ** Return:
1040
+ ** Pointer to new clusterer data structure.
1041
+ ** Exceptions:
1042
+ ** None
1043
+ ** History:
1044
+ ** 8/16/89, DSJ, Created.
1045
+ */
1046
+
1047
+ {
1048
+ uinT16 N;
1049
+ int i, j;
1050
+ FLOAT32 *Sample = NULL;
1051
+ CLUSTERER *Clusterer;
1052
+ inT32 CharID;
1053
+ LIST FeatureList = NULL;
1054
+ FEATURE_SET FeatureSet = NULL;
1055
+ FEATURE_DESC FeatureDesc = NULL;
1056
+ // PARAM_DESC* ParamDesc;
1057
+
1058
+ FeatureDesc = FeatureDefs.FeatureDesc[ShortNameToFeatureType(PROGRAM_FEATURE_TYPE)];
1059
+ N = FeatureDesc->NumParams;
1060
+ // ParamDesc = ConvertToPARAMDESC(FeatureDesc->ParamDesc, N);
1061
+ Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc);
1062
+ // free(ParamDesc);
1063
+
1064
+ FeatureList = CharSample->List;
1065
+ CharID = 0;
1066
+ iterate(FeatureList)
1067
+ {
1068
+ if (CharID >= MaxNumSamples) break;
1069
+
1070
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
1071
+ for (i=0; i < FeatureSet->MaxNumFeatures; i++)
1072
+ {
1073
+ if (Sample == NULL)
1074
+ Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1075
+ for (j=0; j < N; j++)
1076
+ if (RoundingAccuracy != 0.0f)
1077
+ Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy);
1078
+ else
1079
+ Sample[j] = FeatureSet->Features[i]->Params[j];
1080
+ MakeSample (Clusterer, Sample, CharID);
1081
+ }
1082
+ CharID++;
1083
+ }
1084
+ if ( Sample != NULL ) free( Sample );
1085
+ return( Clusterer );
1086
+
1087
+ } /* SetUpForClustering */
1088
+
1089
+ /*------------------------------------------------------------------------*/
1090
+ void MergeInsignificantProtos(LIST ProtoList, const char* label,
1091
+ CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
1092
+ PROTOTYPE *Prototype;
1093
+ bool debug = strcmp(test_ch, label) == 0;
1094
+
1095
+ LIST pProtoList = ProtoList;
1096
+ iterate(pProtoList) {
1097
+ Prototype = (PROTOTYPE *) first_node (pProtoList);
1098
+ if (Prototype->Significant || Prototype->Merged)
1099
+ continue;
1100
+ FLOAT32 best_dist = 0.125;
1101
+ PROTOTYPE* best_match = NULL;
1102
+ // Find the nearest alive prototype.
1103
+ LIST list_it = ProtoList;
1104
+ iterate(list_it) {
1105
+ PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it);
1106
+ if (test_p != Prototype && !test_p->Merged) {
1107
+ FLOAT32 dist = ComputeDistance(Clusterer->SampleSize,
1108
+ Clusterer->ParamDesc,
1109
+ Prototype->Mean, test_p->Mean);
1110
+ if (dist < best_dist) {
1111
+ best_match = test_p;
1112
+ best_dist = dist;
1113
+ }
1114
+ }
1115
+ }
1116
+ if (best_match != NULL && !best_match->Significant) {
1117
+ if (debug)
1118
+ tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
1119
+ best_match->NumSamples, Prototype->NumSamples,
1120
+ best_match->Mean[0], best_match->Mean[1],
1121
+ Prototype->Mean[0], Prototype->Mean[1]);
1122
+ best_match->NumSamples = MergeClusters(Clusterer->SampleSize,
1123
+ Clusterer->ParamDesc,
1124
+ best_match->NumSamples,
1125
+ Prototype->NumSamples,
1126
+ best_match->Mean,
1127
+ best_match->Mean, Prototype->Mean);
1128
+ Prototype->NumSamples = 0;
1129
+ Prototype->Merged = 1;
1130
+ } else if (best_match != NULL) {
1131
+ if (debug)
1132
+ tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
1133
+ Prototype->Mean[0], Prototype->Mean[1],
1134
+ best_match->Mean[0], best_match->Mean[1]);
1135
+ Prototype->Merged = 1;
1136
+ }
1137
+ }
1138
+ // Mark significant those that now have enough samples.
1139
+ int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar);
1140
+ pProtoList = ProtoList;
1141
+ iterate(pProtoList) {
1142
+ Prototype = (PROTOTYPE *) first_node (pProtoList);
1143
+ // Process insignificant protos that do not match a green one
1144
+ if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&
1145
+ !Prototype->Merged) {
1146
+ if (debug)
1147
+ tprintf("Red proto at %g,%g becoming green\n",
1148
+ Prototype->Mean[0], Prototype->Mean[1]);
1149
+ Prototype->Significant = true;
1150
+ }
1151
+ }
1152
+ } /* MergeInsignificantProtos */
1153
+
1154
+ /*------------------------------------------------------------------------*/
1155
+ LIST RemoveInsignificantProtos(
1156
+ LIST ProtoList,
1157
+ BOOL8 KeepSigProtos,
1158
+ BOOL8 KeepInsigProtos,
1159
+ int N)
1160
+
1161
+ {
1162
+ LIST NewProtoList = NIL;
1163
+ LIST pProtoList;
1164
+ PROTOTYPE* Proto;
1165
+ PROTOTYPE* NewProto;
1166
+ int i;
1167
+
1168
+ pProtoList = ProtoList;
1169
+ iterate(pProtoList)
1170
+ {
1171
+ Proto = (PROTOTYPE *) first_node (pProtoList);
1172
+ if ((Proto->Significant && KeepSigProtos) ||
1173
+ (!Proto->Significant && KeepInsigProtos))
1174
+ {
1175
+ NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
1176
+
1177
+ NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1178
+ NewProto->Significant = Proto->Significant;
1179
+ NewProto->Style = Proto->Style;
1180
+ NewProto->NumSamples = Proto->NumSamples;
1181
+ NewProto->Cluster = NULL;
1182
+ NewProto->Distrib = NULL;
1183
+
1184
+ for (i=0; i < N; i++)
1185
+ NewProto->Mean[i] = Proto->Mean[i];
1186
+ if (Proto->Variance.Elliptical != NULL)
1187
+ {
1188
+ NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1189
+ for (i=0; i < N; i++)
1190
+ NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
1191
+ }
1192
+ else
1193
+ NewProto->Variance.Elliptical = NULL;
1194
+ //---------------------------------------------
1195
+ if (Proto->Magnitude.Elliptical != NULL)
1196
+ {
1197
+ NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1198
+ for (i=0; i < N; i++)
1199
+ NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
1200
+ }
1201
+ else
1202
+ NewProto->Magnitude.Elliptical = NULL;
1203
+ //------------------------------------------------
1204
+ if (Proto->Weight.Elliptical != NULL)
1205
+ {
1206
+ NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1207
+ for (i=0; i < N; i++)
1208
+ NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
1209
+ }
1210
+ else
1211
+ NewProto->Weight.Elliptical = NULL;
1212
+
1213
+ NewProto->TotalMagnitude = Proto->TotalMagnitude;
1214
+ NewProto->LogMagnitude = Proto->LogMagnitude;
1215
+ NewProtoList = push_last(NewProtoList, NewProto);
1216
+ }
1217
+ }
1218
+ //FreeProtoList (ProtoList);
1219
+ return (NewProtoList);
1220
+ } /* RemoveInsignificantProtos */
1221
+ /*-----------------------------------------------------------------------------*/
1222
+ void CleanUpUnusedData(
1223
+ LIST ProtoList)
1224
+ {
1225
+ PROTOTYPE* Prototype;
1226
+
1227
+ iterate(ProtoList)
1228
+ {
1229
+ Prototype = (PROTOTYPE *) first_node (ProtoList);
1230
+ if(Prototype->Variance.Elliptical != NULL)
1231
+ {
1232
+ memfree(Prototype->Variance.Elliptical);
1233
+ Prototype->Variance.Elliptical = NULL;
1234
+ }
1235
+ if(Prototype->Magnitude.Elliptical != NULL)
1236
+ {
1237
+ memfree(Prototype->Magnitude.Elliptical);
1238
+ Prototype->Magnitude.Elliptical = NULL;
1239
+ }
1240
+ if(Prototype->Weight.Elliptical != NULL)
1241
+ {
1242
+ memfree(Prototype->Weight.Elliptical);
1243
+ Prototype->Weight.Elliptical = NULL;
1244
+ }
1245
+ }
1246
+ }
1247
+
1248
+ /*--------------------------------------------------------------------------*/
1249
+ void Normalize (
1250
+ float *Values)
1251
+ {
1252
+ register float Slope;
1253
+ register float Intercept;
1254
+ register float Normalizer;
1255
+
1256
+ Slope = tan (Values [2] * 2 * PI);
1257
+ Intercept = Values [1] - Slope * Values [0];
1258
+ Normalizer = 1 / sqrt (Slope * Slope + 1.0);
1259
+
1260
+ Values [0] = Slope * Normalizer;
1261
+ Values [1] = - Normalizer;
1262
+ Values [2] = Intercept * Normalizer;
1263
+ } // Normalize
1264
+
1265
+ /** SetUpForFloat2Int **************************************************/
1266
+ void SetUpForFloat2Int(
1267
+ LIST LabeledClassList)
1268
+ {
1269
+ MERGE_CLASS MergeClass;
1270
+ CLASS_TYPE Class;
1271
+ int NumProtos;
1272
+ int NumConfigs;
1273
+ int NumWords;
1274
+ int i, j;
1275
+ float Values[3];
1276
+ PROTO NewProto;
1277
+ PROTO OldProto;
1278
+ BIT_VECTOR NewConfig;
1279
+ BIT_VECTOR OldConfig;
1280
+
1281
+ // printf("Float2Int ...\n");
1282
+
1283
+ iterate(LabeledClassList)
1284
+ {
1285
+ MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
1286
+ Class = &TrainingData[unicharset_mftraining.unichar_to_id(
1287
+ MergeClass->Label)];
1288
+ NumProtos = (MergeClass->Class)->NumProtos;
1289
+ NumConfigs = MergeClass->Class->NumConfigs;
1290
+
1291
+ Class->NumProtos = NumProtos;
1292
+ Class->MaxNumProtos = NumProtos;
1293
+ Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
1294
+ for(i=0; i < NumProtos; i++)
1295
+ {
1296
+ NewProto = ProtoIn(Class, i);
1297
+ OldProto = ProtoIn(MergeClass->Class, i);
1298
+ Values[0] = OldProto->X;
1299
+ Values[1] = OldProto->Y;
1300
+ Values[2] = OldProto->Angle;
1301
+ Normalize(Values);
1302
+ NewProto->X = OldProto->X;
1303
+ NewProto->Y = OldProto->Y;
1304
+ NewProto->Length = OldProto->Length;
1305
+ NewProto->Angle = OldProto->Angle;
1306
+ NewProto->A = Values[0];
1307
+ NewProto->B = Values[1];
1308
+ NewProto->C = Values[2];
1309
+ }
1310
+
1311
+ Class->NumConfigs = NumConfigs;
1312
+ Class->MaxNumConfigs = NumConfigs;
1313
+ Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
1314
+ NumWords = WordsInVectorOfSize(NumProtos);
1315
+ for(i=0; i < NumConfigs; i++)
1316
+ {
1317
+ NewConfig = NewBitVector(NumProtos);
1318
+ OldConfig = MergeClass->Class->Configurations[i];
1319
+ for(j=0; j < NumWords; j++)
1320
+ NewConfig[j] = OldConfig[j];
1321
+ Class->Configurations[i] = NewConfig;
1322
+ }
1323
+ }
1324
+ } // SetUpForFloat2Int
1325
+
1326
+ /*--------------------------------------------------------------------------*/
1327
+ void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {
1328
+ FILE* fp = Efopen(filename, "wb");
1329
+ /* then write out each class */
1330
+ for (int i = 0; i < Templates->NumClasses; i++) {
1331
+ int MaxLength = 0;
1332
+ INT_CLASS Class = Templates->Class[i];
1333
+ for (int ConfigId = 0; ConfigId < Class->NumConfigs; ConfigId++) {
1334
+ if (Class->ConfigLengths[ConfigId] > MaxLength)
1335
+ MaxLength = Class->ConfigLengths[ConfigId];
1336
+ }
1337
+ fprintf(fp, "%s %d\n", unicharset_mftraining.id_to_unichar(
1338
+ Templates->ClassIdFor[i]), MaxLength);
1339
+ }
1340
+ fclose(fp);
1341
+ } // WritePFFMTable