tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1341 @@
1
+ /******************************************************************************
2
+ ** Filename: mfTraining.c
3
+ ** Purpose: Separates training pages into files for each character.
4
+ ** Strips from files only the features and their parameters of
5
+ the feature type mf.
6
+ ** Author: Dan Johnson
7
+ ** Revisment: Christy Russon
8
+ ** Environment: HPUX 6.5
9
+ ** Library: HPUX 6.5
10
+ ** History: Fri Aug 18 08:53:50 1989, DSJ, Created.
11
+ ** 5/25/90, DSJ, Adapted to multiple feature types.
12
+ ** Tuesday, May 17, 1998 Changes made to make feature specific and
13
+ ** simplify structures. First step in simplifying training process.
14
+ **
15
+ ** (c) Copyright Hewlett-Packard Company, 1988.
16
+ ** Licensed under the Apache License, Version 2.0 (the "License");
17
+ ** you may not use this file except in compliance with the License.
18
+ ** You may obtain a copy of the License at
19
+ ** http://www.apache.org/licenses/LICENSE-2.0
20
+ ** Unless required by applicable law or agreed to in writing, software
21
+ ** distributed under the License is distributed on an "AS IS" BASIS,
22
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23
+ ** See the License for the specific language governing permissions and
24
+ ** limitations under the License.
25
+ ******************************************************************************/
26
+ /**----------------------------------------------------------------------------
27
+ Include Files and Type Defines
28
+ ----------------------------------------------------------------------------**/
29
+ #include "oldlist.h"
30
+ #include "efio.h"
31
+ #include "emalloc.h"
32
+ #include "featdefs.h"
33
+ #include "tessopt.h"
34
+ #include "ocrfeatures.h"
35
+ #include "mf.h"
36
+ #include "general.h"
37
+ #include "clusttool.h"
38
+ #include "cluster.h"
39
+ #include "protos.h"
40
+ #include "minmax.h"
41
+ #include "debug.h"
42
+ #include "tprintf.h"
43
+ #include "const.h"
44
+ #include "mergenf.h"
45
+ #include "name2char.h"
46
+ #include "intproto.h"
47
+ #include "variables.h"
48
+ #include "freelist.h"
49
+ #include "efio.h"
50
+ #include "danerror.h"
51
+ #include "globals.h"
52
+
53
+ #include <string.h>
54
+ #include <stdio.h>
55
+ #define _USE_MATH_DEFINES
56
+ #include <math.h>
57
+ #ifdef WIN32
58
+ #ifndef M_PI
59
+ #define M_PI 3.14159265358979323846
60
+ #endif
61
+ #endif
62
+
63
+ #define MAXNAMESIZE 80
64
+ #define MAX_NUM_SAMPLES 10000
65
+ #define PROGRAM_FEATURE_TYPE "mf"
66
+ #define MINSD (1.0f / 128.0f)
67
+ #define MINSD_ANGLE (1.0f / 64.0f)
68
+
69
+ int row_number; /* cjn: fixes link problem */
70
+
71
+ typedef struct
72
+ {
73
+ char *Label;
74
+ int SampleCount;
75
+ LIST List;
76
+ }
77
+ LABELEDLISTNODE, *LABELEDLIST;
78
+
79
+ typedef struct
80
+ {
81
+ char* Label;
82
+ int NumMerged[MAX_NUM_PROTOS];
83
+ CLASS_TYPE Class;
84
+ }MERGE_CLASS_NODE;
85
+ typedef MERGE_CLASS_NODE* MERGE_CLASS;
86
+
87
/* Rounds x to the nearest multiple of frag.
   Both arguments are parenthesized so that expressions such as
   round(a + b, step) or round(x, a + b) expand with the intended precedence.
   frag is evaluated twice, so it must not have side effects.
   NOTE(review): this two-argument macro shares its name with the C99
   one-argument round() from <math.h>; it must stay defined after that
   header is included. */
#define round(x,frag) (floor((x) / (frag) + .5) * (frag))
88
+
89
+ /**----------------------------------------------------------------------------
90
+ Public Function Prototypes
91
+ ----------------------------------------------------------------------------**/
92
+ int main (
93
+ int argc,
94
+ char **argv);
95
+
96
+ /**----------------------------------------------------------------------------
97
+ Private Function Prototypes
98
+ ----------------------------------------------------------------------------**/
99
+ void ParseArguments(
100
+ int argc,
101
+ char **argv);
102
+
103
+ char *GetNextFilename ();
104
+
105
+ LIST ReadTrainingSamples (
106
+ FILE *File);
107
+
108
+ LABELEDLIST FindList (
109
+ LIST List,
110
+ char *Label);
111
+
112
+ MERGE_CLASS FindClass (
113
+ LIST List,
114
+ char *Label);
115
+
116
+ LABELEDLIST NewLabeledList (
117
+ char *Label);
118
+
119
+ MERGE_CLASS NewLabeledClass (
120
+ char *Label);
121
+
122
+ void WriteTrainingSamples (
123
+ char *Directory,
124
+ LIST CharList);
125
+
126
+ void WriteClusteredTrainingSamples (
127
+ char *Directory,
128
+ LIST ProtoList,
129
+ CLUSTERER *Clusterer,
130
+ LABELEDLIST CharSample);
131
+ /**/
132
+ void WriteMergedTrainingSamples(
133
+ char *Directory,
134
+ LIST ClassList);
135
+
136
+ void WriteMicrofeat(
137
+ char *Directory,
138
+ LIST ClassList);
139
+
140
+ void WriteProtos(
141
+ FILE* File,
142
+ MERGE_CLASS MergeClass);
143
+
144
+ void WriteConfigs(
145
+ FILE* File,
146
+ CLASS_TYPE Class);
147
+
148
+ void FreeTrainingSamples (
149
+ LIST CharList);
150
+
151
+ void FreeLabeledClassList (
152
+ LIST ClassList);
153
+
154
+ void FreeLabeledList (
155
+ LABELEDLIST LabeledList);
156
+
157
+ CLUSTERER *SetUpForClustering(
158
+ LABELEDLIST CharSample);
159
+ /*
160
+ PARAMDESC *ConvertToPARAMDESC(
161
+ PARAM_DESC* Param_Desc,
162
+ int N);
163
+ */
164
+ void MergeInsignificantProtos(LIST ProtoList, const char* label,
165
+ CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
166
+
167
+ LIST RemoveInsignificantProtos(
168
+ LIST ProtoList,
169
+ BOOL8 KeepSigProtos,
170
+ BOOL8 KeepInsigProtos,
171
+ int N);
172
+
173
+ void CleanUpUnusedData(
174
+ LIST ProtoList);
175
+
176
+ void Normalize (
177
+ float *Values);
178
+
179
+ void SetUpForFloat2Int(
180
+ LIST LabeledClassList);
181
+
182
+ void WritePFFMTable(INT_TEMPLATES Templates, const char* filename);
183
+
184
+ //--------------Global Data Definitions and Declarations--------------
185
+ static char FontName[MAXNAMESIZE];
186
+ // globals used for parsing command line arguments
187
+ static char *Directory = NULL;
188
+ static int MaxNumSamples = MAX_NUM_SAMPLES;
189
+ static int Argc;
190
+ static char **Argv;
191
+
192
+ // globals used to control what information is saved in the output file
193
+ static BOOL8 ShowAllSamples = FALSE;
194
+ static BOOL8 ShowSignificantProtos = TRUE;
195
+ static BOOL8 ShowInsignificantProtos = FALSE;
196
+
197
+ // global variable to hold configuration parameters to control clustering
198
+ // -M 0.40 -B 0.05 -I 1.0 -C 1e-6.
199
+ static CLUSTERCONFIG Config =
200
+ { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 };
201
+
202
+ static FLOAT32 RoundingAccuracy = 0.0f;
203
+
204
+ // The unicharset used during mftraining
205
+ static UNICHARSET unicharset_mftraining;
206
+
207
+ const char* test_ch = "";
208
+
209
+ /*----------------------------------------------------------------------------
210
+ Public Code
211
+ -----------------------------------------------------------------------------*/
212
+ void DisplayProtoList(const char* ch, LIST protolist) {
213
+ void* window = c_create_window("Char samples", 50, 200,
214
+ 520, 520, -130.0, 130.0, -130.0, 130.0);
215
+ LIST proto = protolist;
216
+ iterate(proto) {
217
+ PROTOTYPE* prototype = reinterpret_cast<PROTOTYPE *>(first_node(proto));
218
+ if (prototype->Significant)
219
+ c_line_color_index(window, Green);
220
+ else if (prototype->NumSamples == 0)
221
+ c_line_color_index(window, Blue);
222
+ else if (prototype->Merged)
223
+ c_line_color_index(window, Magenta);
224
+ else
225
+ c_line_color_index(window, Red);
226
+ float x = CenterX(prototype->Mean);
227
+ float y = CenterY(prototype->Mean);
228
+ double angle = OrientationOf(prototype->Mean) * 2 * M_PI;
229
+ float dx = static_cast<float>(LengthOf(prototype->Mean) * cos(angle) / 2);
230
+ float dy = static_cast<float>(LengthOf(prototype->Mean) * sin(angle) / 2);
231
+ c_move(window, (x - dx) * 256, (y - dy) * 256);
232
+ c_draw(window, (x + dx) * 256, (y + dy) * 256);
233
+ if (prototype->Significant)
234
+ tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n",
235
+ x, y, dx, dy, prototype->NumSamples);
236
+ else if (prototype->NumSamples > 0 && !prototype->Merged)
237
+ tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n",
238
+ x, y, dx, dy, prototype->NumSamples);
239
+ }
240
+ c_make_current(window);
241
+ }
242
+
243
+ /*---------------------------------------------------------------------------*/
244
+ int main (int argc, char **argv) {
245
+ /*
246
+ ** Parameters:
247
+ ** argc number of command line arguments
248
+ ** argv array of command line arguments
249
+ ** Globals: none
250
+ ** Operation:
251
+ ** This program reads in a text file consisting of feature
252
+ ** samples from a training page in the following format:
253
+ **
254
+ ** FontName CharName NumberOfFeatureTypes(N)
255
+ ** FeatureTypeName1 NumberOfFeatures(M)
256
+ ** Feature1
257
+ ** ...
258
+ ** FeatureM
259
+ ** FeatureTypeName2 NumberOfFeatures(M)
260
+ ** Feature1
261
+ ** ...
262
+ ** FeatureM
263
+ ** ...
264
+ ** FeatureTypeNameN NumberOfFeatures(M)
265
+ ** Feature1
266
+ ** ...
267
+ ** FeatureM
268
+ ** FontName CharName ...
269
+ **
270
+ ** The result of this program is a binary inttemp file used by
271
+ ** the OCR engine.
272
+ ** Return: none
273
+ ** Exceptions: none
274
+ ** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
275
+ ** Mon May 18 1998, Christy Russson, Revistion started.
276
+ */
277
+ char *PageName;
278
+ FILE *TrainingPage;
279
+ FILE *OutFile;
280
+ LIST CharList;
281
+ CLUSTERER *Clusterer = NULL;
282
+ LIST ProtoList = NIL;
283
+ LABELEDLIST CharSample;
284
+ PROTOTYPE *Prototype;
285
+ LIST ClassList = NIL;
286
+ int Cid, Pid;
287
+ PROTO Proto;
288
+ PROTO_STRUCT DummyProto;
289
+ BIT_VECTOR Config2;
290
+ MERGE_CLASS MergeClass;
291
+ INT_TEMPLATES IntTemplates;
292
+ LIST pCharList, pProtoList;
293
+ char Filename[MAXNAMESIZE];
294
+
295
+ // Clean the unichar set
296
+ unicharset_mftraining.clear();
297
+ // Space character needed to represent NIL classification
298
+ unicharset_mftraining.unichar_insert(" ");
299
+
300
+ ParseArguments (argc, argv);
301
+ InitFastTrainerVars ();
302
+ InitSubfeatureVars ();
303
+ while ((PageName = GetNextFilename()) != NULL) {
304
+ printf ("Reading %s ...\n", PageName);
305
+ TrainingPage = Efopen (PageName, "r");
306
+ CharList = ReadTrainingSamples (TrainingPage);
307
+ fclose (TrainingPage);
308
+ //WriteTrainingSamples (Directory, CharList);
309
+ pCharList = CharList;
310
+ iterate(pCharList) {
311
+ //Cluster
312
+ CharSample = (LABELEDLIST) first_node (pCharList);
313
+ // printf ("\nClustering %s ...", CharSample->Label);
314
+ Clusterer = SetUpForClustering(CharSample);
315
+ Config.MagicSamples = CharSample->SampleCount;
316
+ ProtoList = ClusterSamples(Clusterer, &Config);
317
+ CleanUpUnusedData(ProtoList);
318
+
319
+ //Merge
320
+ MergeInsignificantProtos(ProtoList, CharSample->Label,
321
+ Clusterer, &Config);
322
+ if (strcmp(test_ch, CharSample->Label) == 0)
323
+ DisplayProtoList(test_ch, ProtoList);
324
+ ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
325
+ ShowInsignificantProtos,
326
+ Clusterer->SampleSize);
327
+ FreeClusterer(Clusterer);
328
+ MergeClass = FindClass (ClassList, CharSample->Label);
329
+ if (MergeClass == NULL) {
330
+ MergeClass = NewLabeledClass (CharSample->Label);
331
+ ClassList = push (ClassList, MergeClass);
332
+ }
333
+ Cid = AddConfigToClass(MergeClass->Class);
334
+ pProtoList = ProtoList;
335
+ iterate (pProtoList) {
336
+ Prototype = (PROTOTYPE *) first_node (pProtoList);
337
+
338
+ // see if proto can be approximated by existing proto
339
+ Pid = FindClosestExistingProto(MergeClass->Class,
340
+ MergeClass->NumMerged, Prototype);
341
+ if (Pid == NO_PROTO) {
342
+ Pid = AddProtoToClass (MergeClass->Class);
343
+ Proto = ProtoIn (MergeClass->Class, Pid);
344
+ MakeNewFromOld (Proto, Prototype);
345
+ MergeClass->NumMerged[Pid] = 1;
346
+ }
347
+ else {
348
+ MakeNewFromOld (&DummyProto, Prototype);
349
+ ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
350
+ (FLOAT32) MergeClass->NumMerged[Pid], 1.0,
351
+ ProtoIn (MergeClass->Class, Pid));
352
+ MergeClass->NumMerged[Pid] ++;
353
+ }
354
+ Config2 = MergeClass->Class->Configurations[Cid];
355
+ AddProtoToConfig (Pid, Config2);
356
+ }
357
+ FreeProtoList (&ProtoList);
358
+ }
359
+ FreeTrainingSamples (CharList);
360
+ }
361
+ //WriteMergedTrainingSamples(Directory,ClassList);
362
+ WriteMicrofeat(Directory, ClassList);
363
+ InitIntProtoVars ();
364
+ InitPrototypes ();
365
+ SetUpForFloat2Int(ClassList);
366
+ IntTemplates = CreateIntTemplates(TrainingData, unicharset_mftraining);
367
+ strcpy (Filename, "");
368
+ if (Directory != NULL) {
369
+ strcat (Filename, Directory);
370
+ strcat (Filename, "/");
371
+ }
372
+ strcat (Filename, "inttemp");
373
+ #ifdef __UNIX__
374
+ OutFile = Efopen (Filename, "w");
375
+ #else
376
+ OutFile = Efopen (Filename, "wb");
377
+ #endif
378
+ WriteIntTemplates(OutFile, IntTemplates, unicharset_mftraining);
379
+ fclose (OutFile);
380
+ strcpy (Filename, "");
381
+ if (Directory != NULL) {
382
+ strcat (Filename, Directory);
383
+ strcat (Filename, "/");
384
+ }
385
+ strcat (Filename, "pffmtable");
386
+ // Now create pffmtable.
387
+ WritePFFMTable(IntTemplates, Filename);
388
+ printf ("Done!\n"); /**/
389
+ FreeLabeledClassList (ClassList);
390
+ return 0;
391
+ } /* main */
392
+
393
+
394
+ /**----------------------------------------------------------------------------
395
+ Private Code
396
+ ----------------------------------------------------------------------------**/
397
+ /*---------------------------------------------------------------------------*/
398
+ void ParseArguments(
399
+ int argc,
400
+ char **argv)
401
+
402
+ /*
403
+ ** Parameters:
404
+ ** argc number of command line arguments to parse
405
+ ** argv command line arguments
406
+ ** Globals:
407
+ ** ShowAllSamples flag controlling samples display
408
+ ** ShowSignificantProtos flag controlling proto display
409
+ ** ShowInsignificantProtos flag controlling proto display
410
+ ** Config current clustering parameters
411
+ ** tessoptarg, tessoptind defined by tessopt sys call
412
+ ** Argc, Argv global copies of argc and argv
413
+ ** Operation:
414
+ ** This routine parses the command line arguments that were
415
+ ** passed to the program. The legal arguments are:
416
+ ** -d "turn off display of samples"
417
+ ** -p "turn off significant protos"
418
+ ** -n "turn off insignificant proto"
419
+ ** -S [ spherical | elliptical | mixed | automatic ]
420
+ ** -M MinSamples "min samples per prototype (%)"
421
+ ** -B MaxIllegal "max illegal chars per cluster (%)"
422
+ ** -I Independence "0 to 1"
423
+ ** -C Confidence "1e-200 to 1.0"
424
+ ** -D Directory
425
+ ** -N MaxNumSamples
426
+ ** -R RoundingAccuracy
427
+ ** Return: none
428
+ ** Exceptions: Illegal options terminate the program.
429
+ ** History: 7/24/89, DSJ, Created.
430
+ */
431
+
432
+ {
433
+ int Option;
434
+ int ParametersRead;
435
+ BOOL8 Error;
436
+
437
+ Error = FALSE;
438
+ Argc = argc;
439
+ Argv = argv;
440
+ while (( Option = tessopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
441
+ {
442
+ switch ( Option )
443
+ {
444
+ case 'n':
445
+ ShowInsignificantProtos = FALSE;
446
+ break;
447
+ case 'p':
448
+ ShowSignificantProtos = FALSE;
449
+ break;
450
+ case 'd':
451
+ ShowAllSamples = FALSE;
452
+ break;
453
+ case 'C':
454
+ ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
455
+ if ( ParametersRead != 1 ) Error = TRUE;
456
+ else if ( Config.Confidence > 1 ) Config.Confidence = 1;
457
+ else if ( Config.Confidence < 0 ) Config.Confidence = 0;
458
+ break;
459
+ case 'I':
460
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
461
+ if ( ParametersRead != 1 ) Error = TRUE;
462
+ else if ( Config.Independence > 1 ) Config.Independence = 1;
463
+ else if ( Config.Independence < 0 ) Config.Independence = 0;
464
+ break;
465
+ case 'M':
466
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
467
+ if ( ParametersRead != 1 ) Error = TRUE;
468
+ else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
469
+ else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
470
+ break;
471
+ case 'B':
472
+ ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
473
+ if ( ParametersRead != 1 ) Error = TRUE;
474
+ else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
475
+ else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
476
+ break;
477
+ case 'R':
478
+ ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
479
+ if ( ParametersRead != 1 ) Error = TRUE;
480
+ else if ( RoundingAccuracy > 0.01f ) RoundingAccuracy = 0.01f;
481
+ else if ( RoundingAccuracy < 0.0f ) RoundingAccuracy = 0.0f;
482
+ break;
483
+ case 'S':
484
+ switch ( tessoptarg[0] )
485
+ {
486
+ case 's': Config.ProtoStyle = spherical; break;
487
+ case 'e': Config.ProtoStyle = elliptical; break;
488
+ case 'm': Config.ProtoStyle = mixed; break;
489
+ case 'a': Config.ProtoStyle = automatic; break;
490
+ default: Error = TRUE;
491
+ }
492
+ break;
493
+ case 'D':
494
+ Directory = tessoptarg;
495
+ break;
496
+ case 'N':
497
+ if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
498
+ MaxNumSamples <= 0)
499
+ Error = TRUE;
500
+ break;
501
+ case '?':
502
+ Error = TRUE;
503
+ break;
504
+ }
505
+ if ( Error )
506
+ {
507
+ fprintf (stderr, "usage: %s [-D] [-P] [-N]\n", argv[0] );
508
+ fprintf (stderr, "\t[-S ProtoStyle]\n");
509
+ fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
510
+ fprintf (stderr, "\t[-d directory] [-n MaxNumSamples] [ TrainingPage ... ]\n");
511
+ exit (2);
512
+ }
513
+ }
514
+ } // ParseArguments
515
+
516
+ /*---------------------------------------------------------------------------*/
517
+ char *GetNextFilename ()
518
+ /*
519
+ ** Parameters: none
520
+ ** Globals:
521
+ ** tessoptind defined by tessopt sys call
522
+ ** Argc, Argv global copies of argc and argv
523
+ ** Operation:
524
+ ** This routine returns the next command line argument. If
525
+ ** there are no remaining command line arguments, it returns
526
+ ** NULL. This routine should only be called after all option
527
+ ** arguments have been parsed and removed with ParseArguments.
528
+ ** Return: Next command line argument or NULL.
529
+ ** Exceptions: none
530
+ ** History: Fri Aug 18 09:34:12 1989, DSJ, Created.
531
+ */
532
+
533
+ {
534
+ if (tessoptind < Argc)
535
+ return (Argv [tessoptind++]);
536
+ else
537
+ return (NULL);
538
+
539
+ } /* GetNextFilename */
540
+
541
+ /*---------------------------------------------------------------------------*/
542
+ LIST ReadTrainingSamples (
543
+ FILE *File)
544
+
545
+ /*
546
+ ** Parameters:
547
+ ** File open text file to read samples from
548
+ ** Globals: none
549
+ ** Operation:
550
+ ** This routine reads training samples from a file and
551
+ ** places them into a data structure which organizes the
552
+ ** samples by FontName and CharName. It then returns this
553
+ ** data structure.
554
+ ** Return: none
555
+ ** Exceptions: none
556
+ ** History: Fri Aug 18 13:11:39 1989, DSJ, Created.
557
+ ** Tue May 17 1998 simplifications to structure, illiminated
558
+ ** font, and feature specification levels of structure.
559
+ */
560
+
561
+ {
562
+ char unichar[UNICHAR_LEN + 1];
563
+ LABELEDLIST CharSample;
564
+ FEATURE_SET FeatureSamples;
565
+ LIST TrainingSamples = NIL;
566
+ CHAR_DESC CharDesc;
567
+ int Type, i;
568
+
569
+ while (fscanf (File, "%s %s", FontName, unichar) == 2) {
570
+ if (!unicharset_mftraining.contains_unichar(unichar)) {
571
+ unicharset_mftraining.unichar_insert(unichar);
572
+ if (unicharset_mftraining.size() > MAX_NUM_CLASSES) {
573
+ cprintf("Error: Size of unicharset of mftraining is "
574
+ "greater than MAX_NUM_CLASSES\n");
575
+ exit(1);
576
+ }
577
+ }
578
+ CharSample = FindList (TrainingSamples, unichar);
579
+ if (CharSample == NULL) {
580
+ CharSample = NewLabeledList (unichar);
581
+ TrainingSamples = push (TrainingSamples, CharSample);
582
+ }
583
+ CharDesc = ReadCharDescription (File);
584
+ Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
585
+ FeatureSamples = CharDesc->FeatureSets[Type];
586
+ for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
587
+ FEATURE f = FeatureSamples->Features[feature];
588
+ for (int dim =0; dim < f->Type->NumParams; ++dim)
589
+ f->Params[dim] += dim == MFDirection ?
590
+ UniformRandomNumber(-MINSD_ANGLE, MINSD_ANGLE) :
591
+ UniformRandomNumber(-MINSD, MINSD);
592
+ }
593
+ CharSample->List = push (CharSample->List, FeatureSamples);
594
+ CharSample->SampleCount++;
595
+ for (i = 0; i < CharDesc->NumFeatureSets; i++)
596
+ if (Type != i)
597
+ FreeFeatureSet(CharDesc->FeatureSets[i]);
598
+ free (CharDesc);
599
+ }
600
+ return (TrainingSamples);
601
+
602
+ } /* ReadTrainingSamples */
603
+
604
+ /*---------------------------------------------------------------------------*/
605
+ LABELEDLIST FindList (
606
+ LIST List,
607
+ char *Label)
608
+
609
+ /*
610
+ ** Parameters:
611
+ ** List list to search
612
+ ** Label label to search for
613
+ ** Globals: none
614
+ ** Operation:
615
+ ** This routine searches thru a list of labeled lists to find
616
+ ** a list with the specified label. If a matching labeled list
617
+ ** cannot be found, NULL is returned.
618
+ ** Return: Labeled list with the specified Label or NULL.
619
+ ** Exceptions: none
620
+ ** History: Fri Aug 18 15:57:41 1989, DSJ, Created.
621
+ */
622
+
623
+ {
624
+ LABELEDLIST LabeledList;
625
+
626
+ iterate (List)
627
+ {
628
+ LabeledList = (LABELEDLIST) first_node (List);
629
+ if (strcmp (LabeledList->Label, Label) == 0)
630
+ return (LabeledList);
631
+ }
632
+ return (NULL);
633
+
634
+ } /* FindList */
635
+
636
+ /*----------------------------------------------------------------------------*/
637
+ MERGE_CLASS FindClass (
638
+ LIST List,
639
+ char *Label)
640
+ {
641
+ MERGE_CLASS MergeClass;
642
+
643
+ iterate (List)
644
+ {
645
+ MergeClass = (MERGE_CLASS) first_node (List);
646
+ if (strcmp (MergeClass->Label, Label) == 0)
647
+ return (MergeClass);
648
+ }
649
+ return (NULL);
650
+
651
+ } /* FindClass */
652
+
653
+ /*---------------------------------------------------------------------------*/
654
+ LABELEDLIST NewLabeledList (
655
+ char *Label)
656
+
657
+ /*
658
+ ** Parameters:
659
+ ** Label label for new list
660
+ ** Globals: none
661
+ ** Operation:
662
+ ** This routine allocates a new, empty labeled list and gives
663
+ ** it the specified label.
664
+ ** Return: New, empty labeled list.
665
+ ** Exceptions: none
666
+ ** History: Fri Aug 18 16:08:46 1989, DSJ, Created.
667
+ */
668
+
669
+ {
670
+ LABELEDLIST LabeledList;
671
+
672
+ LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
673
+ LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
674
+ strcpy (LabeledList->Label, Label);
675
+ LabeledList->List = NIL;
676
+ LabeledList->SampleCount = 0;
677
+ return (LabeledList);
678
+
679
+ } /* NewLabeledList */
680
+
681
+ /*---------------------------------------------------------------------------*/
682
+ MERGE_CLASS NewLabeledClass (
683
+ char *Label)
684
+ {
685
+ MERGE_CLASS MergeClass;
686
+
687
+ MergeClass = (MERGE_CLASS) Emalloc (sizeof (MERGE_CLASS_NODE));
688
+ MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
689
+ strcpy (MergeClass->Label, Label);
690
+ MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
691
+ return (MergeClass);
692
+
693
+ } /* NewLabeledClass */
694
+
695
+ /*---------------------------------------------------------------------------*/
696
+ void WriteTrainingSamples (
697
+ char *Directory,
698
+ LIST CharList)
699
+
700
+ /*
701
+ ** Parameters:
702
+ ** Directory directory to place sample files into
703
+ ** FontList list of fonts used in the training samples
704
+ ** Globals:
705
+ ** MaxNumSamples max number of samples per class to write
706
+ ** Operation:
707
+ ** This routine writes the specified samples into files which
708
+ ** are organized according to the font name and character name
709
+ ** of the samples.
710
+ ** Return: none
711
+ ** Exceptions: none
712
+ ** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
713
+ */
714
+
715
+ {
716
+ LABELEDLIST CharSample;
717
+ FEATURE_SET FeatureSet;
718
+ LIST FeatureList;
719
+ FILE *File;
720
+ char Filename[MAXNAMESIZE];
721
+ int NumSamples;
722
+
723
+ iterate (CharList) // iterate thru all of the fonts
724
+ {
725
+ CharSample = (LABELEDLIST) first_node (CharList);
726
+
727
+ // construct the full pathname for the current samples file
728
+ strcpy (Filename, "");
729
+ if (Directory != NULL)
730
+ {
731
+ strcat (Filename, Directory);
732
+ strcat (Filename, "/");
733
+ }
734
+ strcat (Filename, FontName);
735
+ strcat (Filename, "/");
736
+ strcat (Filename, CharSample->Label);
737
+ strcat (Filename, ".");
738
+ strcat (Filename, PROGRAM_FEATURE_TYPE);
739
+ printf ("\nWriting %s ...", Filename);
740
+
741
+ /* if file does not exist, create a new one with an appropriate
742
+ header; otherwise append samples to the existing file */
743
+ File = fopen (Filename, "r");
744
+ if (File == NULL)
745
+ {
746
+ File = Efopen (Filename, "w");
747
+ WriteOldParamDesc
748
+ (File, FeatureDefs.FeatureDesc[ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)]);
749
+ }
750
+ else
751
+ {
752
+ fclose (File);
753
+ File = Efopen (Filename, "a");
754
+ }
755
+
756
+ // append samples onto the file
757
+ FeatureList = CharSample->List;
758
+ NumSamples = 0;
759
+ iterate (FeatureList)
760
+ {
761
+ if (NumSamples >= MaxNumSamples) break;
762
+
763
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
764
+ WriteFeatureSet (File, FeatureSet);
765
+ NumSamples++;
766
+ }
767
+ fclose (File);
768
+ }
769
+ } /* WriteTrainingSamples */
770
+
771
+
772
+ /*----------------------------------------------------------------------------*/
773
+ void WriteClusteredTrainingSamples (
774
+ char *Directory,
775
+ LIST ProtoList,
776
+ CLUSTERER *Clusterer,
777
+ LABELEDLIST CharSample)
778
+
779
+ /*
780
+ ** Parameters:
781
+ ** Directory directory to place sample files into
782
+ ** Globals:
783
+ ** MaxNumSamples max number of samples per class to write
784
+ ** Operation:
785
+ ** This routine writes the specified samples into files which
786
+ ** are organized according to the font name and character name
787
+ ** of the samples.
788
+ ** Return: none
789
+ ** Exceptions: none
790
+ ** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
791
+ */
792
+
793
+ {
794
+ FILE *File;
795
+ char Filename[MAXNAMESIZE];
796
+
797
+ strcpy (Filename, "");
798
+ if (Directory != NULL)
799
+ {
800
+ strcat (Filename, Directory);
801
+ strcat (Filename, "/");
802
+ }
803
+ strcat (Filename, FontName);
804
+ strcat (Filename, "/");
805
+ strcat (Filename, CharSample->Label);
806
+ strcat (Filename, ".");
807
+ strcat (Filename, PROGRAM_FEATURE_TYPE);
808
+ strcat (Filename, ".p");
809
+ printf ("\nWriting %s ...", Filename);
810
+ File = Efopen (Filename, "w");
811
+ WriteProtoList(File, Clusterer->SampleSize, Clusterer->ParamDesc,
812
+ ProtoList, ShowSignificantProtos, ShowInsignificantProtos);
813
+ fclose (File);
814
+
815
+ } /* WriteClusteredTrainingSamples */
816
+
817
+ /*---------------------------------------------------------------------------*/
818
+ void WriteMergedTrainingSamples(
819
+ char *Directory,
820
+ LIST ClassList)
821
+
822
+ {
823
+ FILE *File;
824
+ char Filename[MAXNAMESIZE];
825
+ MERGE_CLASS MergeClass;
826
+
827
+ iterate (ClassList)
828
+ {
829
+ MergeClass = (MERGE_CLASS) first_node (ClassList);
830
+ strcpy (Filename, "");
831
+ if (Directory != NULL)
832
+ {
833
+ strcat (Filename, Directory);
834
+ strcat (Filename, "/");
835
+ }
836
+ strcat (Filename, "Merged/");
837
+ strcat (Filename, MergeClass->Label);
838
+ strcat (Filename, PROTO_SUFFIX);
839
+ printf ("\nWriting Merged %s ...", Filename);
840
+ File = Efopen (Filename, "w");
841
+ WriteOldProtoFile (File, MergeClass->Class);
842
+ fclose (File);
843
+
844
+ strcpy (Filename, "");
845
+ if (Directory != NULL)
846
+ {
847
+ strcat (Filename, Directory);
848
+ strcat (Filename, "/");
849
+ }
850
+ strcat (Filename, "Merged/");
851
+ strcat (Filename, MergeClass->Label);
852
+ strcat (Filename, CONFIG_SUFFIX);
853
+ printf ("\nWriting Merged %s ...", Filename);
854
+ File = Efopen (Filename, "w");
855
+ WriteOldConfigFile (File, MergeClass->Class);
856
+ fclose (File);
857
+ }
858
+
859
+ } // WriteMergedTrainingSamples
860
+
861
+ /*--------------------------------------------------------------------------*/
862
+ void WriteMicrofeat(
863
+ char *Directory,
864
+ LIST ClassList)
865
+
866
+ {
867
+ FILE *File;
868
+ char Filename[MAXNAMESIZE];
869
+ MERGE_CLASS MergeClass;
870
+
871
+ strcpy (Filename, "");
872
+ if (Directory != NULL)
873
+ {
874
+ strcat (Filename, Directory);
875
+ strcat (Filename, "/");
876
+ }
877
+ strcat (Filename, "Microfeat");
878
+ File = Efopen (Filename, "w");
879
+ printf ("\nWriting Merged %s ...", Filename);
880
+ iterate(ClassList)
881
+ {
882
+ MergeClass = (MERGE_CLASS) first_node (ClassList);
883
+ WriteProtos(File, MergeClass);
884
+ WriteConfigs(File, MergeClass->Class);
885
+ }
886
+ fclose (File);
887
+ } // WriteMicrofeat
888
+
889
+ /*---------------------------------------------------------------------------*/
890
+ void WriteProtos(
891
+ FILE* File,
892
+ MERGE_CLASS MergeClass)
893
+ {
894
+ float Values[3];
895
+ int i;
896
+ PROTO Proto;
897
+
898
+ fprintf(File, "%s\n", MergeClass->Label);
899
+ fprintf(File, "%d\n", MergeClass->Class->NumProtos);
900
+ for(i=0; i < (MergeClass->Class)->NumProtos; i++)
901
+ {
902
+ Proto = ProtoIn(MergeClass->Class,i);
903
+ fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", Proto->X, Proto->Y,
904
+ Proto->Length, Proto->Angle);
905
+ Values[0] = Proto->X;
906
+ Values[1] = Proto->Y;
907
+ Values[2] = Proto->Angle;
908
+ Normalize(Values);
909
+ fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]);
910
+ }
911
+ } // WriteProtos
912
+
913
+ /*----------------------------------------------------------------------------*/
914
+ void WriteConfigs(
915
+ FILE* File,
916
+ CLASS_TYPE Class)
917
+ {
918
+ BIT_VECTOR Config;
919
+ int i, j, WordsPerConfig;
920
+
921
+ WordsPerConfig = WordsInVectorOfSize(Class->NumProtos);
922
+ fprintf(File, "%d %d\n", Class->NumConfigs, WordsPerConfig);
923
+ for(i=0; i < Class->NumConfigs; i++)
924
+ {
925
+ Config = Class->Configurations[i];
926
+ for(j=0; j < WordsPerConfig; j++)
927
+ fprintf(File, "%08x ", Config[j]);
928
+ fprintf(File, "\n");
929
+ }
930
+ fprintf(File, "\n");
931
+ } // WriteConfigs
932
+
933
+ /*---------------------------------------------------------------------------*/
934
+ void FreeTrainingSamples (
935
+ LIST CharList)
936
+
937
+ /*
938
+ ** Parameters:
939
+ ** FontList list of all fonts in document
940
+ ** Globals: none
941
+ ** Operation:
942
+ ** This routine deallocates all of the space allocated to
943
+ ** the specified list of training samples.
944
+ ** Return: none
945
+ ** Exceptions: none
946
+ ** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
947
+ */
948
+
949
+ {
950
+ LABELEDLIST CharSample;
951
+ FEATURE_SET FeatureSet;
952
+ LIST FeatureList;
953
+
954
+
955
+ // printf ("FreeTrainingSamples...\n");
956
+ iterate (CharList) /* iterate thru all of the fonts */
957
+ {
958
+ CharSample = (LABELEDLIST) first_node (CharList);
959
+ FeatureList = CharSample->List;
960
+ iterate (FeatureList) /* iterate thru all of the classes */
961
+ {
962
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
963
+ FreeFeatureSet (FeatureSet);
964
+ }
965
+ FreeLabeledList (CharSample);
966
+ }
967
+ destroy (CharList);
968
+
969
+ } /* FreeTrainingSamples */
970
+
971
+ /*-----------------------------------------------------------------------------*/
972
+ void FreeLabeledClassList (
973
+ LIST ClassList)
974
+
975
+ /*
976
+ ** Parameters:
977
+ ** FontList list of all fonts in document
978
+ ** Globals: none
979
+ ** Operation:
980
+ ** This routine deallocates all of the space allocated to
981
+ ** the specified list of training samples.
982
+ ** Return: none
983
+ ** Exceptions: none
984
+ ** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
985
+ */
986
+
987
+ {
988
+ MERGE_CLASS MergeClass;
989
+
990
+ iterate (ClassList) /* iterate thru all of the fonts */
991
+ {
992
+ MergeClass = (MERGE_CLASS) first_node (ClassList);
993
+ free (MergeClass->Label);
994
+ FreeClass(MergeClass->Class);
995
+ free (MergeClass);
996
+ }
997
+ destroy (ClassList);
998
+
999
+ } /* FreeLabeledClassList */
1000
+
1001
+ /*---------------------------------------------------------------------------*/
1002
+ void FreeLabeledList (
1003
+ LABELEDLIST LabeledList)
1004
+
1005
+ /*
1006
+ ** Parameters:
1007
+ ** LabeledList labeled list to be freed
1008
+ ** Globals: none
1009
+ ** Operation:
1010
+ ** This routine deallocates all of the memory consumed by
1011
+ ** a labeled list. It does not free any memory which may be
1012
+ ** consumed by the items in the list.
1013
+ ** Return: none
1014
+ ** Exceptions: none
1015
+ ** History: Fri Aug 18 17:52:45 1989, DSJ, Created.
1016
+ */
1017
+
1018
+ {
1019
+ destroy (LabeledList->List);
1020
+ free (LabeledList->Label);
1021
+ free (LabeledList);
1022
+
1023
+ } /* FreeLabeledList */
1024
+
1025
+ /*---------------------------------------------------------------------------*/
1026
+ CLUSTERER *SetUpForClustering(
1027
+ LABELEDLIST CharSample)
1028
+
1029
+ /*
1030
+ ** Parameters:
1031
+ ** CharSample: LABELEDLIST that holds all the feature information for a
1032
+ ** given character.
1033
+ ** Globals:
1034
+ ** None
1035
+ ** Operation:
1036
+ ** This routine reads samples from a LABELEDLIST and enters
1037
+ ** those samples into a clusterer data structure. This
1038
+ ** data structure is then returned to the caller.
1039
+ ** Return:
1040
+ ** Pointer to new clusterer data structure.
1041
+ ** Exceptions:
1042
+ ** None
1043
+ ** History:
1044
+ ** 8/16/89, DSJ, Created.
1045
+ */
1046
+
1047
+ {
1048
+ uinT16 N;
1049
+ int i, j;
1050
+ FLOAT32 *Sample = NULL;
1051
+ CLUSTERER *Clusterer;
1052
+ inT32 CharID;
1053
+ LIST FeatureList = NULL;
1054
+ FEATURE_SET FeatureSet = NULL;
1055
+ FEATURE_DESC FeatureDesc = NULL;
1056
+ // PARAM_DESC* ParamDesc;
1057
+
1058
+ FeatureDesc = FeatureDefs.FeatureDesc[ShortNameToFeatureType(PROGRAM_FEATURE_TYPE)];
1059
+ N = FeatureDesc->NumParams;
1060
+ // ParamDesc = ConvertToPARAMDESC(FeatureDesc->ParamDesc, N);
1061
+ Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc);
1062
+ // free(ParamDesc);
1063
+
1064
+ FeatureList = CharSample->List;
1065
+ CharID = 0;
1066
+ iterate(FeatureList)
1067
+ {
1068
+ if (CharID >= MaxNumSamples) break;
1069
+
1070
+ FeatureSet = (FEATURE_SET) first_node (FeatureList);
1071
+ for (i=0; i < FeatureSet->MaxNumFeatures; i++)
1072
+ {
1073
+ if (Sample == NULL)
1074
+ Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1075
+ for (j=0; j < N; j++)
1076
+ if (RoundingAccuracy != 0.0f)
1077
+ Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy);
1078
+ else
1079
+ Sample[j] = FeatureSet->Features[i]->Params[j];
1080
+ MakeSample (Clusterer, Sample, CharID);
1081
+ }
1082
+ CharID++;
1083
+ }
1084
+ if ( Sample != NULL ) free( Sample );
1085
+ return( Clusterer );
1086
+
1087
+ } /* SetUpForClustering */
1088
+
1089
+ /*------------------------------------------------------------------------*/
1090
+ void MergeInsignificantProtos(LIST ProtoList, const char* label,
1091
+ CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
1092
+ PROTOTYPE *Prototype;
1093
+ bool debug = strcmp(test_ch, label) == 0;
1094
+
1095
+ LIST pProtoList = ProtoList;
1096
+ iterate(pProtoList) {
1097
+ Prototype = (PROTOTYPE *) first_node (pProtoList);
1098
+ if (Prototype->Significant || Prototype->Merged)
1099
+ continue;
1100
+ FLOAT32 best_dist = 0.125;
1101
+ PROTOTYPE* best_match = NULL;
1102
+ // Find the nearest alive prototype.
1103
+ LIST list_it = ProtoList;
1104
+ iterate(list_it) {
1105
+ PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it);
1106
+ if (test_p != Prototype && !test_p->Merged) {
1107
+ FLOAT32 dist = ComputeDistance(Clusterer->SampleSize,
1108
+ Clusterer->ParamDesc,
1109
+ Prototype->Mean, test_p->Mean);
1110
+ if (dist < best_dist) {
1111
+ best_match = test_p;
1112
+ best_dist = dist;
1113
+ }
1114
+ }
1115
+ }
1116
+ if (best_match != NULL && !best_match->Significant) {
1117
+ if (debug)
1118
+ tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
1119
+ best_match->NumSamples, Prototype->NumSamples,
1120
+ best_match->Mean[0], best_match->Mean[1],
1121
+ Prototype->Mean[0], Prototype->Mean[1]);
1122
+ best_match->NumSamples = MergeClusters(Clusterer->SampleSize,
1123
+ Clusterer->ParamDesc,
1124
+ best_match->NumSamples,
1125
+ Prototype->NumSamples,
1126
+ best_match->Mean,
1127
+ best_match->Mean, Prototype->Mean);
1128
+ Prototype->NumSamples = 0;
1129
+ Prototype->Merged = 1;
1130
+ } else if (best_match != NULL) {
1131
+ if (debug)
1132
+ tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
1133
+ Prototype->Mean[0], Prototype->Mean[1],
1134
+ best_match->Mean[0], best_match->Mean[1]);
1135
+ Prototype->Merged = 1;
1136
+ }
1137
+ }
1138
+ // Mark significant those that now have enough samples.
1139
+ int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar);
1140
+ pProtoList = ProtoList;
1141
+ iterate(pProtoList) {
1142
+ Prototype = (PROTOTYPE *) first_node (pProtoList);
1143
+ // Process insignificant protos that do not match a green one
1144
+ if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&
1145
+ !Prototype->Merged) {
1146
+ if (debug)
1147
+ tprintf("Red proto at %g,%g becoming green\n",
1148
+ Prototype->Mean[0], Prototype->Mean[1]);
1149
+ Prototype->Significant = true;
1150
+ }
1151
+ }
1152
+ } /* MergeInsignificantProtos */
1153
+
1154
+ /*------------------------------------------------------------------------*/
1155
+ LIST RemoveInsignificantProtos(
1156
+ LIST ProtoList,
1157
+ BOOL8 KeepSigProtos,
1158
+ BOOL8 KeepInsigProtos,
1159
+ int N)
1160
+
1161
+ {
1162
+ LIST NewProtoList = NIL;
1163
+ LIST pProtoList;
1164
+ PROTOTYPE* Proto;
1165
+ PROTOTYPE* NewProto;
1166
+ int i;
1167
+
1168
+ pProtoList = ProtoList;
1169
+ iterate(pProtoList)
1170
+ {
1171
+ Proto = (PROTOTYPE *) first_node (pProtoList);
1172
+ if ((Proto->Significant && KeepSigProtos) ||
1173
+ (!Proto->Significant && KeepInsigProtos))
1174
+ {
1175
+ NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
1176
+
1177
+ NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1178
+ NewProto->Significant = Proto->Significant;
1179
+ NewProto->Style = Proto->Style;
1180
+ NewProto->NumSamples = Proto->NumSamples;
1181
+ NewProto->Cluster = NULL;
1182
+ NewProto->Distrib = NULL;
1183
+
1184
+ for (i=0; i < N; i++)
1185
+ NewProto->Mean[i] = Proto->Mean[i];
1186
+ if (Proto->Variance.Elliptical != NULL)
1187
+ {
1188
+ NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1189
+ for (i=0; i < N; i++)
1190
+ NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
1191
+ }
1192
+ else
1193
+ NewProto->Variance.Elliptical = NULL;
1194
+ //---------------------------------------------
1195
+ if (Proto->Magnitude.Elliptical != NULL)
1196
+ {
1197
+ NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1198
+ for (i=0; i < N; i++)
1199
+ NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
1200
+ }
1201
+ else
1202
+ NewProto->Magnitude.Elliptical = NULL;
1203
+ //------------------------------------------------
1204
+ if (Proto->Weight.Elliptical != NULL)
1205
+ {
1206
+ NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
1207
+ for (i=0; i < N; i++)
1208
+ NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
1209
+ }
1210
+ else
1211
+ NewProto->Weight.Elliptical = NULL;
1212
+
1213
+ NewProto->TotalMagnitude = Proto->TotalMagnitude;
1214
+ NewProto->LogMagnitude = Proto->LogMagnitude;
1215
+ NewProtoList = push_last(NewProtoList, NewProto);
1216
+ }
1217
+ }
1218
+ //FreeProtoList (ProtoList);
1219
+ return (NewProtoList);
1220
+ } /* RemoveInsignificantProtos */
1221
+ /*-----------------------------------------------------------------------------*/
1222
+ void CleanUpUnusedData(
1223
+ LIST ProtoList)
1224
+ {
1225
+ PROTOTYPE* Prototype;
1226
+
1227
+ iterate(ProtoList)
1228
+ {
1229
+ Prototype = (PROTOTYPE *) first_node (ProtoList);
1230
+ if(Prototype->Variance.Elliptical != NULL)
1231
+ {
1232
+ memfree(Prototype->Variance.Elliptical);
1233
+ Prototype->Variance.Elliptical = NULL;
1234
+ }
1235
+ if(Prototype->Magnitude.Elliptical != NULL)
1236
+ {
1237
+ memfree(Prototype->Magnitude.Elliptical);
1238
+ Prototype->Magnitude.Elliptical = NULL;
1239
+ }
1240
+ if(Prototype->Weight.Elliptical != NULL)
1241
+ {
1242
+ memfree(Prototype->Weight.Elliptical);
1243
+ Prototype->Weight.Elliptical = NULL;
1244
+ }
1245
+ }
1246
+ }
1247
+
1248
+ /*--------------------------------------------------------------------------*/
1249
+ void Normalize (
1250
+ float *Values)
1251
+ {
1252
+ register float Slope;
1253
+ register float Intercept;
1254
+ register float Normalizer;
1255
+
1256
+ Slope = tan (Values [2] * 2 * PI);
1257
+ Intercept = Values [1] - Slope * Values [0];
1258
+ Normalizer = 1 / sqrt (Slope * Slope + 1.0);
1259
+
1260
+ Values [0] = Slope * Normalizer;
1261
+ Values [1] = - Normalizer;
1262
+ Values [2] = Intercept * Normalizer;
1263
+ } // Normalize
1264
+
1265
+ /** SetUpForFloat2Int **************************************************/
1266
+ void SetUpForFloat2Int(
1267
+ LIST LabeledClassList)
1268
+ {
1269
+ MERGE_CLASS MergeClass;
1270
+ CLASS_TYPE Class;
1271
+ int NumProtos;
1272
+ int NumConfigs;
1273
+ int NumWords;
1274
+ int i, j;
1275
+ float Values[3];
1276
+ PROTO NewProto;
1277
+ PROTO OldProto;
1278
+ BIT_VECTOR NewConfig;
1279
+ BIT_VECTOR OldConfig;
1280
+
1281
+ // printf("Float2Int ...\n");
1282
+
1283
+ iterate(LabeledClassList)
1284
+ {
1285
+ MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
1286
+ Class = &TrainingData[unicharset_mftraining.unichar_to_id(
1287
+ MergeClass->Label)];
1288
+ NumProtos = (MergeClass->Class)->NumProtos;
1289
+ NumConfigs = MergeClass->Class->NumConfigs;
1290
+
1291
+ Class->NumProtos = NumProtos;
1292
+ Class->MaxNumProtos = NumProtos;
1293
+ Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
1294
+ for(i=0; i < NumProtos; i++)
1295
+ {
1296
+ NewProto = ProtoIn(Class, i);
1297
+ OldProto = ProtoIn(MergeClass->Class, i);
1298
+ Values[0] = OldProto->X;
1299
+ Values[1] = OldProto->Y;
1300
+ Values[2] = OldProto->Angle;
1301
+ Normalize(Values);
1302
+ NewProto->X = OldProto->X;
1303
+ NewProto->Y = OldProto->Y;
1304
+ NewProto->Length = OldProto->Length;
1305
+ NewProto->Angle = OldProto->Angle;
1306
+ NewProto->A = Values[0];
1307
+ NewProto->B = Values[1];
1308
+ NewProto->C = Values[2];
1309
+ }
1310
+
1311
+ Class->NumConfigs = NumConfigs;
1312
+ Class->MaxNumConfigs = NumConfigs;
1313
+ Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
1314
+ NumWords = WordsInVectorOfSize(NumProtos);
1315
+ for(i=0; i < NumConfigs; i++)
1316
+ {
1317
+ NewConfig = NewBitVector(NumProtos);
1318
+ OldConfig = MergeClass->Class->Configurations[i];
1319
+ for(j=0; j < NumWords; j++)
1320
+ NewConfig[j] = OldConfig[j];
1321
+ Class->Configurations[i] = NewConfig;
1322
+ }
1323
+ }
1324
+ } // SetUpForFloat2Int
1325
+
1326
+ /*--------------------------------------------------------------------------*/
1327
+ void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {
1328
+ FILE* fp = Efopen(filename, "wb");
1329
+ /* then write out each class */
1330
+ for (int i = 0; i < Templates->NumClasses; i++) {
1331
+ int MaxLength = 0;
1332
+ INT_CLASS Class = Templates->Class[i];
1333
+ for (int ConfigId = 0; ConfigId < Class->NumConfigs; ConfigId++) {
1334
+ if (Class->ConfigLengths[ConfigId] > MaxLength)
1335
+ MaxLength = Class->ConfigLengths[ConfigId];
1336
+ }
1337
+ fprintf(fp, "%s %d\n", unicharset_mftraining.id_to_unichar(
1338
+ Templates->ClassIdFor[i]), MaxLength);
1339
+ }
1340
+ fclose(fp);
1341
+ } // WritePFFMTable