tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,2958 @@
1
+ /******************************************************************************
2
+ ** Filename: adaptmatch.c
3
+ ** Purpose: High level adaptive matcher.
4
+ ** Author: Dan Johnson
5
+ ** History: Mon Mar 11 10:00:10 1991, DSJ, Created.
6
+ **
7
+ ** (c) Copyright Hewlett-Packard Company, 1988.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ ******************************************************************************/
18
+
19
+ /**----------------------------------------------------------------------------
20
+ Include Files and Type Defines
21
+ ----------------------------------------------------------------------------**/
22
+ #include <ctype.h>
23
+ #include "adaptmatch.h"
24
+ #include "normfeat.h"
25
+ #include "mfoutline.h"
26
+ #include "picofeat.h"
27
+ #include "float2int.h"
28
+ #include "outfeat.h"
29
+ #include "emalloc.h"
30
+ #include "intfx.h"
31
+ #include "permnum.h"
32
+ #include "speckle.h"
33
+ #include "efio.h"
34
+ #include "normmatch.h"
35
+ #include "stopper.h"
36
+ #include "permute.h"
37
+ #include "context.h"
38
+ #include "ndminx.h"
39
+ #include "intproto.h"
40
+ #include "const.h"
41
+ #include "globals.h"
42
+ #include "werd.h"
43
+ #include "callcpp.h"
44
+ #include "tordvars.h"
45
+
46
+ #include <stdio.h>
47
+ #include <string.h>
48
+ #include <ctype.h>
49
+ #include <stdlib.h>
50
+ #include <math.h>
51
+ #ifdef __UNIX__
52
+ #include <assert.h>
53
+ #endif
54
+
55
+ #define ADAPT_TEMPLATE_SUFFIX ".a"
56
+ #define BUILT_IN_TEMPLATES_FILE "inttemp"
57
+ #define BUILT_IN_CUTOFFS_FILE "pffmtable"
58
+
59
+ #define MAX_MATCHES 10
60
+ #define UNLIKELY_NUM_FEAT 200
61
+ #define NO_DEBUG 0
62
+ #define MAX_ADAPTABLE_WERD_SIZE 40
63
+
64
+ #define ADAPTABLE_WERD (GOOD_NUMBER + 0.05)
65
+
66
+ #define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
67
+
68
+ #define WORST_POSSIBLE_RATING (1.0)
69
+
70
+ typedef struct /* results of matching one blob; allocated and initialised in AdaptiveClassifier() */
71
+ {
72
+ inT32 BlobLength; /* set to MAX_INT32 before matching starts */
73
+ int NumMatches; /* starts at 0; number of Classes entries handed to qsort */
74
+ CLASS_ID Classes[MAX_NUM_CLASSES]; /* matched class ids, sorted with CompareCurrentRatings */
75
+ FLOAT32 Ratings[MAX_CLASS_ID + 1]; /* best rating found for each class; exposed to the sort comparator via CurrentRatings */
76
+ uinT8 Configs[MAX_CLASS_ID + 1]; /* NOTE(review): presumably the config behind each rating -- confirm */
77
+ FLOAT32 BestRating; /* starts at WORST_POSSIBLE_RATING */
78
+ CLASS_ID BestClass; /* starts at NO_CLASS */
79
+ uinT8 BestConfig; /* starts at 0 */
80
+ CLASS_PRUNER_RESULTS CPResults;
81
+ }
82
+
83
+
84
+ ADAPT_RESULTS;
85
+
86
+ typedef struct /* NOTE(review): not used in the visible part of this file; presumably a (templates, class, config) lookup key -- confirm */
87
+ {
88
+ ADAPT_TEMPLATES Templates;
89
+ CLASS_ID ClassId;
90
+ int ConfigId;
91
+ }
92
+
93
+
94
+ PROTO_KEY;
95
+
96
+ /**----------------------------------------------------------------------------
97
+ Private Macros
98
+ ----------------------------------------------------------------------------**/
99
+ #define MarginalMatch(Rating) \
100
+ ((Rating) > GreatAdaptiveMatch)
101
+
102
+ #define TempConfigReliable(Config) \
103
+ ((Config)->NumTimesSeen >= ReliableConfigThreshold)
104
+
105
+ #define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
106
+
107
+ /**----------------------------------------------------------------------------
108
+ Private Function Prototypes
109
+ ----------------------------------------------------------------------------**/
110
+ void AdaptToChar(TBLOB *Blob,
111
+ LINE_STATS *LineStats,
112
+ CLASS_ID ClassId,
113
+ FLOAT32 Threshold);
114
+
115
+ void AdaptToPunc(TBLOB *Blob,
116
+ LINE_STATS *LineStats,
117
+ CLASS_ID ClassId,
118
+ FLOAT32 Threshold);
119
+
120
+ void AddNewResult(ADAPT_RESULTS *Results,
121
+ CLASS_ID ClassId,
122
+ FLOAT32 Rating,
123
+ int ConfigId);
124
+
125
+ void AmbigClassifier(TBLOB *Blob,
126
+ LINE_STATS *LineStats,
127
+ INT_TEMPLATES Templates,
128
+ UNICHAR_ID *Ambiguities,
129
+ ADAPT_RESULTS *Results);
130
+
131
+ UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
132
+ LINE_STATS *LineStats,
133
+ ADAPT_TEMPLATES Templates,
134
+ ADAPT_RESULTS *Results);
135
+
136
+ void make_config_pruner(INT_TEMPLATES templates, CONFIG_PRUNER *config_pruner);
137
+
138
+ void CharNormClassifier(TBLOB *Blob,
139
+ LINE_STATS *LineStats,
140
+ INT_TEMPLATES Templates,
141
+ ADAPT_RESULTS *Results);
142
+
143
+ void ClassifyAsNoise(TBLOB *Blob,
144
+ LINE_STATS *LineStats,
145
+ ADAPT_RESULTS *Results);
146
+
147
+ int CompareCurrentRatings(const void *arg1,
148
+ const void *arg2);
149
+
150
+ LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results);
151
+
152
+ void DebugAdaptiveClassifier(TBLOB *Blob,
153
+ LINE_STATS *LineStats,
154
+ ADAPT_RESULTS *Results);
155
+
156
+ void DoAdaptiveMatch(TBLOB *Blob,
157
+ LINE_STATS *LineStats,
158
+ ADAPT_RESULTS *Results);
159
+
160
+ void GetAdaptThresholds(TWERD * Word,
161
+ LINE_STATS * LineStats,
162
+ const WERD_CHOICE& BestChoice,
163
+ const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]);
164
+
165
+ UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
166
+ LINE_STATS *LineStats,
167
+ CLASS_ID CorrectClass);
168
+
169
+ int GetBaselineFeatures(TBLOB *Blob,
170
+ LINE_STATS *LineStats,
171
+ INT_TEMPLATES Templates,
172
+ INT_FEATURE_ARRAY IntFeatures,
173
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
174
+ inT32 *BlobLength);
175
+
176
+ FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId);
177
+
178
+ int GetCharNormFeatures(TBLOB *Blob,
179
+ LINE_STATS *LineStats,
180
+ INT_TEMPLATES Templates,
181
+ INT_FEATURE_ARRAY IntFeatures,
182
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
183
+ inT32 *BlobLength);
184
+
185
+ int GetIntBaselineFeatures(TBLOB *Blob,
186
+ LINE_STATS *LineStats,
187
+ INT_TEMPLATES Templates,
188
+ INT_FEATURE_ARRAY IntFeatures,
189
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
190
+ inT32 *BlobLength);
191
+
192
+ int GetIntCharNormFeatures(TBLOB *Blob,
193
+ LINE_STATS *LineStats,
194
+ INT_TEMPLATES Templates,
195
+ INT_FEATURE_ARRAY IntFeatures,
196
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
197
+ inT32 *BlobLength);
198
+
199
+ void InitMatcherRatings(register FLOAT32 *Rating);
200
+
201
+ int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
202
+ CLASS_ID ClassId,
203
+ int NumFeatures,
204
+ INT_FEATURE_ARRAY Features,
205
+ FEATURE_SET FloatFeatures);
206
+
207
+ PROTO_ID MakeNewTempProtos(FEATURE_SET Features,
208
+ int NumBadFeat,
209
+ FEATURE_ID BadFeat[],
210
+ INT_CLASS IClass,
211
+ ADAPT_CLASS Class, BIT_VECTOR TempProtoMask);
212
+
213
+ void MakePermanent(ADAPT_TEMPLATES Templates,
214
+ CLASS_ID ClassId,
215
+ int ConfigId,
216
+ TBLOB *Blob,
217
+ LINE_STATS *LineStats);
218
+
219
+ int MakeTempProtoPerm(void *item1, void *item2);
220
+
221
+ int NumBlobsIn(TWERD *Word);
222
+
223
+ int NumOutlinesInBlob(TBLOB *Blob);
224
+
225
+ void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results);
226
+
227
+ void RemoveBadMatches(ADAPT_RESULTS *Results);
228
+
229
+ void RemoveExtraPuncs(ADAPT_RESULTS *Results);
230
+
231
+ void SetAdaptiveThreshold(FLOAT32 Threshold);
232
+ void ShowBestMatchFor(TBLOB *Blob,
233
+ LINE_STATS *LineStats,
234
+ CLASS_ID ClassId,
235
+ BOOL8 AdaptiveOn,
236
+ BOOL8 PreTrainedOn);
237
+
238
+
239
+ /**----------------------------------------------------------------------------
240
+ Global Data Definitions and Declarations
241
+ ----------------------------------------------------------------------------**/
242
+ /* name of current image file being processed */
243
+ extern char imagefile[];
244
+ INT_VAR(tessedit_single_match, FALSE, "Top choice only from CP");
245
+
246
+ /* variables used to hold performance statistics */
247
+ static int AdaptiveMatcherCalls = 0;
248
+ static int BaselineClassifierCalls = 0;
249
+ static int CharNormClassifierCalls = 0;
250
+ static int AmbigClassifierCalls = 0;
251
+ static int NumWordsAdaptedTo = 0;
252
+ static int NumCharsAdaptedTo = 0;
253
+ static int NumBaselineClassesTried = 0;
254
+ static int NumCharNormClassesTried = 0;
255
+ static int NumAmbigClassesTried = 0;
256
+ static int NumClassesOutput = 0;
257
+ static int NumAdaptationsFailed = 0;
258
+
259
+ /* define globals used to hold onto extracted features. This is used
260
+ to map from the old scheme in which baseline features and char norm
261
+ features are extracted separately, to the new scheme in which they
262
+ are extracted at the same time. */
263
+ static BOOL8 FeaturesHaveBeenExtracted = FALSE;
264
+ static BOOL8 FeaturesOK = TRUE;
265
+ static INT_FEATURE_ARRAY BaselineFeatures;
266
+ static INT_FEATURE_ARRAY CharNormFeatures;
267
+ static INT_FX_RESULT_STRUCT FXInfo;
268
+
269
+ /* use a global variable to hold onto the current ratings so that the
270
+ comparison function passed to qsort can get at them */
271
+ static FLOAT32 *CurrentRatings;
272
+
273
+ /* define globals to hold filenames of training data */
274
+ static const char *BuiltInTemplatesFile = BUILT_IN_TEMPLATES_FILE;
275
+ static const char *BuiltInCutoffsFile = BUILT_IN_CUTOFFS_FILE;
276
+ static CLASS_CUTOFF_ARRAY CharNormCutoffs;
277
+ static CLASS_CUTOFF_ARRAY BaselineCutoffs;
278
+
279
+ /* use global variables to hold onto built-in templates and adapted
280
+ templates */
281
+ static INT_TEMPLATES PreTrainedTemplates;
282
+ static ADAPT_TEMPLATES AdaptedTemplates;
283
+
284
+ /* create dummy proto and config masks for use with the built-in templates */
285
+ static BIT_VECTOR AllProtosOn;
286
+ static BIT_VECTOR PrunedProtos;
287
+ static BIT_VECTOR AllConfigsOn;
288
+ static BIT_VECTOR AllProtosOff;
289
+ static BIT_VECTOR AllConfigsOff;
290
+ static BIT_VECTOR TempProtoMask;
291
+
292
+ /* define control knobs for adaptive matcher */
293
+ make_toggle_const(EnableAdaptiveMatcher, 1, MakeEnableAdaptiveMatcher);
294
+ /* PREV DEFAULT 0 */
295
+
296
+ make_toggle_const(UsePreAdaptedTemplates, 0, MakeUsePreAdaptedTemplates);
297
+ make_toggle_const(SaveAdaptedTemplates, 0, MakeSaveAdaptedTemplates);
298
+
299
+ make_toggle_var(EnableAdaptiveDebugger, 0, MakeEnableAdaptiveDebugger,
300
+ 18, 1, SetEnableAdaptiveDebugger, "Enable match debugger");
301
+
302
+ make_int_var(MatcherDebugLevel, 0, MakeMatcherDebugLevel,
303
+ 18, 2, SetMatcherDebugLevel, "Matcher Debug Level: ");
304
+
305
+ make_int_var(MatchDebugFlags, 0, MakeMatchDebugFlags,
306
+ 18, 3, SetMatchDebugFlags, "Matcher Debug Flags: ");
307
+
308
+ make_toggle_var(EnableLearning, 1, MakeEnableLearning,
309
+ 18, 4, SetEnableLearning, "Enable learning");
310
+ /* PREV DEFAULT 0 */
311
+ /*record it for multiple pages */
312
+ static int old_enable_learning = 1;
313
+
314
+ make_int_var(LearningDebugLevel, 0, MakeLearningDebugLevel,
315
+ 18, 5, SetLearningDebugLevel, "Learning Debug Level: ");
316
+
317
+ make_float_var(GoodAdaptiveMatch, 0.125, MakeGoodAdaptiveMatch,
318
+ 18, 6, SetGoodAdaptiveMatch, "Good Match (0-1): ");
319
+
320
+ make_float_var(GreatAdaptiveMatch, 0.0, MakeGreatAdaptiveMatch,
321
+ 18, 7, SetGreatAdaptiveMatch, "Great Match (0-1): ");
322
+ /* PREV DEFAULT 0.10 */
323
+
324
+ make_float_var(PerfectRating, 0.02, MakePerfectRating,
325
+ 18, 8, SetPerfectRating, "Perfect Match (0-1): ");
326
+
327
+ make_float_var(BadMatchPad, 0.15, MakeBadMatchPad,
328
+ 18, 9, SetBadMatchPad, "Bad Match Pad (0-1): ");
329
+
330
+ make_float_var(RatingMargin, 0.1, MakeRatingMargin,
331
+ 18, 10, SetRatingMargin, "New template margin (0-1): ");
332
+
333
+ make_float_var(NoiseBlobLength, 12.0, MakeNoiseBlobLength,
334
+ 18, 11, SetNoiseBlobLength, "Avg. noise blob length: ");
335
+
336
+ make_int_var(MinNumPermClasses, 1, MakeMinNumPermClasses,
337
+ 18, 12, SetMinNumPermClasses, "Min # of permanent classes: ");
338
+ /* PREV DEFAULT 200 */
339
+
340
+ make_int_var(ReliableConfigThreshold, 2, MakeReliableConfigThreshold,
341
+ 18, 13, SetReliableConfigThreshold,
342
+ "Reliable Config Threshold: ");
343
+
344
+ make_float_var(MaxAngleDelta, 0.015, MakeMaxAngleDelta,
345
+ 18, 14, SetMaxAngleDelta,
346
+ "Maximum angle delta for proto clustering: ");
347
+
348
+ make_toggle_var(EnableIntFX, 1, MakeEnableIntFX,
349
+ 18, 15, SetEnableIntFX, "Enable integer fx");
350
+ /* PREV DEFAULT 0 */
351
+
352
+ make_toggle_var(EnableNewAdaptRules, 1, MakeEnableNewAdaptRules,
353
+ 18, 16, SetEnableNewAdaptRules,
354
+ "Enable new adaptation rules");
355
+ /* PREV DEFAULT 0 */
356
+
357
+ make_float_var(RatingScale, 1.5, MakeRatingScale,
358
+ 18, 17, SetRatingScale, "Rating scale: ");
359
+
360
+ make_float_var(CertaintyScale, 20.0, MakeCertaintyScale,
361
+ 18, 18, SetCertaintyScale, "CertaintyScale: ");
362
+
363
+ make_int_var(FailedAdaptionsBeforeReset, 150, MakeFailedAdaptionsBeforeReset,
364
+ 18, 19, SetFailedAdaptionsBeforeReset,
365
+ "Number of failed adaptions before adapted templates reset: ");
366
+ double_VAR(tessedit_class_miss_scale, 0.00390625,
367
+ "Scale factor for features not used");
368
+
369
+ int tess_cn_matching = 0;
370
+ int tess_bn_matching = 0;
371
+
372
+ /**----------------------------------------------------------------------------
373
+ Public Code
374
+ ----------------------------------------------------------------------------**/
375
+ /*---------------------------------------------------------------------------*/
376
+ LIST AdaptiveClassifier(TBLOB *Blob, TBLOB *DotBlob, TEXTROW *Row) {
377
+ /*
378
+ ** Parameters:
379
+ ** Blob blob to be classified
380
+ ** DotBlob (obsolete)
381
+ ** Row row of text that word appears in
382
+ ** Globals:
383
+ ** CurrentRatings
384
+ used by compare function for qsort
385
+ ** Operation: This routine calls the adaptive matcher which returns
386
+ ** (in an array) the class id of each class matched. It also
387
+ ** returns the number of classes matched.
388
+ ** For each class matched it places the best rating
389
+ ** found for that class into the Ratings array.
390
+ ** Bad matches are then removed so that they don't need to be
391
+ ** sorted. The remaining good matches are then sorted and
392
+ ** converted to choices.
393
+ ** This routine also performs some simple speckle filtering.
394
+ ** Return: List of choices found by adaptive matcher.
395
+ ** Exceptions: none
396
+ ** History: Mon Mar 11 10:00:58 1991, DSJ, Created.
397
+ */
398
+ LIST Choices;
399
+ ADAPT_RESULTS* Results = new ADAPT_RESULTS;
400
+ LINE_STATS LineStats;
401
+
402
+ if (FailedAdaptionsBeforeReset >= 0 &&
403
+ NumAdaptationsFailed >= FailedAdaptionsBeforeReset) {
404
+ NumAdaptationsFailed = 0;
405
+ ResetAdaptiveClassifier();
406
+ }
407
+ if (AdaptedTemplates == NULL)
408
+ AdaptedTemplates = NewAdaptedTemplates ();
409
+ EnterClassifyMode;
410
+
411
+ Results->BlobLength = MAX_INT32;
412
+ Results->NumMatches = 0;
413
+ Results->BestRating = WORST_POSSIBLE_RATING;
414
+ Results->BestClass = NO_CLASS;
415
+ Results->BestConfig = 0;
416
+ GetLineStatsFromRow(Row, &LineStats);
417
+ InitMatcherRatings (Results->Ratings);
418
+
419
+ DoAdaptiveMatch(Blob, &LineStats, Results);
420
+ RemoveBadMatches(Results);
421
+
422
+ /* save ratings in a global so that CompareCurrentRatings() can see them */
423
+ CurrentRatings = Results->Ratings;
424
+ qsort((void*) (Results->Classes), Results->NumMatches,
425
+ sizeof (CLASS_ID), CompareCurrentRatings);
426
+ RemoveExtraPuncs(Results);
427
+ Choices = ConvertMatchesToChoices(Results);
428
+
429
+ if (MatcherDebugLevel >= 1) {
430
+ cprintf ("AD Matches = ");
431
+ PrintAdaptiveMatchResults(stdout, Results);
432
+ }
433
+
434
+ if (LargeSpeckle (Blob, Row))
435
+ Choices = AddLargeSpeckleTo (Choices);
436
+
437
+ #ifndef GRAPHICS_DISABLED
438
+ if (EnableAdaptiveDebugger)
439
+ DebugAdaptiveClassifier(Blob, &LineStats, Results);
440
+ #endif
441
+
442
+ NumClassesOutput += count (Choices);
443
+ if (Choices == NIL) {
444
+ char empty_lengths[] = {0};
445
+ if (!bln_numericmode)
446
+ tprintf ("Nil classification!\n"); // Should never normally happen.
447
+ return (append_choice (NIL, "", empty_lengths, 50.0f, -20.0f, -1));
448
+ }
449
+
450
+ delete Results;
451
+ return Choices;
452
+ } /* AdaptiveClassifier */
453
+
454
+
455
+ /*---------------------------------------------------------------------------*/
456
+ void AdaptToWord(TWERD *Word,
457
+ TEXTROW *Row,
458
+ const WERD_CHOICE& BestChoice,
459
+ const WERD_CHOICE& BestRawChoice,
460
+ const char *rejmap) {
461
+ /*
462
+ ** Parameters:
463
+ ** Word
464
+ word to be adapted to
465
+ ** Row
466
+ row of text that word is found in
467
+ ** BestChoice
468
+ best choice for word found by system
469
+ ** BestRawChoice
470
+ best choice for word found by classifier only
471
+ ** Globals:
472
+ ** EnableLearning
473
+ TRUE if learning is enabled
474
+ ** Operation: This routine implements a preliminary version of the
475
+ ** rules which are used to decide which characters to adapt to.
476
+ ** A word is adapted to if it is in the dictionary or if it
477
+ ** is a "good" number (no trailing units, etc.). It cannot
478
+ ** contain broken or merged characters. Within that word, only
479
+ ** letters and digits are adapted to (no punctuation).
480
+ ** Return: none
481
+ ** Exceptions: none
482
+ ** History: Thu Mar 14 07:40:36 1991, DSJ, Created.
483
+ */
484
+ TBLOB *Blob;
485
+ LINE_STATS LineStats;
486
+ FLOAT32 Thresholds[MAX_ADAPTABLE_WERD_SIZE];
487
+ FLOAT32 *Threshold;
488
+ const char *map = rejmap;
489
+ char map_char = '1';
490
+ const char* BestChoice_string = BestChoice.string().string();
491
+ const char* BestChoice_lengths = BestChoice.lengths().string();
492
+
493
+ if (strlen(BestChoice_lengths) > MAX_ADAPTABLE_WERD_SIZE)
494
+ return;
495
+
496
+ if (EnableLearning) {
497
+ NumWordsAdaptedTo++;
498
+
499
+ #ifndef SECURE_NAMES
500
+ if (LearningDebugLevel >= 1)
501
+ cprintf ("\n\nAdapting to word = %s\n", BestChoice.string().string());
502
+ #endif
503
+ GetLineStatsFromRow(Row, &LineStats);
504
+
505
+ GetAdaptThresholds(Word,
506
+ &LineStats,
507
+ BestChoice,
508
+ BestRawChoice,
509
+ Thresholds);
510
+
511
+ for (Blob = Word->blobs, Threshold = Thresholds; Blob != NULL;
512
+ Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++),
513
+ Threshold++) {
514
+ InitIntFX();
515
+
516
+ if (rejmap != NULL)
517
+ map_char = *map++;
518
+
519
+ assert (map_char == '1' || map_char == '0');
520
+
521
+ if (map_char == '1') {
522
+
523
+ // if (unicharset.get_isalpha (BestChoice_string, *BestChoice_lengths) ||
524
+ // unicharset.get_isdigit (BestChoice_string, *BestChoice_lengths)) {
525
+ /* SPECIAL RULE: don't adapt to an 'i' which is the first char
526
+ in a word because they are too ambiguous with 'I'.
527
+ The new adaptation rules should account for this
528
+ automatically, since they exclude ambiguous words from
529
+ adaptation, but for safety's sake we'll leave the rule in.
530
+ Also, don't adapt to i's that have only 1 blob in them
531
+ because this creates too much ambiguity for broken
532
+ characters. */
533
+ if (*BestChoice_lengths == 1 &&
534
+ (*BestChoice_string == 'i'
535
+ || (il1_adaption_test && *BestChoice_string == 'I' &&
536
+ (Blob->next == NULL ||
537
+ unicharset.get_islower (BestChoice_string + *BestChoice_lengths,
538
+ *(BestChoice_lengths + 1)))))
539
+ && (Blob == Word->blobs
540
+ || (!(unicharset.get_isalpha (BestChoice_string -
541
+ *(BestChoice_lengths - 1),
542
+ *(BestChoice_lengths - 1)) ||
543
+ unicharset.get_isdigit (BestChoice_string -
544
+ *(BestChoice_lengths - 1),
545
+ *(BestChoice_lengths - 1))))
546
+
547
+ || (!il1_adaption_test && NumOutlinesInBlob(Blob) != 2))) {
548
+ if (LearningDebugLevel >= 1)
549
+ cprintf ("Rejecting char = %s\n", unicharset.id_to_unichar(
550
+ unicharset.unichar_to_id(BestChoice_string,
551
+ *BestChoice_lengths)));
552
+ }
553
+ else {
554
+ #ifndef SECURE_NAMES
555
+ if (LearningDebugLevel >= 1)
556
+ cprintf ("Adapting to char = %s, thr= %g\n",
557
+ unicharset.id_to_unichar(
558
+ unicharset.unichar_to_id(BestChoice_string,
559
+ *BestChoice_lengths)),
560
+ *Threshold);
561
+ #endif
562
+ AdaptToChar(Blob, &LineStats,
563
+ unicharset.unichar_to_id(BestChoice_string,
564
+ *BestChoice_lengths),
565
+ *Threshold);
566
+ }
567
+ // }
568
+ // else
569
+ // AdaptToPunc(Blob, &LineStats,
570
+ // unicharset.unichar_to_id(BestChoice_string,
571
+ // *BestChoice_lengths),
572
+ // *Threshold);
573
+ }
574
+ }
575
+ if (LearningDebugLevel >= 1)
576
+ cprintf ("\n");
577
+ }
578
+ } /* AdaptToWord */
579
+
580
+
581
+ /*---------------------------------------------------------------------------*/
582
+ void EndAdaptiveClassifier() {
583
+ /*
584
+ ** Parameters: none
585
+ ** Globals:
586
+ ** AdaptedTemplates
587
+ current set of adapted templates
588
+ ** SaveAdaptedTemplates
589
+ TRUE if templates should be saved
590
+ ** EnableAdaptiveMatcher
591
+ TRUE if adaptive matcher is enabled
592
+ ** Operation: This routine performs cleanup operations on the
593
+ ** adaptive classifier. It should be called before the
594
+ ** program is terminated. Its main function is to save
595
+ ** the adapted templates to a file.
596
+ ** Return: none
597
+ ** Exceptions: none
598
+ ** History: Tue Mar 19 14:37:06 1991, DSJ, Created.
599
+ */
600
+ char Filename[256];
601
+ FILE *File;
602
+
603
+ #ifndef SECURE_NAMES
604
+ if (EnableAdaptiveMatcher && SaveAdaptedTemplates) {
605
+ strcpy(Filename, imagefile);
606
+ strcat(Filename, ADAPT_TEMPLATE_SUFFIX);
607
+ File = fopen (Filename, "wb");
608
+ if (File == NULL)
609
+ cprintf ("Unable to save adapted templates to %s!\n", Filename);
610
+ else {
611
+ cprintf ("\nSaving adapted templates to %s ...", Filename);
612
+ fflush(stdout);
613
+ WriteAdaptedTemplates(File, AdaptedTemplates);
614
+ cprintf ("\n");
615
+ fclose(File);
616
+ }
617
+ }
618
+ #endif
619
+ if (PreTrainedTemplates == NULL)
620
+ return; // This function isn't safe to run twice.
621
+ EndDangerousAmbigs();
622
+ FreeNormProtos();
623
+ free_int_templates(PreTrainedTemplates);
624
+ PreTrainedTemplates = NULL;
625
+ FreeBitVector(AllProtosOn);
626
+ FreeBitVector(PrunedProtos);
627
+ FreeBitVector(AllConfigsOn);
628
+ FreeBitVector(AllProtosOff);
629
+ FreeBitVector(AllConfigsOff);
630
+ FreeBitVector(TempProtoMask);
631
+ AllProtosOn = NULL;
632
+ PrunedProtos = NULL;
633
+ AllConfigsOn = NULL;
634
+ AllProtosOff = NULL;
635
+ AllConfigsOff = NULL;
636
+ TempProtoMask = NULL;
637
+ } /* EndAdaptiveClassifier */
638
+
639
+
640
+ /*---------------------------------------------------------------------------*/
641
+ void InitAdaptiveClassifier() {
642
+ /*
643
+ ** Parameters: none
644
+ ** Globals:
645
+ ** BuiltInTemplatesFile
646
+ file to get built-in temps from
647
+ ** BuiltInCutoffsFile
648
+ file to get avg. feat per class from
649
+ ** PreTrainedTemplates
650
+ pre-trained configs and protos
651
+ ** AdaptedTemplates
652
+ templates adapted to current page
653
+ ** CharNormCutoffs
654
+ avg # of features per class
655
+ ** AllProtosOn
656
+ dummy proto mask with all bits 1
657
+ ** AllConfigsOn
658
+ dummy config mask with all bits 1
659
+ ** UsePreAdaptedTemplates
660
+ enables use of pre-adapted templates
661
+ ** Operation: This routine reads in the training information needed
662
+ ** by the adaptive classifier and saves it into global
663
+ ** variables.
664
+ ** Return: none
665
+ ** Exceptions: none
666
+ ** History: Mon Mar 11 12:49:34 1991, DSJ, Created.
667
+ */
668
+ int i;
669
+ FILE *File;
670
+ STRING Filename;
671
+
672
+ if (!EnableAdaptiveMatcher)
673
+ return;
674
+ if (PreTrainedTemplates != NULL)
675
+ EndAdaptiveClassifier(); // Don't leak with multiple inits.
676
+
677
+ Filename = language_data_path_prefix;
678
+ Filename += BuiltInTemplatesFile;
679
+ #ifndef SECURE_NAMES
680
+ // cprintf( "\nReading built-in templates from %s ...",
681
+ // Filename);
682
+ fflush(stdout);
683
+ #endif
684
+
685
+ #ifdef __UNIX__
686
+ File = Efopen (Filename.string(), "r");
687
+ #else
688
+ File = Efopen (Filename.string(), "rb");
689
+ #endif
690
+ PreTrainedTemplates = ReadIntTemplates (File, TRUE);
691
+ fclose(File);
692
+
693
+ Filename = language_data_path_prefix;
694
+ Filename += BuiltInCutoffsFile;
695
+ #ifndef SECURE_NAMES
696
+ // cprintf( "\nReading built-in pico-feature cutoffs from %s ...",
697
+ // Filename);
698
+ fflush(stdout);
699
+ #endif
700
+ ReadNewCutoffs (Filename.string(), PreTrainedTemplates->IndexFor,
701
+ CharNormCutoffs);
702
+
703
+ GetNormProtos();
704
+
705
+ InitIntegerMatcher();
706
+ InitIntegerFX();
707
+
708
+ AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
709
+ PrunedProtos = NewBitVector(MAX_NUM_PROTOS);
710
+ AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
711
+ AllProtosOff = NewBitVector(MAX_NUM_PROTOS);
712
+ AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
713
+ TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
714
+ set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
715
+ set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
716
+ set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
717
+ zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS));
718
+ zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
719
+
720
+ if (UsePreAdaptedTemplates) {
721
+ Filename = imagefile;
722
+ Filename += ADAPT_TEMPLATE_SUFFIX;
723
+ File = fopen (Filename.string(), "rb");
724
+ if (File == NULL)
725
+ AdaptedTemplates = NewAdaptedTemplates ();
726
+ else {
727
+ #ifndef SECURE_NAMES
728
+ cprintf ("\nReading pre-adapted templates from %s ...", Filename.string());
729
+ fflush(stdout);
730
+ #endif
731
+ AdaptedTemplates = ReadAdaptedTemplates (File);
732
+ cprintf ("\n");
733
+ fclose(File);
734
+ PrintAdaptedTemplates(stdout, AdaptedTemplates);
735
+
736
+ for (i = 0; i < (AdaptedTemplates->Templates)->NumClasses; i++) {
737
+ BaselineCutoffs[i] =
738
+ CharNormCutoffs[PreTrainedTemplates->IndexFor[
739
+ AdaptedTemplates->Templates->ClassIdFor[i]]];
740
+ }
741
+ }
742
+ } else {
743
+ if (AdaptedTemplates != NULL)
744
+ free_adapted_templates(AdaptedTemplates);
745
+ AdaptedTemplates = NewAdaptedTemplates ();
746
+ }
747
+ old_enable_learning = EnableLearning;
748
+
749
+ } /* InitAdaptiveClassifier */
750
+
751
+ void ResetAdaptiveClassifier() {
752
+ free_adapted_templates(AdaptedTemplates);
753
+ AdaptedTemplates = NULL;
754
+ }
755
+
756
+
757
+ /*---------------------------------------------------------------------------*/
758
+ void InitAdaptiveClassifierVars() {
759
+ /*
760
+ ** Parameters: none
761
+ ** Globals: BuiltInTemplatesFile, BuiltInCutoffsFile (handed to string_variable with their built-in defaults)
762
+ ** Operation: This routine installs the control knobs used by the
763
+ ** adaptive matcher by calling the Make...() routine of every knob declared above, then InitPicoFXVars() and InitOutlineFXVars().
764
+ ** Return: none
765
+ ** Exceptions: none
766
+ ** History: Mon Mar 11 12:49:34 1991, DSJ, Created.
767
+ */
768
+ VALUE dummy; /* NOTE(review): not referenced directly below; presumably required by the string_variable macro -- confirm */
769
+
770
+ string_variable (BuiltInTemplatesFile, "BuiltInTemplatesFile",
771
+ BUILT_IN_TEMPLATES_FILE);
772
+ string_variable (BuiltInCutoffsFile, "BuiltInCutoffsFile",
773
+ BUILT_IN_CUTOFFS_FILE);
774
+
775
+ MakeEnableAdaptiveMatcher();
776
+ MakeUsePreAdaptedTemplates();
777
+ MakeSaveAdaptedTemplates();
778
+
779
+ MakeEnableLearning();
780
+ MakeEnableAdaptiveDebugger();
781
+ MakeBadMatchPad();
782
+ MakeGoodAdaptiveMatch();
783
+ MakeGreatAdaptiveMatch();
784
+ MakeNoiseBlobLength();
785
+ MakeMinNumPermClasses();
786
+ MakeReliableConfigThreshold();
787
+ MakeMaxAngleDelta();
788
+ MakeLearningDebugLevel();
789
+ MakeMatcherDebugLevel();
790
+ MakeMatchDebugFlags();
791
+ MakeRatingMargin();
792
+ MakePerfectRating();
793
+ MakeEnableIntFX();
794
+ MakeEnableNewAdaptRules();
795
+ MakeRatingScale();
796
+ MakeCertaintyScale();
797
+ MakeFailedAdaptionsBeforeReset();
798
+
799
+ InitPicoFXVars();
800
+ InitOutlineFXVars(); //?
801
+
802
+ } /* InitAdaptiveClassifierVars */
803
+
804
+
805
+ /*---------------------------------------------------------------------------*/
806
+ void PrintAdaptiveStatistics(FILE *File) {
807
+ /*
808
+ ** Parameters:
809
+ ** File
810
+ open text file to print adaptive statistics to
811
+ ** Globals: none
812
+ ** Operation: Print to File the statistics which have been gathered
813
+ ** for the adaptive matcher.
814
+ ** Return: none
815
+ ** Exceptions: none
816
+ ** History: Thu Apr 18 14:37:37 1991, DSJ, Created.
817
+ */
818
+ #ifndef SECURE_NAMES
819
+
820
+ fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
821
+ fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
822
+ fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n",
823
+ NumClassesOutput,
824
+ ((AdaptiveMatcherCalls == 0) ? (0.0) :
825
+ ((float) NumClassesOutput / AdaptiveMatcherCalls)));
826
+ fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
827
+ BaselineClassifierCalls,
828
+ ((BaselineClassifierCalls == 0) ? (0.0) :
829
+ ((float) NumBaselineClassesTried / BaselineClassifierCalls)));
830
+ fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
831
+ CharNormClassifierCalls,
832
+ ((CharNormClassifierCalls == 0) ? (0.0) :
833
+ ((float) NumCharNormClassesTried / CharNormClassifierCalls)));
834
+ fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
835
+ AmbigClassifierCalls,
836
+ ((AmbigClassifierCalls == 0) ? (0.0) :
837
+ ((float) NumAmbigClassesTried / AmbigClassifierCalls)));
838
+
839
+ fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
840
+ fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
841
+ fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
842
+
843
+ if (UsePreAdaptedTemplates)
844
+ PrintAdaptedTemplates(File, AdaptedTemplates);
845
+ #endif
846
+ } /* PrintAdaptiveStatistics */
847
+
848
+
849
+ /*---------------------------------------------------------------------------*/
850
+ void SettupPass1() {
851
+ /*
852
+ ** Parameters: none
853
+ ** Globals:
854
+ ** EnableLearning
855
+ set to TRUE by this routine
856
+ ** Operation: This routine prepares the adaptive matcher for the start
857
+ ** of the first pass. Learning is enabled (unless it is
858
+ ** disabled for the whole program).
859
+ ** Return: none
860
+ ** Exceptions: none
861
+ ** History: Mon Apr 15 16:39:29 1991, DSJ, Created.
862
+ */
863
+ /* Note: this is somewhat redundant, it simply says that if learning is
864
+ enabled then it will remain enabled on the first pass. If it is
865
+ disabled, then it will remain disabled. This is only put here to
866
+ make it very clear that learning is controlled directly by the global
867
+ setting of EnableLearning. */
868
+ EnableLearning = old_enable_learning;
869
+
870
+ SettupStopperPass1();
871
+
872
+ } /* SettupPass1 */
873
+
874
+
875
+ /*---------------------------------------------------------------------------*/
876
+ void SettupPass2() {
877
+ /*
878
+ ** Parameters: none
879
+ ** Globals:
880
+ ** EnableLearning
881
+ set to FALSE by this routine
882
+ ** Operation: This routine prepares the adaptive matcher for the start
883
+ ** of the second pass. Further learning is disabled.
884
+ ** Return: none
885
+ ** Exceptions: none
886
+ ** History: Mon Apr 15 16:39:29 1991, DSJ, Created.
887
+ */
888
+ EnableLearning = FALSE;
889
+ SettupStopperPass2();
890
+
891
+ } /* SettupPass2 */
892
+
893
+
894
+ /*---------------------------------------------------------------------------*/
895
+ void MakeNewAdaptedClass(TBLOB *Blob,
896
+ LINE_STATS *LineStats,
897
+ CLASS_ID ClassId,
898
+ ADAPT_TEMPLATES Templates) {
899
+ /*
900
+ ** Parameters:
901
+ ** Blob
902
+ blob to model new class after
903
+ ** LineStats
904
+ statistics for text row blob is in
905
+ ** ClassId
906
+ id of new class to be created
907
+ ** Templates
908
+ adapted templates to add new class to
909
+ ** Globals:
910
+ ** AllProtosOn
911
+ dummy mask with all 1's
912
+ ** BaselineCutoffs
913
+ kludge needed to get cutoffs
914
+ ** PreTrainedTemplates
915
+ kludge needed to get cutoffs
916
+ ** Operation: This routine creates a new adapted class and uses Blob
917
+ ** as the model for the first config in that class.
918
+ ** Return: none
919
+ ** Exceptions: none
920
+ ** History: Thu Mar 14 12:49:39 1991, DSJ, Created.
921
+ */
922
+ FEATURE_SET Features;
923
+ int Fid, Pid;
924
+ FEATURE Feature;
925
+ int NumFeatures;
926
+ TEMP_PROTO TempProto;
927
+ PROTO Proto;
928
+ ADAPT_CLASS Class;
929
+ INT_CLASS IClass;
930
+ CLASS_INDEX ClassIndex;
931
+ TEMP_CONFIG Config;
932
+
933
+ NormMethod = baseline;
934
+ Features = ExtractOutlineFeatures (Blob, LineStats);
935
+ NumFeatures = Features->NumFeatures;
936
+ if (NumFeatures > UNLIKELY_NUM_FEAT) {
937
+ FreeFeatureSet(Features);
938
+ return;
939
+ }
940
+
941
+ Class = NewAdaptedClass ();
942
+ ClassIndex = AddAdaptedClass (Templates, Class, ClassId);
943
+ Config = NewTempConfig (NumFeatures - 1);
944
+ TempConfigFor (Class, 0) = Config;
945
+
946
+ /* this is a kludge to construct cutoffs for adapted templates */
947
+ if (Templates == AdaptedTemplates)
948
+ BaselineCutoffs[ClassIndex] =
949
+ CharNormCutoffs[PreTrainedTemplates->IndexFor[ClassId]];
950
+
951
+ IClass = ClassForClassId (Templates->Templates, ClassId);
952
+
953
+ for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
954
+ Pid = AddIntProto (IClass);
955
+ assert (Pid != NO_PROTO);
956
+
957
+ Feature = Features->Features[Fid];
958
+ TempProto = NewTempProto ();
959
+ Proto = &(TempProto->Proto);
960
+
961
+ /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
962
+ ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
963
+ instead of the -0.25 to 0.75 used in baseline normalization */
964
+ Proto->Angle = Feature->Params[OutlineFeatDir];
965
+ Proto->X = Feature->Params[OutlineFeatX];
966
+ Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
967
+ Proto->Length = Feature->Params[OutlineFeatLength];
968
+ FillABC(Proto);
969
+
970
+ TempProto->ProtoId = Pid;
971
+ SET_BIT (Config->Protos, Pid);
972
+
973
+ ConvertProto(Proto, Pid, IClass);
974
+ AddProtoToProtoPruner(Proto, Pid, IClass);
975
+
976
+ Class->TempProtos = push (Class->TempProtos, TempProto);
977
+ }
978
+ FreeFeatureSet(Features);
979
+
980
+ AddIntConfig(IClass);
981
+ ConvertConfig (AllProtosOn, 0, IClass);
982
+
983
+ if (LearningDebugLevel >= 1) {
984
+ cprintf ("Added new class '%s' with index %d and %d protos.\n",
985
+ unicharset.id_to_unichar(ClassId), ClassIndex, NumFeatures);
986
+ }
987
+ } /* MakeNewAdaptedClass */
988
+
989
+
990
+ /*---------------------------------------------------------------------------*/
991
+ int GetAdaptiveFeatures(TBLOB *Blob,
992
+ LINE_STATS *LineStats,
993
+ INT_FEATURE_ARRAY IntFeatures,
994
+ FEATURE_SET *FloatFeatures) {
995
+ /*
996
+ ** Parameters:
997
+ ** Blob
998
+ blob to extract features from
999
+ ** LineStats
1000
+ statistics about text row blob is in
1001
+ ** IntFeatures
1002
+ array to fill with integer features
1003
+ ** FloatFeatures
1004
+ place to return actual floating-pt features
1005
+ ** Globals: none
1006
+ ** Operation: This routine sets up the feature extractor to extract
1007
+ ** baseline normalized pico-features.
1008
+ ** The extracted pico-features are converted
1009
+ ** to integer form and placed in IntFeatures. The original
1010
+ ** floating-pt. features are returned in FloatFeatures.
1011
+ ** Return: Number of pico-features returned (0 if an error occurred)
1012
+ ** Exceptions: none
1013
+ ** History: Tue Mar 12 17:55:18 1991, DSJ, Created.
1014
+ */
1015
+ FEATURE_SET Features;
1016
+ int NumFeatures;
1017
+
1018
+ NormMethod = baseline;
1019
+ Features = ExtractPicoFeatures (Blob, LineStats);
1020
+
1021
+ NumFeatures = Features->NumFeatures;
1022
+ if (NumFeatures > UNLIKELY_NUM_FEAT) {
1023
+ FreeFeatureSet(Features);
1024
+ return (0);
1025
+ }
1026
+
1027
+ ComputeIntFeatures(Features, IntFeatures);
1028
+ *FloatFeatures = Features;
1029
+
1030
+ return (NumFeatures);
1031
+
1032
+ } /* GetAdaptiveFeatures */
1033
+
1034
+
1035
+ /**----------------------------------------------------------------------------
1036
+ Private Code
1037
+ ----------------------------------------------------------------------------**/
1038
+ /*---------------------------------------------------------------------------*/
1039
+ int AdaptableWord(TWERD *Word,
1040
+ const char *BestChoice,
1041
+ const char *BestChoice_lengths,
1042
+ const char *BestRawChoice,
1043
+ const char *BestRawChoice_lengths) {
1044
+ /*
1045
+ ** Parameters:
1046
+ ** Word
1047
+ current word
1048
+ ** BestChoice
1049
+ best overall choice for word with context
1050
+ ** BestRawChoice
1051
+ best choice for word without context
1052
+ ** Globals: none
1053
+ ** Operation: Return TRUE if the specified word is acceptable for
1054
+ ** adaptation.
1055
+ ** Return: TRUE or FALSE
1056
+ ** Exceptions: none
1057
+ ** History: Thu May 30 14:25:06 1991, DSJ, Created.
1058
+ */
1059
+ int BestChoiceLength;
1060
+
1061
+ return ( /* rules that apply in general - simplest to compute first */
1062
+ /* EnableLearning && */
1063
+ /* new rules */
1064
+ BestChoice != NULL && BestRawChoice != NULL && Word != NULL &&
1065
+ (BestChoiceLength = strlen (BestChoice_lengths)) > 0 &&
1066
+ BestChoiceLength == NumBlobsIn (Word) &&
1067
+ BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && (
1068
+ (EnableNewAdaptRules
1069
+ &&
1070
+ CurrentBestChoiceAdjustFactor
1071
+ ()
1072
+ <=
1073
+ ADAPTABLE_WERD
1074
+ &&
1075
+ AlternativeChoicesWorseThan
1076
+ (ADAPTABLE_WERD)
1077
+ &&
1078
+ CurrentBestChoiceIs
1079
+ (BestChoice, BestChoice_lengths))
1080
+ ||
1081
+ /* old rules */
1082
+ (!EnableNewAdaptRules
1083
+ &&
1084
+ BestChoiceLength
1085
+ ==
1086
+ strlen
1087
+ (BestRawChoice_lengths)
1088
+ &&
1089
+ ((valid_word (BestChoice) && case_ok (BestChoice, BestChoice_lengths)) || (valid_number (BestChoice, BestChoice_lengths) && pure_number (BestChoice, BestChoice_lengths))) && punctuation_ok (BestChoice, BestChoice_lengths) != -1 && punctuation_ok (BestChoice, BestChoice_lengths) <= 1)));
1090
+
1091
+ } /* AdaptableWord */
1092
+
1093
+
1094
+ /*---------------------------------------------------------------------------*/
1095
+ void AdaptToChar(TBLOB *Blob,
1096
+ LINE_STATS *LineStats,
1097
+ CLASS_ID ClassId,
1098
+ FLOAT32 Threshold) {
1099
+ /*
1100
+ ** Parameters:
1101
+ ** Blob
1102
+ blob to add to templates for ClassId
1103
+ ** LineStats
1104
+ statistics about text line blob is in
1105
+ ** ClassId
1106
+ class to add blob to
1107
+ ** Threshold
1108
+ minimum match rating to existing template
1109
+ ** Globals:
1110
+ ** AdaptedTemplates
1111
+ current set of adapted templates
1112
+ ** AllProtosOn
1113
+ dummy mask to match against all protos
1114
+ ** AllConfigsOn
1115
+ dummy mask to match against all configs
1116
+ ** Operation:
1117
+ ** Return: none
1118
+ ** Exceptions: none
1119
+ ** History: Thu Mar 14 09:36:03 1991, DSJ, Created.
1120
+ */
1121
+ int NumFeatures;
1122
+ INT_FEATURE_ARRAY IntFeatures;
1123
+ INT_RESULT_STRUCT IntResult;
1124
+ CLASS_INDEX ClassIndex;
1125
+ INT_CLASS IClass;
1126
+ ADAPT_CLASS Class;
1127
+ TEMP_CONFIG TempConfig;
1128
+ FEATURE_SET FloatFeatures;
1129
+ int NewTempConfigId;
1130
+
1131
+ NumCharsAdaptedTo++;
1132
+ if (!LegalClassId (ClassId))
1133
+ return;
1134
+
1135
+ if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
1136
+ MakeNewAdaptedClass(Blob, LineStats, ClassId, AdaptedTemplates);
1137
+ }
1138
+ else {
1139
+ IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId);
1140
+ ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
1141
+ Class = AdaptedTemplates->Class[ClassIndex];
1142
+
1143
+ NumFeatures = GetAdaptiveFeatures (Blob, LineStats,
1144
+ IntFeatures, &FloatFeatures);
1145
+ if (NumFeatures <= 0)
1146
+ return;
1147
+
1148
+ SetBaseLineMatch();
1149
+ IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
1150
+ NumFeatures, NumFeatures, IntFeatures, 0,
1151
+ &IntResult, NO_DEBUG);
1152
+
1153
+ SetAdaptiveThreshold(Threshold);
1154
+
1155
+ if (IntResult.Rating <= Threshold) {
1156
+ if (ConfigIsPermanent (Class, IntResult.Config)) {
1157
+ if (LearningDebugLevel >= 1)
1158
+ cprintf ("Found good match to perm config %d = %4.1f%%.\n",
1159
+ IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
1160
+ FreeFeatureSet(FloatFeatures);
1161
+ return;
1162
+ }
1163
+
1164
+ TempConfig = TempConfigFor (Class, IntResult.Config);
1165
+ IncreaseConfidence(TempConfig);
1166
+ if (LearningDebugLevel >= 1)
1167
+ cprintf ("Increasing reliability of temp config %d to %d.\n",
1168
+ IntResult.Config, TempConfig->NumTimesSeen);
1169
+
1170
+ if (TempConfigReliable (TempConfig))
1171
+ MakePermanent (AdaptedTemplates, ClassId, IntResult.Config,
1172
+ Blob, LineStats);
1173
+ }
1174
+ else {
1175
+ if (LearningDebugLevel >= 1)
1176
+ cprintf ("Found poor match to temp config %d = %4.1f%%.\n",
1177
+ IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
1178
+ NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates,
1179
+ ClassId,
1180
+ NumFeatures,
1181
+ IntFeatures,
1182
+ FloatFeatures);
1183
+
1184
+ if (NewTempConfigId >= 0 &&
1185
+ TempConfigReliable (TempConfigFor (Class, NewTempConfigId)))
1186
+ MakePermanent (AdaptedTemplates, ClassId, NewTempConfigId,
1187
+ Blob, LineStats);
1188
+
1189
+ #ifndef GRAPHICS_DISABLED
1190
+ if (LearningDebugLevel >= 1) {
1191
+ IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
1192
+ NumFeatures, NumFeatures, IntFeatures, 0,
1193
+ &IntResult, NO_DEBUG);
1194
+ cprintf ("Best match to temp config %d = %4.1f%%.\n",
1195
+ IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
1196
+ if (LearningDebugLevel >= 2) {
1197
+ uinT32 ConfigMask;
1198
+ ConfigMask = 1 << IntResult.Config;
1199
+ ShowMatchDisplay();
1200
+ IntegerMatcher (IClass, AllProtosOn, (BIT_VECTOR)&ConfigMask,
1201
+ NumFeatures, NumFeatures, IntFeatures, 0,
1202
+ &IntResult, 6 | 0x19);
1203
+ UpdateMatchDisplay();
1204
+ GetClassToDebug ("Adapting");
1205
+ }
1206
+ }
1207
+ #endif // GRAPHICS_DISABLED
1208
+ }
1209
+ FreeFeatureSet(FloatFeatures);
1210
+ }
1211
+ } /* AdaptToChar */
1212
+
1213
+
1214
+ /*---------------------------------------------------------------------------*/
1215
+ void AdaptToPunc(TBLOB *Blob,
1216
+ LINE_STATS *LineStats,
1217
+ CLASS_ID ClassId,
1218
+ FLOAT32 Threshold) {
1219
+ /*
1220
+ ** Parameters:
1221
+ ** Blob
1222
+ blob to add to templates for ClassId
1223
+ ** LineStats
1224
+ statistics about text line blob is in
1225
+ ** ClassId
1226
+ class to add blob to
1227
+ ** Threshold
1228
+ minimum match rating to existing template
1229
+ ** Globals:
1230
+ ** PreTrainedTemplates
1231
+ current set of built-in templates
1232
+ ** Operation:
1233
+ ** Return: none
1234
+ ** Exceptions: none
1235
+ ** History: Thu Mar 14 09:36:03 1991, DSJ, Created.
1236
+ */
1237
+ ADAPT_RESULTS Results;
1238
+ int i;
1239
+
1240
+ Results.BlobLength = MAX_INT32;
1241
+ Results.NumMatches = 0;
1242
+ Results.BestRating = WORST_POSSIBLE_RATING;
1243
+ Results.BestClass = NO_CLASS;
1244
+ Results.BestConfig = 0;
1245
+ InitMatcherRatings (Results.Ratings);
1246
+ CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
1247
+ RemoveBadMatches(&Results);
1248
+
1249
+ if (Results.NumMatches != 1) {
1250
+ if (LearningDebugLevel >= 1) {
1251
+ cprintf ("Rejecting punc = %s (Alternatives = ",
1252
+ unicharset.id_to_unichar(ClassId));
1253
+
1254
+ for (i = 0; i < Results.NumMatches; i++)
1255
+ cprintf ("%s", unicharset.id_to_unichar(Results.Classes[i]));
1256
+ cprintf (")\n");
1257
+ }
1258
+ return;
1259
+ }
1260
+
1261
+ #ifndef SECURE_NAMES
1262
+ if (LearningDebugLevel >= 1)
1263
+ cprintf ("Adapting to punc = %s, thr= %g\n",
1264
+ unicharset.id_to_unichar(ClassId), Threshold);
1265
+ #endif
1266
+ AdaptToChar(Blob, LineStats, ClassId, Threshold);
1267
+
1268
+ } /* AdaptToPunc */
1269
+
1270
+
1271
+ /*---------------------------------------------------------------------------*/
1272
+ void AddNewResult(ADAPT_RESULTS *Results,
1273
+ CLASS_ID ClassId,
1274
+ FLOAT32 Rating,
1275
+ int ConfigId) {
1276
+ /*
1277
+ ** Parameters:
1278
+ ** Results
1279
+ results to add new result to
1280
+ ** ClassId
1281
+ class of new result
1282
+ ** Rating
1283
+ rating of new result
1284
+ ** ConfigId
1285
+ config id of new result
1286
+ ** Globals:
1287
+ ** BadMatchPad
1288
+ defines limits of an acceptable match
1289
+ ** Operation: This routine adds the result of a classification into
1290
+ ** Results. If the new rating is much worse than the current
1291
+ ** best rating, it is not entered into results because it
1292
+ ** would end up being stripped later anyway. If the new rating
1293
+ ** is better than the old rating for the class, it replaces the
1294
+ ** old rating. If this is the first rating for the class, the
1295
+ ** class is added to the list of matched classes in Results.
1296
+ ** If the new rating is better than the best so far, it
1297
+ ** becomes the best so far.
1298
+ ** Return: none
1299
+ ** Exceptions: none
1300
+ ** History: Tue Mar 12 18:19:29 1991, DSJ, Created.
1301
+ */
1302
+ FLOAT32 OldRating;
1303
+ INT_CLASS_STRUCT* CharClass = NULL;
1304
+
1305
+ OldRating = Results->Ratings[ClassId];
1306
+ if (Rating <= Results->BestRating + BadMatchPad && Rating < OldRating) {
1307
+ Results->Ratings[ClassId] = Rating;
1308
+ if (ClassId != NO_CLASS)
1309
+ CharClass = ClassForClassId(PreTrainedTemplates, ClassId);
1310
+ if (CharClass != NULL && CharClass->NumConfigs == 32)
1311
+ Results->Configs[ClassId] = ConfigId;
1312
+ else
1313
+ Results->Configs[ClassId] = ~0;
1314
+
1315
+ if (Rating < Results->BestRating) {
1316
+ Results->BestRating = Rating;
1317
+ Results->BestClass = ClassId;
1318
+ Results->BestConfig = ConfigId;
1319
+ }
1320
+
1321
+ /* if this is first rating for class, add to list of classes matched */
1322
+ if (OldRating == WORST_POSSIBLE_RATING)
1323
+ Results->Classes[Results->NumMatches++] = ClassId;
1324
+ }
1325
+ } /* AddNewResult */
1326
+
1327
+
1328
+ /*---------------------------------------------------------------------------*/
1329
+ void AmbigClassifier(TBLOB *Blob,
1330
+ LINE_STATS *LineStats,
1331
+ INT_TEMPLATES Templates,
1332
+ UNICHAR_ID *Ambiguities,
1333
+ ADAPT_RESULTS *Results) {
1334
+ /*
1335
+ ** Parameters:
1336
+ ** Blob
1337
+ blob to be classified
1338
+ ** LineStats
1339
+ statistics for text line Blob is in
1340
+ ** Templates
1341
+ built-in templates to classify against
1342
+ ** Ambiguities
1343
+ array of class id's to match against
1344
+ ** Results
1345
+ place to put match results
1346
+ ** Globals:
1347
+ ** AllProtosOn
1348
+ mask that enables all protos
1349
+ ** AllConfigsOn
1350
+ mask that enables all configs
1351
+ ** Operation: This routine is identical to CharNormClassifier()
1352
+ ** except that it does no class pruning. It simply matches
1353
+ ** the unknown blob against the classes listed in
1354
+ ** Ambiguities.
1355
+ ** Return: none
1356
+ ** Exceptions: none
1357
+ ** History: Tue Mar 12 19:40:36 1991, DSJ, Created.
1358
+ */
1359
+ int NumFeatures;
1360
+ INT_FEATURE_ARRAY IntFeatures;
1361
+ CLASS_NORMALIZATION_ARRAY CharNormArray;
1362
+ INT_RESULT_STRUCT IntResult;
1363
+ CLASS_ID ClassId;
1364
+ CLASS_INDEX ClassIndex;
1365
+
1366
+ AmbigClassifierCalls++;
1367
+
1368
+ NumFeatures = GetCharNormFeatures (Blob, LineStats,
1369
+ Templates,
1370
+ IntFeatures, CharNormArray,
1371
+ &(Results->BlobLength));
1372
+ if (NumFeatures <= 0)
1373
+ return;
1374
+
1375
+ if (MatcherDebugLevel >= 2)
1376
+ cprintf ("AM Matches = ");
1377
+
1378
+ while (*Ambiguities >= 0) {
1379
+ ClassId = *Ambiguities;
1380
+ ClassIndex = Templates->IndexFor[ClassId];
1381
+
1382
+ SetCharNormMatch();
1383
+ IntegerMatcher (ClassForClassId (Templates, ClassId),
1384
+ AllProtosOn, AllConfigsOn,
1385
+ Results->BlobLength, NumFeatures, IntFeatures,
1386
+ CharNormArray[ClassIndex], &IntResult, NO_DEBUG);
1387
+
1388
+ if (MatcherDebugLevel >= 2)
1389
+ cprintf ("%s-%-2d %2.0f ", unicharset.id_to_unichar(ClassId),
1390
+ IntResult.Config,
1391
+ IntResult.Rating * 100.0);
1392
+
1393
+ AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
1394
+
1395
+ Ambiguities++;
1396
+
1397
+ NumAmbigClassesTried++;
1398
+ }
1399
+ if (MatcherDebugLevel >= 2)
1400
+ cprintf ("\n");
1401
+
1402
+ } /* AmbigClassifier */
1403
+
1404
+ /*---------------------------------------------------------------------------*/
1405
+ // Factored-out calls to IntegerMatcher based on class pruner results.
1406
+ // Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.
1407
+ void MasterMatcher(INT_TEMPLATES templates,
1408
+ inT16 num_features,
1409
+ INT_FEATURE_ARRAY features,
1410
+ CLASS_NORMALIZATION_ARRAY norm_factors,
1411
+ ADAPT_CLASS* classes,
1412
+ int debug,
1413
+ int num_classes,
1414
+ CLASS_PRUNER_RESULTS results,
1415
+ ADAPT_RESULTS* final_results) {
1416
+ for (int c = 0; c < num_classes; c++) {
1417
+ CLASS_ID class_id = results[c].Class;
1418
+ INT_RESULT_STRUCT& int_result = results[c].IMResult;
1419
+ CLASS_INDEX class_index = templates->IndexFor[class_id];
1420
+ BIT_VECTOR protos = classes != NULL ? classes[class_index]->PermProtos
1421
+ : AllProtosOn;
1422
+ BIT_VECTOR configs = classes != NULL ? classes[class_index]->PermConfigs
1423
+ : AllConfigsOn;
1424
+
1425
+ IntegerMatcher(ClassForClassId(templates, class_id),
1426
+ protos, configs, final_results->BlobLength,
1427
+ num_features, features, norm_factors[class_index],
1428
+ &int_result, NO_DEBUG);
1429
+ // Compute class feature corrections.
1430
+ double miss_penalty = tessedit_class_miss_scale *
1431
+ int_result.FeatureMisses;
1432
+ if (MatcherDebugLevel >= 2 || display_ratings > 1) {
1433
+ cprintf("%s-%-2d %2.1f(CP%2.1f, IM%2.1f + MP%2.1f) ",
1434
+ unicharset.id_to_unichar(class_id), int_result.Config,
1435
+ (int_result.Rating + miss_penalty) * 100.0,
1436
+ results[c].Rating * 100.0,
1437
+ int_result.Rating * 100.0, miss_penalty * 100.0);
1438
+ if (c % 4 == 3)
1439
+ cprintf ("\n");
1440
+ }
1441
+ int_result.Rating += miss_penalty;
1442
+ if (int_result.Rating > WORST_POSSIBLE_RATING)
1443
+ int_result.Rating = WORST_POSSIBLE_RATING;
1444
+ AddNewResult(final_results, class_id, int_result.Rating, int_result.Config);
1445
+ }
1446
+ if (MatcherDebugLevel >= 2 || display_ratings > 1)
1447
+ cprintf("\n");
1448
+ }
1449
+
1450
+ /*---------------------------------------------------------------------------*/
1451
+ UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
1452
+ LINE_STATS *LineStats,
1453
+ ADAPT_TEMPLATES Templates,
1454
+ ADAPT_RESULTS *Results) {
1455
+ /*
1456
+ ** Parameters:
1457
+ ** Blob
1458
+ blob to be classified
1459
+ ** LineStats
1460
+ statistics for text line Blob is in
1461
+ ** Templates
1462
+ current set of adapted templates
1463
+ ** Results
1464
+ place to put match results
1465
+ ** Globals:
1466
+ ** BaselineCutoffs
1467
+ expected num features for each class
1468
+ ** Operation: This routine extracts baseline normalized features
1469
+ ** from the unknown character and matches them against the
1470
+ ** specified set of templates. The classes which match
1471
+ ** are added to Results.
1472
+ ** Return: Array of possible ambiguous chars that should be checked.
1473
+ ** Exceptions: none
1474
+ ** History: Tue Mar 12 19:38:03 1991, DSJ, Created.
1475
+ */
1476
+ int NumFeatures;
1477
+ int NumClasses;
1478
+ INT_FEATURE_ARRAY IntFeatures;
1479
+ CLASS_NORMALIZATION_ARRAY CharNormArray;
1480
+ CLASS_ID ClassId;
1481
+ CLASS_INDEX ClassIndex;
1482
+
1483
+ BaselineClassifierCalls++;
1484
+
1485
+ NumFeatures = GetBaselineFeatures (Blob, LineStats,
1486
+ Templates->Templates,
1487
+ IntFeatures, CharNormArray,
1488
+ &(Results->BlobLength));
1489
+ if (NumFeatures <= 0)
1490
+ return NULL;
1491
+
1492
+ NumClasses = ClassPruner (Templates->Templates, NumFeatures,
1493
+ IntFeatures, CharNormArray,
1494
+ BaselineCutoffs, Results->CPResults,
1495
+ MatchDebugFlags);
1496
+
1497
+ NumBaselineClassesTried += NumClasses;
1498
+
1499
+ if (MatcherDebugLevel >= 2 || display_ratings > 1)
1500
+ cprintf ("BL Matches = ");
1501
+
1502
+ SetBaseLineMatch();
1503
+ MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray,
1504
+ Templates->Class, MatchDebugFlags, NumClasses,
1505
+ Results->CPResults, Results);
1506
+
1507
+ ClassId = Results->BestClass;
1508
+ if (ClassId == NO_CLASS)
1509
+ return (NULL);
1510
+ /* this is a bug - maybe should return "" */
1511
+
1512
+ ClassIndex = Templates->Templates->IndexFor[ClassId];
1513
+ return (Templates->Class[ClassIndex]->
1514
+ Config[Results->BestConfig].Perm);
1515
+ } /* BaselineClassifier */
1516
+
1517
+
1518
+ /*---------------------------------------------------------------------------*/
1519
+ void CharNormClassifier(TBLOB *Blob,
1520
+ LINE_STATS *LineStats,
1521
+ INT_TEMPLATES Templates,
1522
+ ADAPT_RESULTS *Results) {
1523
+ /*
1524
+ ** Parameters:
1525
+ ** Blob
1526
+ blob to be classified
1527
+ ** LineStats
1528
+ statistics for text line Blob is in
1529
+ ** Templates
1530
+ templates to classify unknown against
1531
+ ** Results
1532
+ place to put match results
1533
+ ** Globals:
1534
+ ** CharNormCutoffs
1535
+ expected num features for each class
1536
+ ** AllProtosOn
1537
+ mask that enables all protos
1538
+ ** AllConfigsOn
1539
+ mask that enables all configs
1540
+ ** Operation: This routine extracts character normalized features
1541
+ ** from the unknown character and matches them against the
1542
+ ** specified set of templates. The classes which match
1543
+ ** are added to Results.
1544
+ ** Return: none
1545
+ ** Exceptions: none
1546
+ ** History: Tue Mar 12 16:02:52 1991, DSJ, Created.
1547
+ */
1548
+ int NumFeatures;
1549
+ int NumClasses;
1550
+ INT_FEATURE_ARRAY IntFeatures;
1551
+ CLASS_NORMALIZATION_ARRAY CharNormArray;
1552
+
1553
+ CharNormClassifierCalls++;
1554
+
1555
+ NumFeatures = GetCharNormFeatures(Blob, LineStats,
1556
+ Templates,
1557
+ IntFeatures, CharNormArray,
1558
+ &(Results->BlobLength));
1559
+ if (NumFeatures <= 0)
1560
+ return;
1561
+
1562
+ NumClasses = ClassPruner(Templates, NumFeatures,
1563
+ IntFeatures, CharNormArray,
1564
+ CharNormCutoffs, Results->CPResults,
1565
+ MatchDebugFlags);
1566
+
1567
+ if (tessedit_single_match && NumClasses > 1)
1568
+ NumClasses = 1;
1569
+ NumCharNormClassesTried += NumClasses;
1570
+
1571
+ if (MatcherDebugLevel >= 2 || display_ratings > 1)
1572
+ cprintf("CN Matches = ");
1573
+
1574
+ SetCharNormMatch();
1575
+ MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray,
1576
+ NULL, MatchDebugFlags, NumClasses,
1577
+ Results->CPResults, Results);
1578
+ } /* CharNormClassifier */
1579
+
1580
+
1581
+ /*---------------------------------------------------------------------------*/
1582
+ void ClassifyAsNoise(TBLOB *Blob,
1583
+ LINE_STATS *LineStats,
1584
+ ADAPT_RESULTS *Results) {
1585
+ /*
1586
+ ** Parameters:
1587
+ ** Blob
1588
+ blob to be classified
1589
+ ** LineStats
1590
+ statistics for text line Blob is in
1591
+ ** Results
1592
+ results to add noise classification to
1593
+ ** Globals:
1594
+ ** NoiseBlobLength
1595
+ avg. length of a noise blob
1596
+ ** Operation: This routine computes a rating which reflects the
1597
+ ** likelihood that the blob being classified is a noise
1598
+ ** blob. NOTE: assumes that the blob length has already been
1599
+ ** computed and placed into Results.
1600
+ ** Return: none
1601
+ ** Exceptions: none
1602
+ ** History: Tue Mar 12 18:36:52 1991, DSJ, Created.
1603
+ */
1604
+ register FLOAT32 Rating;
1605
+
1606
+ Rating = Results->BlobLength / NoiseBlobLength;
1607
+ Rating *= Rating;
1608
+ Rating /= 1.0 + Rating;
1609
+
1610
+ AddNewResult (Results, NO_CLASS, Rating, 0);
1611
+ } /* ClassifyAsNoise */
1612
+
1613
+
1614
+ /*---------------------------------------------------------------------------*/
1615
+ int CompareCurrentRatings( //CLASS_ID *Class1,
1616
+ const void *arg1,
1617
+ const void *arg2) { //CLASS_ID *Class2)
1618
+ /*
1619
+ ** Parameters:
1620
+ ** Class1, Class2
1621
+ classes whose ratings are to be compared
1622
+ ** Globals:
1623
+ ** CurrentRatings
1624
+ contains actual ratings for each class
1625
+ ** Operation: This routine gets the ratings for the 2 specified classes
1626
+ ** from a global variable (CurrentRatings) and returns:
1627
+ ** -1 if Rating1 < Rating2
1628
+ ** 0 if Rating1 = Rating2
1629
+ ** 1 if Rating1 > Rating2
1630
+ ** Return: Order of classes based on their ratings (see above).
1631
+ ** Exceptions: none
1632
+ ** History: Tue Mar 12 14:18:31 1991, DSJ, Created.
1633
+ */
1634
+ FLOAT32 Rating1, Rating2;
1635
+ CLASS_ID *Class1 = (CLASS_ID *) arg1;
1636
+ CLASS_ID *Class2 = (CLASS_ID *) arg2;
1637
+
1638
+ Rating1 = CurrentRatings[*Class1];
1639
+ Rating2 = CurrentRatings[*Class2];
1640
+
1641
+ if (Rating1 < Rating2)
1642
+ return (-1);
1643
+ else if (Rating1 > Rating2)
1644
+ return (1);
1645
+ else
1646
+ return (0);
1647
+
1648
+ } /* CompareCurrentRatings */
1649
+
1650
+
1651
+ /*---------------------------------------------------------------------------*/
1652
+ LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results) {
1653
+ /*
1654
+ ** Parameters:
1655
+ ** Results
1656
+ adaptive matcher results to convert to choices
1657
+ ** Globals: none
1658
+ ** Operation: This routine creates a choice for each matching class
1659
+ ** in Results (up to MAX_MATCHES) and returns a list of
1660
+ ** these choices. The match
1661
+ ** ratings are converted to be the ratings and certainties
1662
+ ** as used by the context checkers.
1663
+ ** Return: List of choices.
1664
+ ** Exceptions: none
1665
+ ** History: Tue Mar 12 08:55:37 1991, DSJ, Created.
1666
+ */
1667
+ int i;
1668
+ LIST Choices;
1669
+ CLASS_ID NextMatch;
1670
+ FLOAT32 Rating;
1671
+ FLOAT32 Certainty;
1672
+ const char *NextMatch_unichar;
1673
+ char choice_lengths[2] = {0, 0};
1674
+
1675
+ if (Results->NumMatches > MAX_MATCHES)
1676
+ Results->NumMatches = MAX_MATCHES;
1677
+
1678
+ for (Choices = NIL, i = 0; i < Results->NumMatches; i++) {
1679
+ NextMatch = Results->Classes[i];
1680
+ Rating = Certainty = Results->Ratings[NextMatch];
1681
+ Rating *= RatingScale * Results->BlobLength;
1682
+ Certainty *= -CertaintyScale;
1683
+ if (NextMatch != NO_CLASS)
1684
+ NextMatch_unichar = unicharset.id_to_unichar(NextMatch);
1685
+ else
1686
+ NextMatch_unichar = "";
1687
+ choice_lengths[0] = strlen(NextMatch_unichar);
1688
+ Choices = append_choice (Choices,
1689
+ NextMatch_unichar,
1690
+ choice_lengths,
1691
+ Rating, Certainty,
1692
+ Results->Configs[NextMatch],
1693
+ unicharset.get_script(NextMatch));
1694
+ }
1695
+ return (Choices);
1696
+
1697
+ } /* ConvertMatchesToChoices */
1698
+
1699
+
1700
+ /*---------------------------------------------------------------------------*/
1701
+ #ifndef GRAPHICS_DISABLED
1702
+ void DebugAdaptiveClassifier(TBLOB *Blob,
1703
+ LINE_STATS *LineStats,
1704
+ ADAPT_RESULTS *Results) {
1705
+ /*
1706
+ ** Parameters:
1707
+ ** Blob
1708
+ blob whose classification is being debugged
1709
+ ** LineStats
1710
+ statistics for text line blob is in
1711
+ ** Results
1712
+ results of match being debugged
1713
+ ** Globals: none
1714
+ ** Operation:
1715
+ ** Return: none
1716
+ ** Exceptions: none
1717
+ ** History: Wed Mar 13 16:44:41 1991, DSJ, Created.
1718
+ */
1719
+ const char *Prompt =
1720
+ "Left-click in IntegerMatch Window to continue or right click to debug...";
1721
+ const char *DebugMode = "All Templates";
1722
+ CLASS_ID LastClass = Results->BestClass;
1723
+ CLASS_ID ClassId;
1724
+ BOOL8 AdaptiveOn = TRUE;
1725
+ BOOL8 PreTrainedOn = TRUE;
1726
+
1727
+ ShowMatchDisplay();
1728
+ cprintf ("\nDebugging class = %s (%s) ...\n",
1729
+ unicharset.id_to_unichar(LastClass), DebugMode);
1730
+ ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
1731
+ UpdateMatchDisplay();
1732
+
1733
+ while ((ClassId = GetClassToDebug (Prompt)) != 0) {
1734
+ #if 0
1735
+ switch (ClassId) {
1736
+ case 'b':
1737
+ AdaptiveOn = TRUE;
1738
+ PreTrainedOn = FALSE;
1739
+ DebugMode = "Adaptive Templates Only";
1740
+ break;
1741
+
1742
+ case 'c':
1743
+ AdaptiveOn = FALSE;
1744
+ PreTrainedOn = TRUE;
1745
+ DebugMode = "PreTrained Templates Only";
1746
+ break;
1747
+
1748
+ case 'a':
1749
+ AdaptiveOn = TRUE;
1750
+ PreTrainedOn = TRUE;
1751
+ DebugMode = "All Templates";
1752
+ break;
1753
+
1754
+ default:
1755
+ LastClass = ClassId;
1756
+ break;
1757
+ }
1758
+ #endif
1759
+ LastClass = ClassId;
1760
+
1761
+ ShowMatchDisplay();
1762
+ cprintf ("\nDebugging class = %d = %s (%s) ...\n",
1763
+ LastClass, unicharset.id_to_unichar(LastClass), DebugMode);
1764
+ ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
1765
+ UpdateMatchDisplay();
1766
+ }
1767
+ } /* DebugAdaptiveClassifier */
1768
+ #endif
1769
+
1770
+ /*---------------------------------------------------------------------------*/
1771
+ void DoAdaptiveMatch(TBLOB *Blob,
1772
+ LINE_STATS *LineStats,
1773
+ ADAPT_RESULTS *Results) {
1774
+ /*
1775
+ ** Parameters:
1776
+ ** Blob
1777
+ blob to be classified
1778
+ ** LineStats
1779
+ statistics for text line Blob is in
1780
+ ** Results
1781
+ place to put match results
1782
+ ** Globals:
1783
+ ** PreTrainedTemplates
1784
+ built-in training templates
1785
+ ** AdaptedTemplates
1786
+ templates adapted for this page
1787
+ ** GreatAdaptiveMatch
1788
+ rating limit for a great match
1789
+ ** Operation: This routine performs an adaptive classification.
1790
+ ** If we have not yet adapted to enough classes, a simple
1791
+ ** classification to the pre-trained templates is performed.
1792
+ ** Otherwise, we match the blob against the adapted templates.
1793
+ ** If the adapted templates do not match well, we try a
1794
+ ** match against the pre-trained templates. If an adapted
1795
+ ** template match is found, we do a match to any pre-trained
1796
+ ** templates which could be ambiguous. The results from all
1797
+ ** of these classifications are merged together into Results.
1798
+ ** Return: none
1799
+ ** Exceptions: none
1800
+ ** History: Tue Mar 12 08:50:11 1991, DSJ, Created.
1801
+ */
1802
+ UNICHAR_ID *Ambiguities;
1803
+
1804
+ AdaptiveMatcherCalls++;
1805
+ InitIntFX();
1806
+
1807
+ if (AdaptedTemplates->NumPermClasses < MinNumPermClasses
1808
+ || tess_cn_matching) {
1809
+ CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
1810
+ }
1811
+ else {
1812
+ Ambiguities = BaselineClassifier (Blob, LineStats,
1813
+ AdaptedTemplates, Results);
1814
+
1815
+ if ((Results->NumMatches > 0 && MarginalMatch (Results->BestRating)
1816
+ && !tess_bn_matching) || Results->NumMatches == 0) {
1817
+ CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
1818
+ }
1819
+ else if (Ambiguities && *Ambiguities >= 0) {
1820
+ AmbigClassifier(Blob,
1821
+ LineStats,
1822
+ PreTrainedTemplates,
1823
+ Ambiguities,
1824
+ Results);
1825
+ }
1826
+ }
1827
+
1828
+ if (Results->NumMatches == 0)
1829
+ ClassifyAsNoise(Blob, LineStats, Results);
1830
+ /**/} /* DoAdaptiveMatch */
1831
+
1832
+ /*---------------------------------------------------------------------------*/
1833
+ void
1834
+ GetAdaptThresholds (TWERD * Word,
1835
+ LINE_STATS * LineStats,
1836
+ const WERD_CHOICE& BestChoice,
1837
+ const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]) {
1838
+ /*
1839
+ ** Parameters:
1840
+ ** Word
1841
+ current word
1842
+ ** LineStats
1843
+ line stats for row word is in
1844
+ ** BestChoice
1845
+ best choice for current word with context
1846
+ ** BestRawChoice
1847
+ best choice for current word without context
1848
+ ** Thresholds
1849
+ array of thresholds to be filled in
1850
+ ** Globals:
1851
+ ** EnableNewAdaptRules
1852
+ ** GoodAdaptiveMatch
1853
+ ** PerfectRating
1854
+ ** RatingMargin
1855
+ ** Operation: This routine tries to estimate how tight the adaptation
1856
+ ** threshold should be set for each character in the current
1857
+ ** word. In general, the routine tries to set tighter
1858
+ ** thresholds for a character when the current set of templates
1859
+ ** would have made an error on that character. It tries
1860
+ ** to set a threshold tight enough to eliminate the error.
1861
+ ** Two different sets of rules can be used to determine the
1862
+ ** desired thresholds.
1863
+ ** Return: none (results are returned in Thresholds)
1864
+ ** Exceptions: none
1865
+ ** History: Fri May 31 09:22:08 1991, DSJ, Created.
1866
+ */
1867
+ TBLOB *Blob;
1868
+ const char* BestChoice_string = BestChoice.string().string();
1869
+ const char* BestChoice_lengths = BestChoice.lengths().string();
1870
+ const char* BestRawChoice_string = BestRawChoice.string().string();
1871
+ const char* BestRawChoice_lengths = BestRawChoice.lengths().string();
1872
+
1873
+ if (EnableNewAdaptRules && /* new rules */
1874
+ CurrentBestChoiceIs (BestChoice_string, BestChoice_lengths)) {
1875
+ FindClassifierErrors(PerfectRating,
1876
+ GoodAdaptiveMatch,
1877
+ RatingMargin,
1878
+ Thresholds);
1879
+ }
1880
+ else { /* old rules */
1881
+ for (Blob = Word->blobs;
1882
+ Blob != NULL;
1883
+ Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++),
1884
+ BestRawChoice_string += *(BestRawChoice_lengths++), Thresholds++)
1885
+ if (*(BestChoice_lengths) == *(BestRawChoice_lengths) &&
1886
+ strncmp(BestChoice_string, BestRawChoice_string,
1887
+ *(BestChoice_lengths)) == 0)
1888
+ *Thresholds = GoodAdaptiveMatch;
1889
+ else {
1890
+ /* the blob was incorrectly classified - find the rating threshold
1891
+ needed to create a template which will correct the error with
1892
+ some margin. However, don't waste time trying to make
1893
+ templates which are too tight. */
1894
+ *Thresholds = GetBestRatingFor (Blob, LineStats,
1895
+ unicharset.unichar_to_id(
1896
+ BestChoice_string,
1897
+ *BestChoice_lengths));
1898
+ *Thresholds *= (1.0 - RatingMargin);
1899
+ if (*Thresholds > GoodAdaptiveMatch)
1900
+ *Thresholds = GoodAdaptiveMatch;
1901
+ if (*Thresholds < PerfectRating)
1902
+ *Thresholds = PerfectRating;
1903
+ }
1904
+ }
1905
+ } /* GetAdaptThresholds */
1906
+
1907
+ /*---------------------------------------------------------------------------*/
1908
+ UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
1909
+ LINE_STATS *LineStats,
1910
+ CLASS_ID CorrectClass) {
1911
+ /*
1912
+ ** Parameters:
1913
+ ** Blob
1914
+ blob to get classification ambiguities for
1915
+ ** LineStats
1916
+ statistics for text line blob is in
1917
+ ** CorrectClass
1918
+ correct class for Blob
1919
+ ** Globals:
1920
+ ** CurrentRatings
1921
+ used by qsort compare routine
1922
+ ** PreTrainedTemplates
1923
+ built-in templates
1924
+ ** Operation: This routine matches blob to the built-in templates
1925
+ ** to find out if there are any classes other than the correct
1926
+ ** class which are potential ambiguities.
1927
+ ** Return: String containing all possible ambiguous classes.
1928
+ ** Exceptions: none
1929
+ ** History: Fri Mar 15 08:08:22 1991, DSJ, Created.
1930
+ */
1931
+ ADAPT_RESULTS Results;
1932
+ UNICHAR_ID *Ambiguities;
1933
+ int i;
1934
+
1935
+ EnterClassifyMode;
1936
+
1937
+ Results.NumMatches = 0;
1938
+ Results.BestRating = WORST_POSSIBLE_RATING;
1939
+ Results.BestClass = NO_CLASS;
1940
+ Results.BestConfig = 0;
1941
+ InitMatcherRatings (Results.Ratings);
1942
+
1943
+ CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
1944
+ RemoveBadMatches(&Results);
1945
+
1946
+ /* save ratings in a global so that CompareCurrentRatings() can see them */
1947
+ CurrentRatings = Results.Ratings;
1948
+ qsort ((void *) (Results.Classes), Results.NumMatches,
1949
+ sizeof (CLASS_ID), CompareCurrentRatings);
1950
+
1951
+ /* copy the class id's into an string of ambiguities - don't copy if
1952
+ the correct class is the only class id matched */
1953
+ Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) *
1954
+ (Results.NumMatches + 1));
1955
+ if (Results.NumMatches > 1 ||
1956
+ (Results.NumMatches == 1 && Results.Classes[0] != CorrectClass)) {
1957
+ for (i = 0; i < Results.NumMatches; i++)
1958
+ Ambiguities[i] = Results.Classes[i];
1959
+ Ambiguities[i] = -1;
1960
+ }
1961
+ else
1962
+ Ambiguities[0] = -1;
1963
+
1964
+ return (Ambiguities);
1965
+
1966
+ } /* GetAmbiguities */
1967
+
1968
+ /*---------------------------------------------------------------------------*/
1969
+ int GetBaselineFeatures(TBLOB *Blob,
1970
+ LINE_STATS *LineStats,
1971
+ INT_TEMPLATES Templates,
1972
+ INT_FEATURE_ARRAY IntFeatures,
1973
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
1974
+ inT32 *BlobLength) {
1975
+ /*
1976
+ ** Parameters:
1977
+ ** Blob
1978
+ blob to extract features from
1979
+ ** LineStats
1980
+ statistics about text row blob is in
1981
+ ** Templates
1982
+ used to compute char norm adjustments
1983
+ ** IntFeatures
1984
+ array to fill with integer features
1985
+ ** CharNormArray
1986
+ array to fill with dummy char norm adjustments
1987
+ ** BlobLength
1988
+ length of blob in baseline-normalized units
1989
+ ** Globals: none
1990
+ ** Operation: This routine sets up the feature extractor to extract
1991
+ ** baseline normalized pico-features.
1992
+ ** The extracted pico-features are converted
1993
+ ** to integer form and placed in IntFeatures. CharNormArray
1994
+ ** is filled with 0's to indicate to the matcher that no
1995
+ ** character normalization adjustment needs to be done.
1996
+ ** The total length of all blob outlines
1997
+ ** in baseline normalized units is also returned.
1998
+ ** Return: Number of pico-features returned (0 if an error occurred)
1999
+ ** Exceptions: none
2000
+ ** History: Tue Mar 12 17:55:18 1991, DSJ, Created.
2001
+ */
2002
+ FEATURE_SET Features;
2003
+ int NumFeatures;
2004
+
2005
+ if (EnableIntFX)
2006
+ return (GetIntBaselineFeatures (Blob, LineStats, Templates,
2007
+ IntFeatures, CharNormArray, BlobLength));
2008
+
2009
+ NormMethod = baseline;
2010
+ Features = ExtractPicoFeatures (Blob, LineStats);
2011
+
2012
+ NumFeatures = Features->NumFeatures;
2013
+ *BlobLength = NumFeatures;
2014
+ if (NumFeatures > UNLIKELY_NUM_FEAT) {
2015
+ FreeFeatureSet(Features);
2016
+ return (0);
2017
+ }
2018
+
2019
+ ComputeIntFeatures(Features, IntFeatures);
2020
+ ClearCharNormArray(Templates, CharNormArray);
2021
+
2022
+ FreeFeatureSet(Features);
2023
+ return (NumFeatures);
2024
+
2025
+ } /* GetBaselineFeatures */
2026
+
2027
+ /*---------------------------------------------------------------------------*/
2028
+ FLOAT32 GetBestRatingFor(TBLOB *Blob,
2029
+ LINE_STATS *LineStats,
2030
+ CLASS_ID ClassId) {
2031
+ /*
2032
+ ** Parameters:
2033
+ ** Blob
2034
+ blob to get best rating for
2035
+ ** LineStats
2036
+ statistics about text line blob is in
2037
+ ** ClassId
2038
+ class blob is to be compared to
2039
+ ** Globals:
2040
+ ** PreTrainedTemplates
2041
+ built-in templates
2042
+ ** AdaptedTemplates
2043
+ current set of adapted templates
2044
+ ** AllProtosOn
2045
+ dummy mask to enable all protos
2046
+ ** AllConfigsOn
2047
+ dummy mask to enable all configs
2048
+ ** Operation: This routine classifies Blob against both sets of
2049
+ ** templates for the specified class and returns the best
2050
+ ** rating found.
2051
+ ** Return: Best rating for match of Blob to ClassId.
2052
+ ** Exceptions: none
2053
+ ** History: Tue Apr 9 09:01:24 1991, DSJ, Created.
2054
+ */
2055
+ int NumCNFeatures, NumBLFeatures;
2056
+ INT_FEATURE_ARRAY CNFeatures, BLFeatures;
2057
+ INT_RESULT_STRUCT CNResult, BLResult;
2058
+ CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
2059
+ CLASS_INDEX ClassIndex;
2060
+ inT32 BlobLength;
2061
+
2062
+ CNResult.Rating = BLResult.Rating = 1.0;
2063
+
2064
+ if (!LegalClassId (ClassId))
2065
+ return (1.0);
2066
+
2067
+ if (!UnusedClassIdIn (PreTrainedTemplates, ClassId)) {
2068
+ NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
2069
+ PreTrainedTemplates,
2070
+ CNFeatures, CNAdjust, &BlobLength);
2071
+ if (NumCNFeatures > 0) {
2072
+ ClassIndex = PreTrainedTemplates->IndexFor[ClassId];
2073
+
2074
+ SetCharNormMatch();
2075
+ IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
2076
+ AllProtosOn, AllConfigsOn,
2077
+ BlobLength, NumCNFeatures, CNFeatures,
2078
+ CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
2079
+ }
2080
+ }
2081
+
2082
+ if (!UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
2083
+ NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
2084
+ AdaptedTemplates->Templates,
2085
+ BLFeatures, BLAdjust, &BlobLength);
2086
+ if (NumBLFeatures > 0) {
2087
+ ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
2088
+
2089
+ SetBaseLineMatch();
2090
+ IntegerMatcher (ClassForClassId
2091
+ (AdaptedTemplates->Templates, ClassId),
2092
+ AdaptedTemplates->Class[ClassIndex]->PermProtos,
2093
+ AdaptedTemplates->Class[ClassIndex]->PermConfigs,
2094
+ BlobLength, NumBLFeatures, BLFeatures,
2095
+ BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
2096
+ }
2097
+ }
2098
+
2099
+ return (MIN (BLResult.Rating, CNResult.Rating));
2100
+
2101
+ } /* GetBestRatingFor */
2102
+
2103
+ /*---------------------------------------------------------------------------*/
2104
+ int GetCharNormFeatures(TBLOB *Blob,
2105
+ LINE_STATS *LineStats,
2106
+ INT_TEMPLATES Templates,
2107
+ INT_FEATURE_ARRAY IntFeatures,
2108
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
2109
+ inT32 *BlobLength) {
2110
+ /*
2111
+ ** Parameters:
2112
+ ** Blob
2113
+ blob to extract features from
2114
+ ** LineStats
2115
+ statistics about text row blob is in
2116
+ ** Templates
2117
+ used to compute char norm adjustments
2118
+ ** IntFeatures
2119
+ array to fill with integer features
2120
+ ** CharNormArray
2121
+ array to fill with char norm adjustments
2122
+ ** BlobLength
2123
+ length of blob in baseline-normalized units
2124
+ ** Globals: none
2125
+ ** Operation: This routine sets up the feature extractor to extract
2126
+ ** character normalization features and character normalized
2127
+ ** pico-features. The extracted pico-features are converted
2128
+ ** to integer form and placed in IntFeatures. The character
2129
+ ** normalization features are matched to each class in
2130
+ ** templates and the resulting adjustment factors are returned
2131
+ ** in CharNormArray. The total length of all blob outlines
2132
+ ** in baseline normalized units is also returned.
2133
+ ** Return: Number of pico-features returned (0 if an error occurred)
2134
+ ** Exceptions: none
2135
+ ** History: Tue Mar 12 17:55:18 1991, DSJ, Created.
2136
+ */
2137
+ return (GetIntCharNormFeatures (Blob, LineStats, Templates,
2138
+ IntFeatures, CharNormArray, BlobLength));
2139
+ } /* GetCharNormFeatures */
2140
+
2141
+ /*---------------------------------------------------------------------------*/
2142
+ int GetIntBaselineFeatures(TBLOB *Blob,
2143
+ LINE_STATS *LineStats,
2144
+ INT_TEMPLATES Templates,
2145
+ INT_FEATURE_ARRAY IntFeatures,
2146
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
2147
+ inT32 *BlobLength) {
2148
+ /*
2149
+ ** Parameters:
2150
+ ** Blob
2151
+ blob to extract features from
2152
+ ** LineStats
2153
+ statistics about text row blob is in
2154
+ ** Templates
2155
+ used to compute char norm adjustments
2156
+ ** IntFeatures
2157
+ array to fill with integer features
2158
+ ** CharNormArray
2159
+ array to fill with dummy char norm adjustments
2160
+ ** BlobLength
2161
+ length of blob in baseline-normalized units
2162
+ ** Globals:
2163
+ ** FeaturesHaveBeenExtracted
2164
+ TRUE if fx has been done
2165
+ ** BaselineFeatures
2166
+ holds extracted baseline feat
2167
+ ** CharNormFeatures
2168
+ holds extracted char norm feat
2169
+ ** FXInfo
2170
+ holds misc. FX info
2171
+ ** Operation: This routine calls the integer (Hardware) feature
2172
+ ** extractor if it has not been called before for this blob.
2173
+ ** The results from the feature extractor are placed into
2174
+ ** globals so that they can be used in other routines without
2175
+ ** re-extracting the features.
2176
+ ** It then copies the baseline features into the IntFeatures
2177
+ ** array provided by the caller.
2178
+ ** Return: Number of features extracted or 0 if an error occured.
2179
+ ** Exceptions: none
2180
+ ** History: Tue May 28 10:40:52 1991, DSJ, Created.
2181
+ */
2182
+ register INT_FEATURE Src, Dest, End;
2183
+
2184
+ if (!FeaturesHaveBeenExtracted) {
2185
+ FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
2186
+ CharNormFeatures, &FXInfo);
2187
+ FeaturesHaveBeenExtracted = TRUE;
2188
+ }
2189
+
2190
+ if (!FeaturesOK) {
2191
+ *BlobLength = FXInfo.NumBL;
2192
+ return (0);
2193
+ }
2194
+
2195
+ for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures;
2196
+ Src < End; *Dest++ = *Src++);
2197
+
2198
+ ClearCharNormArray(Templates, CharNormArray);
2199
+ *BlobLength = FXInfo.NumBL;
2200
+ return (FXInfo.NumBL);
2201
+
2202
+ } /* GetIntBaselineFeatures */
2203
+
2204
+ /*---------------------------------------------------------------------------*/
2205
+ int GetIntCharNormFeatures(TBLOB *Blob,
2206
+ LINE_STATS *LineStats,
2207
+ INT_TEMPLATES Templates,
2208
+ INT_FEATURE_ARRAY IntFeatures,
2209
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
2210
+ inT32 *BlobLength) {
2211
+ /*
2212
+ ** Parameters:
2213
+ ** Blob
2214
+ blob to extract features from
2215
+ ** LineStats
2216
+ statistics about text row blob is in
2217
+ ** Templates
2218
+ used to compute char norm adjustments
2219
+ ** IntFeatures
2220
+ array to fill with integer features
2221
+ ** CharNormArray
2222
+ array to fill with dummy char norm adjustments
2223
+ ** BlobLength
2224
+ length of blob in baseline-normalized units
2225
+ ** Globals:
2226
+ ** FeaturesHaveBeenExtracted
2227
+ TRUE if fx has been done
2228
+ ** BaselineFeatures
2229
+ holds extracted baseline feat
2230
+ ** CharNormFeatures
2231
+ holds extracted char norm feat
2232
+ ** FXInfo
2233
+ holds misc. FX info
2234
+ ** Operation: This routine calls the integer (Hardware) feature
2235
+ ** extractor if it has not been called before for this blob.
2236
+ ** The results from the feature extractor are placed into
2237
+ ** globals so that they can be used in other routines without
2238
+ ** re-extracting the features.
2239
+ ** It then copies the char norm features into the IntFeatures
2240
+ ** array provided by the caller.
2241
+ ** Return: Number of features extracted or 0 if an error occured.
2242
+ ** Exceptions: none
2243
+ ** History: Tue May 28 10:40:52 1991, DSJ, Created.
2244
+ */
2245
+ register INT_FEATURE Src, Dest, End;
2246
+ FEATURE NormFeature;
2247
+ FLOAT32 Baseline, Scale;
2248
+
2249
+ if (!FeaturesHaveBeenExtracted) {
2250
+ FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
2251
+ CharNormFeatures, &FXInfo);
2252
+ FeaturesHaveBeenExtracted = TRUE;
2253
+ }
2254
+
2255
+ if (!FeaturesOK) {
2256
+ *BlobLength = FXInfo.NumBL;
2257
+ return (0);
2258
+ }
2259
+
2260
+ for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures;
2261
+ Src < End; *Dest++ = *Src++);
2262
+
2263
+ NormFeature = NewFeature (&CharNormDesc);
2264
+ Baseline = BaselineAt (LineStats, FXInfo.Xmean);
2265
+ Scale = ComputeScaleFactor (LineStats);
2266
+ NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
2267
+ NormFeature->Params[CharNormLength] =
2268
+ FXInfo.Length * Scale / LENGTH_COMPRESSION;
2269
+ NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
2270
+ NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
2271
+ ComputeIntCharNormArray(NormFeature, Templates, CharNormArray);
2272
+ FreeFeature(NormFeature);
2273
+
2274
+ *BlobLength = FXInfo.NumBL;
2275
+ return (FXInfo.NumCN);
2276
+
2277
+ } /* GetIntCharNormFeatures */
2278
+
2279
+ /*---------------------------------------------------------------------------*/
2280
+ void InitMatcherRatings(register FLOAT32 *Rating) {
2281
+ /*
2282
+ ** Parameters:
2283
+ ** Rating
2284
+ ptr to array of ratings to be initialized
2285
+ ** Globals: none
2286
+ ** Operation: This routine initializes the best rating for each class
2287
+ ** to be the worst possible rating (1.0).
2288
+ ** Return: none
2289
+ ** Exceptions: none
2290
+ ** History: Tue Mar 12 13:43:28 1991, DSJ, Created.
2291
+ */
2292
+ register FLOAT32 *LastRating;
2293
+ register FLOAT32 WorstRating = WORST_POSSIBLE_RATING;
2294
+
2295
+ for (LastRating = Rating + MAX_CLASS_ID;
2296
+ Rating <= LastRating; *Rating++ = WorstRating);
2297
+
2298
+ } /* InitMatcherRatings */
2299
+
2300
+ /*---------------------------------------------------------------------------*/
2301
+ int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
2302
+ CLASS_ID ClassId,
2303
+ int NumFeatures,
2304
+ INT_FEATURE_ARRAY Features,
2305
+ FEATURE_SET FloatFeatures) {
2306
+ /*
2307
+ ** Parameters:
2308
+ ** Templates
2309
+ adapted templates to add new config to
2310
+ ** ClassId
2311
+ class id to associate with new config
2312
+ ** NumFeatures
2313
+ number of features in IntFeatures
2314
+ ** Features
2315
+ features describing model for new config
2316
+ ** FloatFeatures
2317
+ floating-pt representation of features
2318
+ ** Globals:
2319
+ ** AllProtosOn
2320
+ mask to enable all protos
2321
+ ** AllConfigsOff
2322
+ mask to disable all configs
2323
+ ** TempProtoMask
2324
+ defines old protos matched in new config
2325
+ ** Operation:
2326
+ ** Return: The id of the new config created, a negative integer in
2327
+ ** case of error.
2328
+ ** Exceptions: none
2329
+ ** History: Fri Mar 15 08:49:46 1991, DSJ, Created.
2330
+ */
2331
+ CLASS_INDEX ClassIndex;
2332
+ INT_CLASS IClass;
2333
+ ADAPT_CLASS Class;
2334
+ PROTO_ID OldProtos[MAX_NUM_PROTOS];
2335
+ FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
2336
+ int NumOldProtos;
2337
+ int NumBadFeatures;
2338
+ int MaxProtoId, OldMaxProtoId;
2339
+ int BlobLength = 0;
2340
+ int MaskSize;
2341
+ int ConfigId;
2342
+ TEMP_CONFIG Config;
2343
+ int i;
2344
+ int debug_level = NO_DEBUG;
2345
+
2346
+ if (LearningDebugLevel >= 3)
2347
+ debug_level =
2348
+ PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES;
2349
+
2350
+ ClassIndex = Templates->Templates->IndexFor[ClassId];
2351
+ IClass = ClassForClassId (Templates->Templates, ClassId);
2352
+ Class = Templates->Class[ClassIndex];
2353
+
2354
+ if (IClass->NumConfigs >= MAX_NUM_CONFIGS)
2355
+ {
2356
+ ++NumAdaptationsFailed;
2357
+ if (LearningDebugLevel >= 1)
2358
+ cprintf ("Cannot make new temporary config: maximum number exceeded.\n");
2359
+ return -1;
2360
+ }
2361
+
2362
+ OldMaxProtoId = IClass->NumProtos - 1;
2363
+
2364
+ NumOldProtos = FindGoodProtos (IClass, AllProtosOn, AllConfigsOff,
2365
+ BlobLength, NumFeatures, Features,
2366
+ OldProtos, debug_level);
2367
+
2368
+ MaskSize = WordsInVectorOfSize (MAX_NUM_PROTOS);
2369
+ zero_all_bits(TempProtoMask, MaskSize);
2370
+ for (i = 0; i < NumOldProtos; i++)
2371
+ SET_BIT (TempProtoMask, OldProtos[i]);
2372
+
2373
+ NumBadFeatures = FindBadFeatures (IClass, TempProtoMask, AllConfigsOn,
2374
+ BlobLength, NumFeatures, Features,
2375
+ BadFeatures, debug_level);
2376
+
2377
+ MaxProtoId = MakeNewTempProtos (FloatFeatures, NumBadFeatures, BadFeatures,
2378
+ IClass, Class, TempProtoMask);
2379
+ if (MaxProtoId == NO_PROTO)
2380
+ {
2381
+ ++NumAdaptationsFailed;
2382
+ if (LearningDebugLevel >= 1)
2383
+ cprintf ("Cannot make new temp protos: maximum number exceeded.\n");
2384
+ return -1;
2385
+ }
2386
+
2387
+ ConfigId = AddIntConfig (IClass);
2388
+ ConvertConfig(TempProtoMask, ConfigId, IClass);
2389
+ Config = NewTempConfig (MaxProtoId);
2390
+ TempConfigFor (Class, ConfigId) = Config;
2391
+ copy_all_bits (TempProtoMask, Config->Protos, Config->ProtoVectorSize);
2392
+
2393
+ if (LearningDebugLevel >= 1)
2394
+ cprintf ("Making new temp config %d using %d old and %d new protos.\n",
2395
+ ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId);
2396
+
2397
+ return ConfigId;
2398
+ } /* MakeNewTemporaryConfig */
2399
+
2400
+ /*---------------------------------------------------------------------------*/
2401
+ PROTO_ID
2402
+ MakeNewTempProtos (FEATURE_SET Features,
2403
+ int NumBadFeat,
2404
+ FEATURE_ID BadFeat[],
2405
+ INT_CLASS IClass,
2406
+ ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) {
2407
+ /*
2408
+ ** Parameters:
2409
+ ** Features
2410
+ floating-pt features describing new character
2411
+ ** NumBadFeat
2412
+ number of bad features to turn into protos
2413
+ ** BadFeat
2414
+ feature id's of bad features
2415
+ ** IClass
2416
+ integer class templates to add new protos to
2417
+ ** Class
2418
+ adapted class templates to add new protos to
2419
+ ** TempProtoMask
2420
+ proto mask to add new protos to
2421
+ ** Globals: none
2422
+ ** Operation: This routine finds sets of sequential bad features
2423
+ ** that all have the same angle and converts each set into
2424
+ ** a new temporary proto. The temp proto is added to the
2425
+ ** proto pruner for IClass, pushed onto the list of temp
2426
+ ** protos in Class, and added to TempProtoMask.
2427
+ ** Return: Max proto id in class after all protos have been added.
2428
+ ** Exceptions: none
2429
+ ** History: Fri Mar 15 11:39:38 1991, DSJ, Created.
2430
+ */
2431
+ FEATURE_ID *ProtoStart;
2432
+ FEATURE_ID *ProtoEnd;
2433
+ FEATURE_ID *LastBad;
2434
+ TEMP_PROTO TempProto;
2435
+ PROTO Proto;
2436
+ FEATURE F1, F2;
2437
+ FLOAT32 X1, X2, Y1, Y2;
2438
+ FLOAT32 A1, A2, AngleDelta;
2439
+ FLOAT32 SegmentLength;
2440
+ PROTO_ID Pid;
2441
+
2442
+ for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
2443
+ ProtoStart < LastBad; ProtoStart = ProtoEnd) {
2444
+ F1 = Features->Features[*ProtoStart];
2445
+ X1 = F1->Params[PicoFeatX];
2446
+ Y1 = F1->Params[PicoFeatY];
2447
+ A1 = F1->Params[PicoFeatDir];
2448
+
2449
+ for (ProtoEnd = ProtoStart + 1,
2450
+ SegmentLength = GetPicoFeatureLength ();
2451
+ ProtoEnd < LastBad;
2452
+ ProtoEnd++, SegmentLength += GetPicoFeatureLength ()) {
2453
+ F2 = Features->Features[*ProtoEnd];
2454
+ X2 = F2->Params[PicoFeatX];
2455
+ Y2 = F2->Params[PicoFeatY];
2456
+ A2 = F2->Params[PicoFeatDir];
2457
+
2458
+ AngleDelta = fabs (A1 - A2);
2459
+ if (AngleDelta > 0.5)
2460
+ AngleDelta = 1.0 - AngleDelta;
2461
+
2462
+ if (AngleDelta > MaxAngleDelta ||
2463
+ fabs (X1 - X2) > SegmentLength ||
2464
+ fabs (Y1 - Y2) > SegmentLength)
2465
+ break;
2466
+ }
2467
+
2468
+ F2 = Features->Features[*(ProtoEnd - 1)];
2469
+ X2 = F2->Params[PicoFeatX];
2470
+ Y2 = F2->Params[PicoFeatY];
2471
+ A2 = F2->Params[PicoFeatDir];
2472
+
2473
+ Pid = AddIntProto (IClass);
2474
+ if (Pid == NO_PROTO)
2475
+ return (NO_PROTO);
2476
+
2477
+ TempProto = NewTempProto ();
2478
+ Proto = &(TempProto->Proto);
2479
+
2480
+ /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
2481
+ ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
2482
+ instead of the -0.25 to 0.75 used in baseline normalization */
2483
+ Proto->Length = SegmentLength;
2484
+ Proto->Angle = A1;
2485
+ Proto->X = (X1 + X2) / 2.0;
2486
+ Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
2487
+ FillABC(Proto);
2488
+
2489
+ TempProto->ProtoId = Pid;
2490
+ SET_BIT(TempProtoMask, Pid);
2491
+
2492
+ ConvertProto(Proto, Pid, IClass);
2493
+ AddProtoToProtoPruner(Proto, Pid, IClass);
2494
+
2495
+ Class->TempProtos = push (Class->TempProtos, TempProto);
2496
+ }
2497
+ return (IClass->NumProtos - 1);
2498
+ } /* MakeNewTempProtos */
2499
+
2500
+ /*---------------------------------------------------------------------------*/
2501
+ void MakePermanent(ADAPT_TEMPLATES Templates,
2502
+ CLASS_ID ClassId,
2503
+ int ConfigId,
2504
+ TBLOB *Blob,
2505
+ LINE_STATS *LineStats) {
2506
+ /*
2507
+ ** Parameters:
2508
+ ** Templates
2509
+ current set of adaptive templates
2510
+ ** ClassId
2511
+ class containing config to be made permanent
2512
+ ** ConfigId
2513
+ config to be made permanent
2514
+ ** Blob
2515
+ current blob being adapted to
2516
+ ** LineStats
2517
+ statistics about text line Blob is in
2518
+ ** Globals: none
2519
+ ** Operation:
2520
+ ** Return: none
2521
+ ** Exceptions: none
2522
+ ** History: Thu Mar 14 15:54:08 1991, DSJ, Created.
2523
+ */
2524
+ UNICHAR_ID *Ambigs;
2525
+ TEMP_CONFIG Config;
2526
+ CLASS_INDEX ClassIndex;
2527
+ ADAPT_CLASS Class;
2528
+ PROTO_KEY ProtoKey;
2529
+
2530
+ ClassIndex = Templates->Templates->IndexFor[ClassId];
2531
+ Class = Templates->Class[ClassIndex];
2532
+ Config = TempConfigFor (Class, ConfigId);
2533
+
2534
+ MakeConfigPermanent(Class, ConfigId);
2535
+ if (Class->NumPermConfigs == 0)
2536
+ Templates->NumPermClasses++;
2537
+ Class->NumPermConfigs++;
2538
+
2539
+ ProtoKey.Templates = Templates;
2540
+ ProtoKey.ClassId = ClassId;
2541
+ ProtoKey.ConfigId = ConfigId;
2542
+ Class->TempProtos = delete_d (Class->TempProtos, &ProtoKey,
2543
+ MakeTempProtoPerm);
2544
+ FreeTempConfig(Config);
2545
+
2546
+ Ambigs = GetAmbiguities (Blob, LineStats, ClassId);
2547
+ PermConfigFor (Class, ConfigId) = Ambigs;
2548
+
2549
+ if (LearningDebugLevel >= 1) {
2550
+ cprintf ("Making config %d permanent with ambiguities '",
2551
+ ConfigId, Ambigs);
2552
+ for (UNICHAR_ID *AmbigsPointer = Ambigs;
2553
+ *AmbigsPointer >= 0; ++AmbigsPointer)
2554
+ cprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
2555
+ cprintf("'.\n");
2556
+ }
2557
+
2558
+ } /* MakePermanent */
2559
+
2560
+ /*---------------------------------------------------------------------------*/
2561
+ int MakeTempProtoPerm(void *item1, //TEMP_PROTO TempProto,
2562
+ void *item2) { //PROTO_KEY *ProtoKey)
2563
+ /*
2564
+ ** Parameters:
2565
+ ** TempProto
2566
+ temporary proto to compare to key
2567
+ ** ProtoKey
2568
+ defines which protos to make permanent
2569
+ ** Globals: none
2570
+ ** Operation: This routine converts TempProto to be permanent if
2571
+ ** its proto id is used by the configuration specified in
2572
+ ** ProtoKey.
2573
+ ** Return: TRUE if TempProto is converted, FALSE otherwise
2574
+ ** Exceptions: none
2575
+ ** History: Thu Mar 14 18:49:54 1991, DSJ, Created.
2576
+ */
2577
+ CLASS_INDEX ClassIndex;
2578
+ ADAPT_CLASS Class;
2579
+ TEMP_CONFIG Config;
2580
+ TEMP_PROTO TempProto;
2581
+ PROTO_KEY *ProtoKey;
2582
+
2583
+ TempProto = (TEMP_PROTO) item1;
2584
+ ProtoKey = (PROTO_KEY *) item2;
2585
+
2586
+ ClassIndex = ProtoKey->Templates->Templates->IndexFor[ProtoKey->ClassId];
2587
+ Class = ProtoKey->Templates->Class[ClassIndex];
2588
+ Config = TempConfigFor (Class, ProtoKey->ConfigId);
2589
+
2590
+ if (TempProto->ProtoId > Config->MaxProtoId ||
2591
+ !test_bit (Config->Protos, TempProto->ProtoId))
2592
+ return (FALSE);
2593
+
2594
+ MakeProtoPermanent (Class, TempProto->ProtoId);
2595
+ AddProtoToClassPruner (&(TempProto->Proto), ProtoKey->ClassId,
2596
+ ProtoKey->Templates->Templates);
2597
+ FreeTempProto(TempProto);
2598
+
2599
+ return (TRUE);
2600
+
2601
+ } /* MakeTempProtoPerm */
2602
+
2603
+ /*---------------------------------------------------------------------------*/
2604
+ int NumBlobsIn(TWERD *Word) {
2605
+ /*
2606
+ ** Parameters:
2607
+ ** Word
2608
+ word to count blobs in
2609
+ ** Globals: none
2610
+ ** Operation: This routine returns the number of blobs in Word.
2611
+ ** Return: Number of blobs in Word.
2612
+ ** Exceptions: none
2613
+ ** History: Thu Mar 14 08:30:27 1991, DSJ, Created.
2614
+ */
2615
+ register TBLOB *Blob;
2616
+ register int NumBlobs;
2617
+
2618
+ if (Word == NULL)
2619
+ return (0);
2620
+
2621
+ for (Blob = Word->blobs, NumBlobs = 0;
2622
+ Blob != NULL; Blob = Blob->next, NumBlobs++);
2623
+
2624
+ return (NumBlobs);
2625
+
2626
+ } /* NumBlobsIn */
2627
+
2628
+ /*---------------------------------------------------------------------------*/
2629
+ int NumOutlinesInBlob(TBLOB *Blob) {
2630
+ /*
2631
+ ** Parameters:
2632
+ ** Blob
2633
+ blob to count outlines in
2634
+ ** Globals: none
2635
+ ** Operation: This routine returns the number of OUTER outlines
2636
+ ** in Blob.
2637
+ ** Return: Number of outer outlines in Blob.
2638
+ ** Exceptions: none
2639
+ ** History: Mon Jun 10 15:46:20 1991, DSJ, Created.
2640
+ */
2641
+ register TESSLINE *Outline;
2642
+ register int NumOutlines;
2643
+
2644
+ if (Blob == NULL)
2645
+ return (0);
2646
+
2647
+ for (Outline = Blob->outlines, NumOutlines = 0;
2648
+ Outline != NULL; Outline = Outline->next, NumOutlines++);
2649
+
2650
+ return (NumOutlines);
2651
+
2652
+ } /* NumOutlinesInBlob */
2653
+
2654
+ /*---------------------------------------------------------------------------*/
2655
+ void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) {
2656
+ /*
2657
+ ** Parameters:
2658
+ ** File
2659
+ open text file to write Results to
2660
+ ** Results
2661
+ match results to write to File
2662
+ ** Globals: none
2663
+ ** Operation: This routine writes the matches in Results to File.
2664
+ ** Return: none
2665
+ ** Exceptions: none
2666
+ ** History: Mon Mar 18 09:24:53 1991, DSJ, Created.
2667
+ */
2668
+ for (int i = 0; i < Results->NumMatches; ++i) {
2669
+ cprintf("%s(%d) %.2f ",
2670
+ unicharset.debug_str(Results->Classes[i]).string(),
2671
+ Results->Classes[i],
2672
+ Results->Ratings[Results->Classes[i]] * 100.0);
2673
+ }
2674
+ } /* PrintAdaptiveMatchResults */
2675
+
2676
+ /*---------------------------------------------------------------------------*/
2677
+ void RemoveBadMatches(ADAPT_RESULTS *Results) {
2678
+ /*
2679
+ ** Parameters:
2680
+ ** Results
2681
+ contains matches to be filtered
2682
+ ** Globals:
2683
+ ** BadMatchPad
2684
+ defines a "bad match"
2685
+ ** Operation: This routine steps thru each matching class in Results
2686
+ ** and removes it from the match list if its rating
2687
+ ** is worse than the BestRating plus a pad. In other words,
2688
+ ** all good matches get moved to the front of the classes
2689
+ ** array.
2690
+ ** Return: none
2691
+ ** Exceptions: none
2692
+ ** History: Tue Mar 12 13:51:03 1991, DSJ, Created.
2693
+ */
2694
+ int Next, NextGood;
2695
+ FLOAT32 *Rating = Results->Ratings;
2696
+ CLASS_ID *Match = Results->Classes;
2697
+ FLOAT32 BadMatchThreshold;
2698
+ static const char* romans = "i v x I V X";
2699
+ BadMatchThreshold = Results->BestRating + BadMatchPad;
2700
+
2701
+ if (bln_numericmode) {
2702
+ UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2703
+ unicharset.unichar_to_id("1") : -1;
2704
+ UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2705
+ unicharset.unichar_to_id("0") : -1;
2706
+ for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2707
+ if (Rating[Match[Next]] <= BadMatchThreshold) {
2708
+ if (!unicharset.get_isalpha(Match[Next]) ||
2709
+ strstr(romans, unicharset.id_to_unichar(Match[Next])) != NULL) {
2710
+ Match[NextGood++] = Match[Next];
2711
+ } else if (unichar_id_one >= 0 && unicharset.eq(Match[Next], "l") &&
2712
+ Rating[unichar_id_one] >= BadMatchThreshold) {
2713
+ Match[NextGood++] = unichar_id_one;
2714
+ Rating[unichar_id_one] = Rating[unicharset.unichar_to_id("l")];
2715
+ } else if (unichar_id_zero >= 0 && unicharset.eq(Match[Next], "O") &&
2716
+ Rating[unichar_id_zero] >= BadMatchThreshold) {
2717
+ Match[NextGood++] = unichar_id_zero;
2718
+ Rating[unichar_id_zero] = Rating[unicharset.unichar_to_id("O")];
2719
+ }
2720
+ }
2721
+ }
2722
+ }
2723
+ else {
2724
+ for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2725
+ if (Rating[Match[Next]] <= BadMatchThreshold)
2726
+ Match[NextGood++] = Match[Next];
2727
+ }
2728
+ }
2729
+
2730
+ Results->NumMatches = NextGood;
2731
+
2732
+ } /* RemoveBadMatches */
2733
+
2734
+ /*----------------------------------------------------------------------------------*/
2735
+ void RemoveExtraPuncs(ADAPT_RESULTS *Results) {
2736
+ /*
2737
+ ** Parameters:
2738
+ ** Results
2739
+ contains matches to be filtered
2740
+ ** Globals:
2741
+ ** BadMatchPad
2742
+ defines a "bad match"
2743
+ ** Operation: This routine steps thru each matching class in Results
2744
+ ** and removes it from the match list if its rating
2745
+ ** is worse than the BestRating plus a pad. In other words,
2746
+ ** all good matches get moved to the front of the classes
2747
+ ** array.
2748
+ ** Return: none
2749
+ ** Exceptions: none
2750
+ ** History: Tue Mar 12 13:51:03 1991, DSJ, Created.
2751
+ */
2752
+ int Next, NextGood;
2753
+ int punc_count; /*no of garbage characters */
2754
+ int digit_count;
2755
+ CLASS_ID *Match = Results->Classes;
2756
+ /*garbage characters */
2757
+ static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2758
+ static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2759
+
2760
+ punc_count = 0;
2761
+ digit_count = 0;
2762
+ for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2763
+ if (strstr (punc_chars,
2764
+ unicharset.id_to_unichar(Match[Next])) == NULL) {
2765
+ if (strstr (digit_chars,
2766
+ unicharset.id_to_unichar(Match[Next])) == NULL) {
2767
+ Match[NextGood++] = Match[Next];
2768
+ }
2769
+ else {
2770
+ if (digit_count < 1)
2771
+ Match[NextGood++] = Match[Next];
2772
+ digit_count++;
2773
+ }
2774
+ }
2775
+ else {
2776
+ if (punc_count < 2)
2777
+ Match[NextGood++] = Match[Next];
2778
+ punc_count++; /*count them */
2779
+ }
2780
+ }
2781
+ Results->NumMatches = NextGood;
2782
+ } /* RemoveExtraPuncs */
2783
+
2784
+ /*---------------------------------------------------------------------------*/
2785
+ void SetAdaptiveThreshold(FLOAT32 Threshold) {
2786
+ /*
2787
+ ** Parameters:
2788
+ ** Threshold
2789
+ threshold for creating new templates
2790
+ ** Globals:
2791
+ ** GoodAdaptiveMatch
2792
+ default good match rating
2793
+ ** Operation: This routine resets the internal thresholds inside
2794
+ ** the integer matcher to correspond to the specified
2795
+ ** threshold.
2796
+ ** Return: none
2797
+ ** Exceptions: none
2798
+ ** History: Tue Apr 9 08:33:13 1991, DSJ, Created.
2799
+ */
2800
+ if (Threshold == GoodAdaptiveMatch) {
2801
+ /* the blob was probably classified correctly - use the default rating
2802
+ threshold */
2803
+ SetProtoThresh (0.9);
2804
+ SetFeatureThresh (0.9);
2805
+ }
2806
+ else {
2807
+ /* the blob was probably incorrectly classified */
2808
+ SetProtoThresh (1.0 - Threshold);
2809
+ SetFeatureThresh (1.0 - Threshold);
2810
+ }
2811
+ } /* SetAdaptiveThreshold */
2812
+
2813
+ /*---------------------------------------------------------------------------*/
2814
+ void ShowBestMatchFor(TBLOB *Blob,
2815
+ LINE_STATS *LineStats,
2816
+ CLASS_ID ClassId,
2817
+ BOOL8 AdaptiveOn,
2818
+ BOOL8 PreTrainedOn) {
2819
+ /*
2820
+ ** Parameters:
2821
+ ** Blob
2822
+ blob to show best matching config for
2823
+ ** LineStats
2824
+ statistics for text line Blob is in
2825
+ ** ClassId
2826
+ class whose configs are to be searched
2827
+ ** AdaptiveOn
2828
+ TRUE if adaptive configs are enabled
2829
+ ** PreTrainedOn
2830
+ TRUE if pretrained configs are enabled
2831
+ ** Globals:
2832
+ ** PreTrainedTemplates
2833
+ built-in training
2834
+ ** AdaptedTemplates
2835
+ adaptive templates
2836
+ ** AllProtosOn
2837
+ dummy proto mask
2838
+ ** AllConfigsOn
2839
+ dummy config mask
2840
+ ** Operation: This routine compares Blob to both sets of templates
2841
+ ** (adaptive and pre-trained) and then displays debug
2842
+ ** information for the config which matched best.
2843
+ ** Return: none
2844
+ ** Exceptions: none
2845
+ ** History: Fri Mar 22 08:43:52 1991, DSJ, Created.
2846
+ */
2847
+ int NumCNFeatures = 0, NumBLFeatures = 0;
2848
+ INT_FEATURE_ARRAY CNFeatures, BLFeatures;
2849
+ INT_RESULT_STRUCT CNResult, BLResult;
2850
+ CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
2851
+ CLASS_INDEX ClassIndex;
2852
+ inT32 BlobLength;
2853
+ uinT32 ConfigMask;
2854
+ static int next_config = -1;
2855
+
2856
+ if (PreTrainedOn) next_config = -1;
2857
+
2858
+ CNResult.Rating = BLResult.Rating = 2.0;
2859
+
2860
+ if (!LegalClassId (ClassId)) {
2861
+ cprintf ("%d is not a legal class id!!\n", ClassId);
2862
+ return;
2863
+ }
2864
+
2865
+ if (PreTrainedOn) {
2866
+ if (UnusedClassIdIn (PreTrainedTemplates, ClassId))
2867
+ cprintf ("No built-in templates for class %d = %s\n",
2868
+ ClassId, unicharset.id_to_unichar(ClassId));
2869
+ else {
2870
+ NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
2871
+ PreTrainedTemplates,
2872
+ CNFeatures, CNAdjust,
2873
+ &BlobLength);
2874
+ if (NumCNFeatures <= 0)
2875
+ cprintf ("Illegal blob (char norm features)!\n");
2876
+ else {
2877
+ ClassIndex = PreTrainedTemplates->IndexFor[ClassId];
2878
+
2879
+ SetCharNormMatch();
2880
+ IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
2881
+ AllProtosOn, AllConfigsOn,
2882
+ BlobLength, NumCNFeatures, CNFeatures,
2883
+ CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
2884
+
2885
+ cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n",
2886
+ CNResult.Config, CNResult.Rating * 100.0, CNAdjust[ClassIndex]);
2887
+ }
2888
+ }
2889
+ }
2890
+
2891
+ if (AdaptiveOn) {
2892
+ if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId))
2893
+ cprintf ("No AD templates for class %d = %s\n",
2894
+ ClassId, unicharset.id_to_unichar(ClassId));
2895
+ else {
2896
+ NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
2897
+ AdaptedTemplates->Templates,
2898
+ BLFeatures, BLAdjust,
2899
+ &BlobLength);
2900
+ if (NumBLFeatures <= 0)
2901
+ cprintf ("Illegal blob (baseline features)!\n");
2902
+ else {
2903
+ ClassIndex =AdaptedTemplates->Templates->IndexFor[ClassId];
2904
+
2905
+ SetBaseLineMatch();
2906
+ IntegerMatcher (ClassForClassId
2907
+ (AdaptedTemplates->Templates, ClassId),
2908
+ AllProtosOn, AllConfigsOn,
2909
+ // AdaptedTemplates->Class[ClassIndex]->PermProtos,
2910
+ // AdaptedTemplates->Class[ClassIndex]->PermConfigs,
2911
+ BlobLength, NumBLFeatures, BLFeatures,
2912
+ BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
2913
+
2914
+ #ifndef SECURE_NAMES
2915
+ int ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
2916
+ ADAPT_CLASS Class = AdaptedTemplates->Class[ClassIndex];
2917
+ cprintf ("Best adaptive template match is config %2d (%4.1f) %s\n",
2918
+ BLResult.Config, BLResult.Rating * 100.0,
2919
+ ConfigIsPermanent(Class, BLResult.Config) ? "Perm" : "Temp");
2920
+ #endif
2921
+ }
2922
+ }
2923
+ }
2924
+
2925
+ cprintf ("\n");
2926
+ if (BLResult.Rating < CNResult.Rating) {
2927
+ ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
2928
+ if (next_config < 0) {
2929
+ ConfigMask = 1 << BLResult.Config;
2930
+ next_config = 0;
2931
+ } else {
2932
+ ConfigMask = 1 << next_config;
2933
+ ++next_config;
2934
+ }
2935
+ NormMethod = baseline;
2936
+
2937
+ SetBaseLineMatch();
2938
+ IntegerMatcher (ClassForClassId (AdaptedTemplates->Templates, ClassId),
2939
+ AllProtosOn,
2940
+ // AdaptedTemplates->Class[ClassIndex]->PermProtos,
2941
+ (BIT_VECTOR) & ConfigMask,
2942
+ BlobLength, NumBLFeatures, BLFeatures,
2943
+ BLAdjust[ClassIndex], &BLResult, MatchDebugFlags);
2944
+ cprintf ("Adaptive template match for config %2d is %4.1f\n",
2945
+ BLResult.Config, BLResult.Rating * 100.0);
2946
+ }
2947
+ else {
2948
+ ClassIndex = PreTrainedTemplates->IndexFor[ClassId];
2949
+ ConfigMask = 1 << CNResult.Config;
2950
+ NormMethod = character;
2951
+
2952
+ SetCharNormMatch();
2953
+ //xiaofan
2954
+ IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), AllProtosOn, (BIT_VECTOR) & ConfigMask,
2955
+ BlobLength, NumCNFeatures, CNFeatures,
2956
+ CNAdjust[ClassIndex], &CNResult, MatchDebugFlags);
2957
+ }
2958
+ } /* ShowBestMatchFor */