tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,2958 @@
1
+ /******************************************************************************
2
+ ** Filename: adaptmatch.c
3
+ ** Purpose: High level adaptive matcher.
4
+ ** Author: Dan Johnson
5
+ ** History: Mon Mar 11 10:00:10 1991, DSJ, Created.
6
+ **
7
+ ** (c) Copyright Hewlett-Packard Company, 1988.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ ******************************************************************************/
18
+
19
+ /**----------------------------------------------------------------------------
20
+ Include Files and Type Defines
21
+ ----------------------------------------------------------------------------**/
22
+ #include <ctype.h>
23
+ #include "adaptmatch.h"
24
+ #include "normfeat.h"
25
+ #include "mfoutline.h"
26
+ #include "picofeat.h"
27
+ #include "float2int.h"
28
+ #include "outfeat.h"
29
+ #include "emalloc.h"
30
+ #include "intfx.h"
31
+ #include "permnum.h"
32
+ #include "speckle.h"
33
+ #include "efio.h"
34
+ #include "normmatch.h"
35
+ #include "stopper.h"
36
+ #include "permute.h"
37
+ #include "context.h"
38
+ #include "ndminx.h"
39
+ #include "intproto.h"
40
+ #include "const.h"
41
+ #include "globals.h"
42
+ #include "werd.h"
43
+ #include "callcpp.h"
44
+ #include "tordvars.h"
45
+
46
+ #include <stdio.h>
47
+ #include <string.h>
48
+ #include <ctype.h>
49
+ #include <stdlib.h>
50
+ #include <math.h>
51
+ #ifdef __UNIX__
52
+ #include <assert.h>
53
+ #endif
54
+
55
/* Suffix appended to the image file name to form the name of the file
   that adapted templates are saved to / read from. */
#define ADAPT_TEMPLATE_SUFFIX   ".a"

/* Default names of the built-in training data files; they seed
   BuiltInTemplatesFile and BuiltInCutoffsFile below. */
#define BUILT_IN_TEMPLATES_FILE "inttemp"
#define BUILT_IN_CUTOFFS_FILE   "pffmtable"

#define MAX_MATCHES             10
#define UNLIKELY_NUM_FEAT       200
#define NO_DEBUG                0

/* Capacity of the per-blob threshold array used by AdaptToWord(); words
   with more entries than this are not adapted to. */
#define MAX_ADAPTABLE_WERD_SIZE 40

#define ADAPTABLE_WERD          (GOOD_NUMBER + 0.05)

#define Y_DIM_OFFSET            (Y_SHIFT - BASELINE_Y_SHIFT)

/* Rating that ADAPT_RESULTS.BestRating starts from before matching. */
#define WORST_POSSIBLE_RATING   (1.0)
69
+
70
+ typedef struct
71
+ {
72
+ inT32 BlobLength;
73
+ int NumMatches;
74
+ CLASS_ID Classes[MAX_NUM_CLASSES];
75
+ FLOAT32 Ratings[MAX_CLASS_ID + 1];
76
+ uinT8 Configs[MAX_CLASS_ID + 1];
77
+ FLOAT32 BestRating;
78
+ CLASS_ID BestClass;
79
+ uinT8 BestConfig;
80
+ CLASS_PRUNER_RESULTS CPResults;
81
+ }
82
+
83
+
84
+ ADAPT_RESULTS;
85
+
86
+ typedef struct
87
+ {
88
+ ADAPT_TEMPLATES Templates;
89
+ CLASS_ID ClassId;
90
+ int ConfigId;
91
+ }
92
+
93
+
94
+ PROTO_KEY;
95
+
96
+ /**----------------------------------------------------------------------------
97
+ Private Macros
98
+ ----------------------------------------------------------------------------**/
99
/* True when Rating is numerically above the GreatAdaptiveMatch knob. */
#define MarginalMatch(Rating) ((Rating) > GreatAdaptiveMatch)

/* True once a temporary config has been seen at least
   ReliableConfigThreshold times. */
#define TempConfigReliable(Config) \
  ((Config)->NumTimesSeen >= ReliableConfigThreshold)

/* Marks the cached integer features as stale. */
#define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
106
+
107
+ /**----------------------------------------------------------------------------
108
+ Private Function Prototypes
109
+ ----------------------------------------------------------------------------**/
110
+ void AdaptToChar(TBLOB *Blob,
111
+ LINE_STATS *LineStats,
112
+ CLASS_ID ClassId,
113
+ FLOAT32 Threshold);
114
+
115
+ void AdaptToPunc(TBLOB *Blob,
116
+ LINE_STATS *LineStats,
117
+ CLASS_ID ClassId,
118
+ FLOAT32 Threshold);
119
+
120
+ void AddNewResult(ADAPT_RESULTS *Results,
121
+ CLASS_ID ClassId,
122
+ FLOAT32 Rating,
123
+ int ConfigId);
124
+
125
+ void AmbigClassifier(TBLOB *Blob,
126
+ LINE_STATS *LineStats,
127
+ INT_TEMPLATES Templates,
128
+ UNICHAR_ID *Ambiguities,
129
+ ADAPT_RESULTS *Results);
130
+
131
+ UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
132
+ LINE_STATS *LineStats,
133
+ ADAPT_TEMPLATES Templates,
134
+ ADAPT_RESULTS *Results);
135
+
136
+ void make_config_pruner(INT_TEMPLATES templates, CONFIG_PRUNER *config_pruner);
137
+
138
+ void CharNormClassifier(TBLOB *Blob,
139
+ LINE_STATS *LineStats,
140
+ INT_TEMPLATES Templates,
141
+ ADAPT_RESULTS *Results);
142
+
143
+ void ClassifyAsNoise(TBLOB *Blob,
144
+ LINE_STATS *LineStats,
145
+ ADAPT_RESULTS *Results);
146
+
147
+ int CompareCurrentRatings(const void *arg1,
148
+ const void *arg2);
149
+
150
+ LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results);
151
+
152
+ void DebugAdaptiveClassifier(TBLOB *Blob,
153
+ LINE_STATS *LineStats,
154
+ ADAPT_RESULTS *Results);
155
+
156
+ void DoAdaptiveMatch(TBLOB *Blob,
157
+ LINE_STATS *LineStats,
158
+ ADAPT_RESULTS *Results);
159
+
160
+ void GetAdaptThresholds(TWERD * Word,
161
+ LINE_STATS * LineStats,
162
+ const WERD_CHOICE& BestChoice,
163
+ const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]);
164
+
165
+ UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
166
+ LINE_STATS *LineStats,
167
+ CLASS_ID CorrectClass);
168
+
169
+ int GetBaselineFeatures(TBLOB *Blob,
170
+ LINE_STATS *LineStats,
171
+ INT_TEMPLATES Templates,
172
+ INT_FEATURE_ARRAY IntFeatures,
173
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
174
+ inT32 *BlobLength);
175
+
176
+ FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId);
177
+
178
+ int GetCharNormFeatures(TBLOB *Blob,
179
+ LINE_STATS *LineStats,
180
+ INT_TEMPLATES Templates,
181
+ INT_FEATURE_ARRAY IntFeatures,
182
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
183
+ inT32 *BlobLength);
184
+
185
+ int GetIntBaselineFeatures(TBLOB *Blob,
186
+ LINE_STATS *LineStats,
187
+ INT_TEMPLATES Templates,
188
+ INT_FEATURE_ARRAY IntFeatures,
189
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
190
+ inT32 *BlobLength);
191
+
192
+ int GetIntCharNormFeatures(TBLOB *Blob,
193
+ LINE_STATS *LineStats,
194
+ INT_TEMPLATES Templates,
195
+ INT_FEATURE_ARRAY IntFeatures,
196
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
197
+ inT32 *BlobLength);
198
+
199
+ void InitMatcherRatings(register FLOAT32 *Rating);
200
+
201
+ int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
202
+ CLASS_ID ClassId,
203
+ int NumFeatures,
204
+ INT_FEATURE_ARRAY Features,
205
+ FEATURE_SET FloatFeatures);
206
+
207
+ PROTO_ID MakeNewTempProtos(FEATURE_SET Features,
208
+ int NumBadFeat,
209
+ FEATURE_ID BadFeat[],
210
+ INT_CLASS IClass,
211
+ ADAPT_CLASS Class, BIT_VECTOR TempProtoMask);
212
+
213
+ void MakePermanent(ADAPT_TEMPLATES Templates,
214
+ CLASS_ID ClassId,
215
+ int ConfigId,
216
+ TBLOB *Blob,
217
+ LINE_STATS *LineStats);
218
+
219
+ int MakeTempProtoPerm(void *item1, void *item2);
220
+
221
+ int NumBlobsIn(TWERD *Word);
222
+
223
+ int NumOutlinesInBlob(TBLOB *Blob);
224
+
225
+ void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results);
226
+
227
+ void RemoveBadMatches(ADAPT_RESULTS *Results);
228
+
229
+ void RemoveExtraPuncs(ADAPT_RESULTS *Results);
230
+
231
+ void SetAdaptiveThreshold(FLOAT32 Threshold);
232
+ void ShowBestMatchFor(TBLOB *Blob,
233
+ LINE_STATS *LineStats,
234
+ CLASS_ID ClassId,
235
+ BOOL8 AdaptiveOn,
236
+ BOOL8 PreTrainedOn);
237
+
238
+
239
+ /**----------------------------------------------------------------------------
240
+ Global Data Definitions and Declarations
241
+ ----------------------------------------------------------------------------**/
242
+ /* name of current image file being processed */
243
+ extern char imagefile[];
244
+ INT_VAR(tessedit_single_match, FALSE, "Top choice only from CP");
245
+
246
+ /* variables used to hold performance statistics */
247
+ static int AdaptiveMatcherCalls = 0;
248
+ static int BaselineClassifierCalls = 0;
249
+ static int CharNormClassifierCalls = 0;
250
+ static int AmbigClassifierCalls = 0;
251
+ static int NumWordsAdaptedTo = 0;
252
+ static int NumCharsAdaptedTo = 0;
253
+ static int NumBaselineClassesTried = 0;
254
+ static int NumCharNormClassesTried = 0;
255
+ static int NumAmbigClassesTried = 0;
256
+ static int NumClassesOutput = 0;
257
+ static int NumAdaptationsFailed = 0;
258
+
259
+ /* define globals used to hold onto extracted features. This is used
260
+ to map from the old scheme in which baseline features and char norm
261
+ features are extracted separately, to the new scheme in which they
262
+ are extracted at the same time. */
263
+ static BOOL8 FeaturesHaveBeenExtracted = FALSE;
264
+ static BOOL8 FeaturesOK = TRUE;
265
+ static INT_FEATURE_ARRAY BaselineFeatures;
266
+ static INT_FEATURE_ARRAY CharNormFeatures;
267
+ static INT_FX_RESULT_STRUCT FXInfo;
268
+
269
+ /* use a global variable to hold onto the current ratings so that the
270
+ comparison function passes to qsort can get at them */
271
+ static FLOAT32 *CurrentRatings;
272
+
273
+ /* define globals to hold filenames of training data */
274
+ static const char *BuiltInTemplatesFile = BUILT_IN_TEMPLATES_FILE;
275
+ static const char *BuiltInCutoffsFile = BUILT_IN_CUTOFFS_FILE;
276
+ static CLASS_CUTOFF_ARRAY CharNormCutoffs;
277
+ static CLASS_CUTOFF_ARRAY BaselineCutoffs;
278
+
279
+ /* use global variables to hold onto built-in templates and adapted
280
+ templates */
281
+ static INT_TEMPLATES PreTrainedTemplates;
282
+ static ADAPT_TEMPLATES AdaptedTemplates;
283
+
284
+ /* create dummy proto and config masks for use with the built-in templates */
285
+ static BIT_VECTOR AllProtosOn;
286
+ static BIT_VECTOR PrunedProtos;
287
+ static BIT_VECTOR AllConfigsOn;
288
+ static BIT_VECTOR AllProtosOff;
289
+ static BIT_VECTOR AllConfigsOff;
290
+ static BIT_VECTOR TempProtoMask;
291
+
292
+ /* define control knobs for adaptive matcher */
293
+ make_toggle_const(EnableAdaptiveMatcher, 1, MakeEnableAdaptiveMatcher);
294
+ /* PREV DEFAULT 0 */
295
+
296
+ make_toggle_const(UsePreAdaptedTemplates, 0, MakeUsePreAdaptedTemplates);
297
+ make_toggle_const(SaveAdaptedTemplates, 0, MakeSaveAdaptedTemplates);
298
+
299
+ make_toggle_var(EnableAdaptiveDebugger, 0, MakeEnableAdaptiveDebugger,
300
+ 18, 1, SetEnableAdaptiveDebugger, "Enable match debugger");
301
+
302
+ make_int_var(MatcherDebugLevel, 0, MakeMatcherDebugLevel,
303
+ 18, 2, SetMatcherDebugLevel, "Matcher Debug Level: ");
304
+
305
+ make_int_var(MatchDebugFlags, 0, MakeMatchDebugFlags,
306
+ 18, 3, SetMatchDebugFlags, "Matcher Debug Flags: ");
307
+
308
+ make_toggle_var(EnableLearning, 1, MakeEnableLearning,
309
+ 18, 4, SetEnableLearning, "Enable learning");
310
+ /* PREV DEFAULT 0 */
311
+ /*record it for multiple pages */
312
+ static int old_enable_learning = 1;
313
+
314
+ make_int_var(LearningDebugLevel, 0, MakeLearningDebugLevel,
315
+ 18, 5, SetLearningDebugLevel, "Learning Debug Level: ");
316
+
317
+ make_float_var(GoodAdaptiveMatch, 0.125, MakeGoodAdaptiveMatch,
318
+ 18, 6, SetGoodAdaptiveMatch, "Good Match (0-1): ");
319
+
320
+ make_float_var(GreatAdaptiveMatch, 0.0, MakeGreatAdaptiveMatch,
321
+ 18, 7, SetGreatAdaptiveMatch, "Great Match (0-1): ");
322
+ /* PREV DEFAULT 0.10 */
323
+
324
+ make_float_var(PerfectRating, 0.02, MakePerfectRating,
325
+ 18, 8, SetPerfectRating, "Perfect Match (0-1): ");
326
+
327
+ make_float_var(BadMatchPad, 0.15, MakeBadMatchPad,
328
+ 18, 9, SetBadMatchPad, "Bad Match Pad (0-1): ");
329
+
330
+ make_float_var(RatingMargin, 0.1, MakeRatingMargin,
331
+ 18, 10, SetRatingMargin, "New template margin (0-1): ");
332
+
333
+ make_float_var(NoiseBlobLength, 12.0, MakeNoiseBlobLength,
334
+ 18, 11, SetNoiseBlobLength, "Avg. noise blob length: ");
335
+
336
+ make_int_var(MinNumPermClasses, 1, MakeMinNumPermClasses,
337
+ 18, 12, SetMinNumPermClasses, "Min # of permanent classes: ");
338
+ /* PREV DEFAULT 200 */
339
+
340
+ make_int_var(ReliableConfigThreshold, 2, MakeReliableConfigThreshold,
341
+ 18, 13, SetReliableConfigThreshold,
342
+ "Reliable Config Threshold: ");
343
+
344
+ make_float_var(MaxAngleDelta, 0.015, MakeMaxAngleDelta,
345
+ 18, 14, SetMaxAngleDelta,
346
+ "Maximum angle delta for proto clustering: ");
347
+
348
+ make_toggle_var(EnableIntFX, 1, MakeEnableIntFX,
349
+ 18, 15, SetEnableIntFX, "Enable integer fx");
350
+ /* PREV DEFAULT 0 */
351
+
352
+ make_toggle_var(EnableNewAdaptRules, 1, MakeEnableNewAdaptRules,
353
+ 18, 16, SetEnableNewAdaptRules,
354
+ "Enable new adaptation rules");
355
+ /* PREV DEFAULT 0 */
356
+
357
+ make_float_var(RatingScale, 1.5, MakeRatingScale,
358
+ 18, 17, SetRatingScale, "Rating scale: ");
359
+
360
+ make_float_var(CertaintyScale, 20.0, MakeCertaintyScale,
361
+ 18, 18, SetCertaintyScale, "CertaintyScale: ");
362
+
363
+ make_int_var(FailedAdaptionsBeforeReset, 150, MakeFailedAdaptionsBeforeReset,
364
+ 18, 19, SetFailedAdaptionsBeforeReset,
365
+ "Number of failed adaptions before adapted templates reset: ");
366
+ double_VAR(tessedit_class_miss_scale, 0.00390625,
367
+ "Scale factor for features not used");
368
+
369
+ int tess_cn_matching = 0;
370
+ int tess_bn_matching = 0;
371
+
372
+ /**----------------------------------------------------------------------------
373
+ Public Code
374
+ ----------------------------------------------------------------------------**/
375
+ /*---------------------------------------------------------------------------*/
376
+ LIST AdaptiveClassifier(TBLOB *Blob, TBLOB *DotBlob, TEXTROW *Row) {
377
+ /*
378
+ ** Parameters:
379
+ ** Blob blob to be classified
380
+ ** DotBlob (obsolete)
381
+ ** Row row of text that word appears in
382
+ ** Globals:
383
+ ** CurrentRatings
384
+ used by compare function for qsort
385
+ ** Operation: This routine calls the adaptive matcher which returns
386
+ ** (in an array) the class id of each class matched. It also
387
+ ** returns the number of classes matched.
388
+ ** For each class matched it places the best rating
389
+ ** found for that class into the Ratings array.
390
+ ** Bad matches are then removed so that they don't need to be
391
+ ** sorted. The remaining good matches are then sorted and
392
+ ** converted to choices.
393
+ ** This routine also performs some simple speckle filtering.
394
+ ** Return: List of choices found by adaptive matcher.
395
+ ** Exceptions: none
396
+ ** History: Mon Mar 11 10:00:58 1991, DSJ, Created.
397
+ */
398
+ LIST Choices;
399
+ ADAPT_RESULTS* Results = new ADAPT_RESULTS;
400
+ LINE_STATS LineStats;
401
+
402
+ if (FailedAdaptionsBeforeReset >= 0 &&
403
+ NumAdaptationsFailed >= FailedAdaptionsBeforeReset) {
404
+ NumAdaptationsFailed = 0;
405
+ ResetAdaptiveClassifier();
406
+ }
407
+ if (AdaptedTemplates == NULL)
408
+ AdaptedTemplates = NewAdaptedTemplates ();
409
+ EnterClassifyMode;
410
+
411
+ Results->BlobLength = MAX_INT32;
412
+ Results->NumMatches = 0;
413
+ Results->BestRating = WORST_POSSIBLE_RATING;
414
+ Results->BestClass = NO_CLASS;
415
+ Results->BestConfig = 0;
416
+ GetLineStatsFromRow(Row, &LineStats);
417
+ InitMatcherRatings (Results->Ratings);
418
+
419
+ DoAdaptiveMatch(Blob, &LineStats, Results);
420
+ RemoveBadMatches(Results);
421
+
422
+ /* save ratings in a global so that CompareCurrentRatings() can see them */
423
+ CurrentRatings = Results->Ratings;
424
+ qsort((void*) (Results->Classes), Results->NumMatches,
425
+ sizeof (CLASS_ID), CompareCurrentRatings);
426
+ RemoveExtraPuncs(Results);
427
+ Choices = ConvertMatchesToChoices(Results);
428
+
429
+ if (MatcherDebugLevel >= 1) {
430
+ cprintf ("AD Matches = ");
431
+ PrintAdaptiveMatchResults(stdout, Results);
432
+ }
433
+
434
+ if (LargeSpeckle (Blob, Row))
435
+ Choices = AddLargeSpeckleTo (Choices);
436
+
437
+ #ifndef GRAPHICS_DISABLED
438
+ if (EnableAdaptiveDebugger)
439
+ DebugAdaptiveClassifier(Blob, &LineStats, Results);
440
+ #endif
441
+
442
+ NumClassesOutput += count (Choices);
443
+ if (Choices == NIL) {
444
+ char empty_lengths[] = {0};
445
+ if (!bln_numericmode)
446
+ tprintf ("Nil classification!\n"); // Should never normally happen.
447
+ return (append_choice (NIL, "", empty_lengths, 50.0f, -20.0f, -1));
448
+ }
449
+
450
+ delete Results;
451
+ return Choices;
452
+ } /* AdaptiveClassifier */
453
+
454
+
455
+ /*---------------------------------------------------------------------------*/
456
+ void AdaptToWord(TWERD *Word,
457
+ TEXTROW *Row,
458
+ const WERD_CHOICE& BestChoice,
459
+ const WERD_CHOICE& BestRawChoice,
460
+ const char *rejmap) {
461
+ /*
462
+ ** Parameters:
463
+ ** Word
464
+ word to be adapted to
465
+ ** Row
466
+ row of text that word is found in
467
+ ** BestChoice
468
+ best choice for word found by system
469
+ ** BestRawChoice
470
+ best choice for word found by classifier only
471
+ ** Globals:
472
+ ** EnableLearning
473
+ TRUE if learning is enabled
474
+ ** Operation: This routine implements a preliminary version of the
475
+ ** rules which are used to decide which characters to adapt to.
476
+ ** A word is adapted to if it is in the dictionary or if it
477
+ ** is a "good" number (no trailing units, etc.). It cannot
478
+ ** contain broken or merged characters. Within that word, only
479
+ ** letters and digits are adapted to (no punctuation).
480
+ ** Return: none
481
+ ** Exceptions: none
482
+ ** History: Thu Mar 14 07:40:36 1991, DSJ, Created.
483
+ */
484
+ TBLOB *Blob;
485
+ LINE_STATS LineStats;
486
+ FLOAT32 Thresholds[MAX_ADAPTABLE_WERD_SIZE];
487
+ FLOAT32 *Threshold;
488
+ const char *map = rejmap;
489
+ char map_char = '1';
490
+ const char* BestChoice_string = BestChoice.string().string();
491
+ const char* BestChoice_lengths = BestChoice.lengths().string();
492
+
493
+ if (strlen(BestChoice_lengths) > MAX_ADAPTABLE_WERD_SIZE)
494
+ return;
495
+
496
+ if (EnableLearning) {
497
+ NumWordsAdaptedTo++;
498
+
499
+ #ifndef SECURE_NAMES
500
+ if (LearningDebugLevel >= 1)
501
+ cprintf ("\n\nAdapting to word = %s\n", BestChoice.string().string());
502
+ #endif
503
+ GetLineStatsFromRow(Row, &LineStats);
504
+
505
+ GetAdaptThresholds(Word,
506
+ &LineStats,
507
+ BestChoice,
508
+ BestRawChoice,
509
+ Thresholds);
510
+
511
+ for (Blob = Word->blobs, Threshold = Thresholds; Blob != NULL;
512
+ Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++),
513
+ Threshold++) {
514
+ InitIntFX();
515
+
516
+ if (rejmap != NULL)
517
+ map_char = *map++;
518
+
519
+ assert (map_char == '1' || map_char == '0');
520
+
521
+ if (map_char == '1') {
522
+
523
+ // if (unicharset.get_isalpha (BestChoice_string, *BestChoice_lengths) ||
524
+ // unicharset.get_isdigit (BestChoice_string, *BestChoice_lengths)) {
525
+ /* SPECIAL RULE: don't adapt to an 'i' which is the first char
526
+ in a word because they are too ambiguous with 'I'.
527
+ The new adaptation rules should account for this
528
+ automatically, since they exclude ambiguous words from
529
+ adaptation, but for safety's sake we'll leave the rule in.
530
+ Also, don't adapt to i's that have only 1 blob in them
531
+ because this creates too much ambiguity for broken
532
+ characters. */
533
+ if (*BestChoice_lengths == 1 &&
534
+ (*BestChoice_string == 'i'
535
+ || (il1_adaption_test && *BestChoice_string == 'I' &&
536
+ (Blob->next == NULL ||
537
+ unicharset.get_islower (BestChoice_string + *BestChoice_lengths,
538
+ *(BestChoice_lengths + 1)))))
539
+ && (Blob == Word->blobs
540
+ || (!(unicharset.get_isalpha (BestChoice_string -
541
+ *(BestChoice_lengths - 1),
542
+ *(BestChoice_lengths - 1)) ||
543
+ unicharset.get_isdigit (BestChoice_string -
544
+ *(BestChoice_lengths - 1),
545
+ *(BestChoice_lengths - 1))))
546
+
547
+ || (!il1_adaption_test && NumOutlinesInBlob(Blob) != 2))) {
548
+ if (LearningDebugLevel >= 1)
549
+ cprintf ("Rejecting char = %s\n", unicharset.id_to_unichar(
550
+ unicharset.unichar_to_id(BestChoice_string,
551
+ *BestChoice_lengths)));
552
+ }
553
+ else {
554
+ #ifndef SECURE_NAMES
555
+ if (LearningDebugLevel >= 1)
556
+ cprintf ("Adapting to char = %s, thr= %g\n",
557
+ unicharset.id_to_unichar(
558
+ unicharset.unichar_to_id(BestChoice_string,
559
+ *BestChoice_lengths)),
560
+ *Threshold);
561
+ #endif
562
+ AdaptToChar(Blob, &LineStats,
563
+ unicharset.unichar_to_id(BestChoice_string,
564
+ *BestChoice_lengths),
565
+ *Threshold);
566
+ }
567
+ // }
568
+ // else
569
+ // AdaptToPunc(Blob, &LineStats,
570
+ // unicharset.unichar_to_id(BestChoice_string,
571
+ // *BestChoice_lengths),
572
+ // *Threshold);
573
+ }
574
+ }
575
+ if (LearningDebugLevel >= 1)
576
+ cprintf ("\n");
577
+ }
578
+ } /* AdaptToWord */
579
+
580
+
581
+ /*---------------------------------------------------------------------------*/
582
+ void EndAdaptiveClassifier() {
583
+ /*
584
+ ** Parameters: none
585
+ ** Globals:
586
+ ** AdaptedTemplates
587
+ current set of adapted templates
588
+ ** SaveAdaptedTemplates
589
+ TRUE if templates should be saved
590
+ ** EnableAdaptiveMatcher
591
+ TRUE if adaptive matcher is enabled
592
+ ** Operation: This routine performs cleanup operations on the
593
+ ** adaptive classifier. It should be called before the
594
+ ** program is terminated. Its main function is to save
595
+ ** the adapted templates to a file.
596
+ ** Return: none
597
+ ** Exceptions: none
598
+ ** History: Tue Mar 19 14:37:06 1991, DSJ, Created.
599
+ */
600
+ char Filename[256];
601
+ FILE *File;
602
+
603
+ #ifndef SECURE_NAMES
604
+ if (EnableAdaptiveMatcher && SaveAdaptedTemplates) {
605
+ strcpy(Filename, imagefile);
606
+ strcat(Filename, ADAPT_TEMPLATE_SUFFIX);
607
+ File = fopen (Filename, "wb");
608
+ if (File == NULL)
609
+ cprintf ("Unable to save adapted templates to %s!\n", Filename);
610
+ else {
611
+ cprintf ("\nSaving adapted templates to %s ...", Filename);
612
+ fflush(stdout);
613
+ WriteAdaptedTemplates(File, AdaptedTemplates);
614
+ cprintf ("\n");
615
+ fclose(File);
616
+ }
617
+ }
618
+ #endif
619
+ if (PreTrainedTemplates == NULL)
620
+ return; // This function isn't safe to run twice.
621
+ EndDangerousAmbigs();
622
+ FreeNormProtos();
623
+ free_int_templates(PreTrainedTemplates);
624
+ PreTrainedTemplates = NULL;
625
+ FreeBitVector(AllProtosOn);
626
+ FreeBitVector(PrunedProtos);
627
+ FreeBitVector(AllConfigsOn);
628
+ FreeBitVector(AllProtosOff);
629
+ FreeBitVector(AllConfigsOff);
630
+ FreeBitVector(TempProtoMask);
631
+ AllProtosOn = NULL;
632
+ PrunedProtos = NULL;
633
+ AllConfigsOn = NULL;
634
+ AllProtosOff = NULL;
635
+ AllConfigsOff = NULL;
636
+ TempProtoMask = NULL;
637
+ } /* EndAdaptiveClassifier */
638
+
639
+
640
+ /*---------------------------------------------------------------------------*/
641
+ void InitAdaptiveClassifier() {
642
+ /*
643
+ ** Parameters: none
644
+ ** Globals:
645
+ ** BuiltInTemplatesFile
646
+ file to get built-in temps from
647
+ ** BuiltInCutoffsFile
648
+ file to get avg. feat per class from
649
+ ** PreTrainedTemplates
650
+ pre-trained configs and protos
651
+ ** AdaptedTemplates
652
+ templates adapted to current page
653
+ ** CharNormCutoffs
654
+ avg # of features per class
655
+ ** AllProtosOn
656
+ dummy proto mask with all bits 1
657
+ ** AllConfigsOn
658
+ dummy config mask with all bits 1
659
+ ** UsePreAdaptedTemplates
660
+ enables use of pre-adapted templates
661
+ ** Operation: This routine reads in the training information needed
662
+ ** by the adaptive classifier and saves it into global
663
+ ** variables.
664
+ ** Return: none
665
+ ** Exceptions: none
666
+ ** History: Mon Mar 11 12:49:34 1991, DSJ, Created.
667
+ */
668
+ int i;
669
+ FILE *File;
670
+ STRING Filename;
671
+
672
+ if (!EnableAdaptiveMatcher)
673
+ return;
674
+ if (PreTrainedTemplates != NULL)
675
+ EndAdaptiveClassifier(); // Don't leak with multiple inits.
676
+
677
+ Filename = language_data_path_prefix;
678
+ Filename += BuiltInTemplatesFile;
679
+ #ifndef SECURE_NAMES
680
+ // cprintf( "\nReading built-in templates from %s ...",
681
+ // Filename);
682
+ fflush(stdout);
683
+ #endif
684
+
685
+ #ifdef __UNIX__
686
+ File = Efopen (Filename.string(), "r");
687
+ #else
688
+ File = Efopen (Filename.string(), "rb");
689
+ #endif
690
+ PreTrainedTemplates = ReadIntTemplates (File, TRUE);
691
+ fclose(File);
692
+
693
+ Filename = language_data_path_prefix;
694
+ Filename += BuiltInCutoffsFile;
695
+ #ifndef SECURE_NAMES
696
+ // cprintf( "\nReading built-in pico-feature cutoffs from %s ...",
697
+ // Filename);
698
+ fflush(stdout);
699
+ #endif
700
+ ReadNewCutoffs (Filename.string(), PreTrainedTemplates->IndexFor,
701
+ CharNormCutoffs);
702
+
703
+ GetNormProtos();
704
+
705
+ InitIntegerMatcher();
706
+ InitIntegerFX();
707
+
708
+ AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
709
+ PrunedProtos = NewBitVector(MAX_NUM_PROTOS);
710
+ AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
711
+ AllProtosOff = NewBitVector(MAX_NUM_PROTOS);
712
+ AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
713
+ TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
714
+ set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
715
+ set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
716
+ set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
717
+ zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS));
718
+ zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
719
+
720
+ if (UsePreAdaptedTemplates) {
721
+ Filename = imagefile;
722
+ Filename += ADAPT_TEMPLATE_SUFFIX;
723
+ File = fopen (Filename.string(), "rb");
724
+ if (File == NULL)
725
+ AdaptedTemplates = NewAdaptedTemplates ();
726
+ else {
727
+ #ifndef SECURE_NAMES
728
+ cprintf ("\nReading pre-adapted templates from %s ...", Filename.string());
729
+ fflush(stdout);
730
+ #endif
731
+ AdaptedTemplates = ReadAdaptedTemplates (File);
732
+ cprintf ("\n");
733
+ fclose(File);
734
+ PrintAdaptedTemplates(stdout, AdaptedTemplates);
735
+
736
+ for (i = 0; i < (AdaptedTemplates->Templates)->NumClasses; i++) {
737
+ BaselineCutoffs[i] =
738
+ CharNormCutoffs[PreTrainedTemplates->IndexFor[
739
+ AdaptedTemplates->Templates->ClassIdFor[i]]];
740
+ }
741
+ }
742
+ } else {
743
+ if (AdaptedTemplates != NULL)
744
+ free_adapted_templates(AdaptedTemplates);
745
+ AdaptedTemplates = NewAdaptedTemplates ();
746
+ }
747
+ old_enable_learning = EnableLearning;
748
+
749
+ } /* InitAdaptiveClassifier */
750
+
751
+ void ResetAdaptiveClassifier() {
752
+ free_adapted_templates(AdaptedTemplates);
753
+ AdaptedTemplates = NULL;
754
+ }
755
+
756
+
757
+ /*---------------------------------------------------------------------------*/
758
+ void InitAdaptiveClassifierVars() {
759
+ /*
760
+ ** Parameters: none
761
+ ** Globals: none
762
+ ** Operation: This routine installs the control knobs used by the
763
+ ** adaptive matcher.
764
+ ** Return: none
765
+ ** Exceptions: none
766
+ ** History: Mon Mar 11 12:49:34 1991, DSJ, Created.
767
+ */
768
+ VALUE dummy;
769
+
770
+ string_variable (BuiltInTemplatesFile, "BuiltInTemplatesFile",
771
+ BUILT_IN_TEMPLATES_FILE);
772
+ string_variable (BuiltInCutoffsFile, "BuiltInCutoffsFile",
773
+ BUILT_IN_CUTOFFS_FILE);
774
+
775
+ MakeEnableAdaptiveMatcher();
776
+ MakeUsePreAdaptedTemplates();
777
+ MakeSaveAdaptedTemplates();
778
+
779
+ MakeEnableLearning();
780
+ MakeEnableAdaptiveDebugger();
781
+ MakeBadMatchPad();
782
+ MakeGoodAdaptiveMatch();
783
+ MakeGreatAdaptiveMatch();
784
+ MakeNoiseBlobLength();
785
+ MakeMinNumPermClasses();
786
+ MakeReliableConfigThreshold();
787
+ MakeMaxAngleDelta();
788
+ MakeLearningDebugLevel();
789
+ MakeMatcherDebugLevel();
790
+ MakeMatchDebugFlags();
791
+ MakeRatingMargin();
792
+ MakePerfectRating();
793
+ MakeEnableIntFX();
794
+ MakeEnableNewAdaptRules();
795
+ MakeRatingScale();
796
+ MakeCertaintyScale();
797
+ MakeFailedAdaptionsBeforeReset();
798
+
799
+ InitPicoFXVars();
800
+ InitOutlineFXVars(); //?
801
+
802
+ } /* InitAdaptiveClassifierVars */
803
+
804
+
805
+ /*---------------------------------------------------------------------------*/
806
+ void PrintAdaptiveStatistics(FILE *File) {
807
+ /*
808
+ ** Parameters:
809
+ ** File
810
+ open text file to print adaptive statistics to
811
+ ** Globals: none
812
+ ** Operation: Print to File the statistics which have been gathered
813
+ ** for the adaptive matcher.
814
+ ** Return: none
815
+ ** Exceptions: none
816
+ ** History: Thu Apr 18 14:37:37 1991, DSJ, Created.
817
+ */
818
+ #ifndef SECURE_NAMES
819
+
820
+ fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
821
+ fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
822
+ fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n",
823
+ NumClassesOutput,
824
+ ((AdaptiveMatcherCalls == 0) ? (0.0) :
825
+ ((float) NumClassesOutput / AdaptiveMatcherCalls)));
826
+ fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
827
+ BaselineClassifierCalls,
828
+ ((BaselineClassifierCalls == 0) ? (0.0) :
829
+ ((float) NumBaselineClassesTried / BaselineClassifierCalls)));
830
+ fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
831
+ CharNormClassifierCalls,
832
+ ((CharNormClassifierCalls == 0) ? (0.0) :
833
+ ((float) NumCharNormClassesTried / CharNormClassifierCalls)));
834
+ fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
835
+ AmbigClassifierCalls,
836
+ ((AmbigClassifierCalls == 0) ? (0.0) :
837
+ ((float) NumAmbigClassesTried / AmbigClassifierCalls)));
838
+
839
+ fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
840
+ fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
841
+ fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
842
+
843
+ if (UsePreAdaptedTemplates)
844
+ PrintAdaptedTemplates(File, AdaptedTemplates);
845
+ #endif
846
+ } /* PrintAdaptiveStatistics */
847
+
848
+
849
+ /*---------------------------------------------------------------------------*/
850
+ void SettupPass1() {
851
+ /*
852
+ ** Parameters: none
853
+ ** Globals:
854
+ ** EnableLearning
855
+ set to TRUE by this routine
856
+ ** Operation: This routine prepares the adaptive matcher for the start
857
+ ** of the first pass. Learning is enabled (unless it is
858
+ ** disabled for the whole program).
859
+ ** Return: none
860
+ ** Exceptions: none
861
+ ** History: Mon Apr 15 16:39:29 1991, DSJ, Created.
862
+ */
863
+ /* Note: this is somewhat redundant, it simply says that if learning is
864
+ enabled then it will remain enabled on the first pass. If it is
865
+ disabled, then it will remain disabled. This is only put here to
866
+ make it very clear that learning is controlled directly by the global
867
+ setting of EnableLearning. */
868
+ EnableLearning = old_enable_learning;
869
+
870
+ SettupStopperPass1();
871
+
872
+ } /* SettupPass1 */
873
+
874
+
875
+ /*---------------------------------------------------------------------------*/
876
+ void SettupPass2() {
877
+ /*
878
+ ** Parameters: none
879
+ ** Globals:
880
+ ** EnableLearning
881
+ set to FALSE by this routine
882
+ ** Operation: This routine prepares the adaptive matcher for the start
883
+ ** of the second pass. Further learning is disabled.
884
+ ** Return: none
885
+ ** Exceptions: none
886
+ ** History: Mon Apr 15 16:39:29 1991, DSJ, Created.
887
+ */
888
+ EnableLearning = FALSE;
889
+ SettupStopperPass2();
890
+
891
+ } /* SettupPass2 */
892
+
893
+
894
+ /*---------------------------------------------------------------------------*/
895
+ void MakeNewAdaptedClass(TBLOB *Blob,
896
+ LINE_STATS *LineStats,
897
+ CLASS_ID ClassId,
898
+ ADAPT_TEMPLATES Templates) {
899
+ /*
900
+ ** Parameters:
901
+ ** Blob
902
+ blob to model new class after
903
+ ** LineStats
904
+ statistics for text row blob is in
905
+ ** ClassId
906
+ id of new class to be created
907
+ ** Templates
908
+ adapted templates to add new class to
909
+ ** Globals:
910
+ ** AllProtosOn
911
+ dummy mask with all 1's
912
+ ** BaselineCutoffs
913
+ kludge needed to get cutoffs
914
+ ** PreTrainedTemplates
915
+ kludge needed to get cutoffs
916
+ ** Operation: This routine creates a new adapted class and uses Blob
917
+ ** as the model for the first config in that class.
918
+ ** Return: none
919
+ ** Exceptions: none
920
+ ** History: Thu Mar 14 12:49:39 1991, DSJ, Created.
921
+ */
922
+ FEATURE_SET Features;
923
+ int Fid, Pid;
924
+ FEATURE Feature;
925
+ int NumFeatures;
926
+ TEMP_PROTO TempProto;
927
+ PROTO Proto;
928
+ ADAPT_CLASS Class;
929
+ INT_CLASS IClass;
930
+ CLASS_INDEX ClassIndex;
931
+ TEMP_CONFIG Config;
932
+
933
+ NormMethod = baseline;
934
+ Features = ExtractOutlineFeatures (Blob, LineStats);
935
+ NumFeatures = Features->NumFeatures;
936
+ if (NumFeatures > UNLIKELY_NUM_FEAT) {
937
+ FreeFeatureSet(Features);
938
+ return;
939
+ }
940
+
941
+ Class = NewAdaptedClass ();
942
+ ClassIndex = AddAdaptedClass (Templates, Class, ClassId);
943
+ Config = NewTempConfig (NumFeatures - 1);
944
+ TempConfigFor (Class, 0) = Config;
945
+
946
+ /* this is a kludge to construct cutoffs for adapted templates */
947
+ if (Templates == AdaptedTemplates)
948
+ BaselineCutoffs[ClassIndex] =
949
+ CharNormCutoffs[PreTrainedTemplates->IndexFor[ClassId]];
950
+
951
+ IClass = ClassForClassId (Templates->Templates, ClassId);
952
+
953
+ for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
954
+ Pid = AddIntProto (IClass);
955
+ assert (Pid != NO_PROTO);
956
+
957
+ Feature = Features->Features[Fid];
958
+ TempProto = NewTempProto ();
959
+ Proto = &(TempProto->Proto);
960
+
961
+ /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
962
+ ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
963
+ instead of the -0.25 to 0.75 used in baseline normalization */
964
+ Proto->Angle = Feature->Params[OutlineFeatDir];
965
+ Proto->X = Feature->Params[OutlineFeatX];
966
+ Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
967
+ Proto->Length = Feature->Params[OutlineFeatLength];
968
+ FillABC(Proto);
969
+
970
+ TempProto->ProtoId = Pid;
971
+ SET_BIT (Config->Protos, Pid);
972
+
973
+ ConvertProto(Proto, Pid, IClass);
974
+ AddProtoToProtoPruner(Proto, Pid, IClass);
975
+
976
+ Class->TempProtos = push (Class->TempProtos, TempProto);
977
+ }
978
+ FreeFeatureSet(Features);
979
+
980
+ AddIntConfig(IClass);
981
+ ConvertConfig (AllProtosOn, 0, IClass);
982
+
983
+ if (LearningDebugLevel >= 1) {
984
+ cprintf ("Added new class '%s' with index %d and %d protos.\n",
985
+ unicharset.id_to_unichar(ClassId), ClassIndex, NumFeatures);
986
+ }
987
+ } /* MakeNewAdaptedClass */
988
+
989
+
990
+ /*---------------------------------------------------------------------------*/
991
+ int GetAdaptiveFeatures(TBLOB *Blob,
992
+ LINE_STATS *LineStats,
993
+ INT_FEATURE_ARRAY IntFeatures,
994
+ FEATURE_SET *FloatFeatures) {
995
+ /*
996
+ ** Parameters:
997
+ ** Blob
998
+ blob to extract features from
999
+ ** LineStats
1000
+ statistics about text row blob is in
1001
+ ** IntFeatures
1002
+ array to fill with integer features
1003
+ ** FloatFeatures
1004
+ place to return actual floating-pt features
1005
+ ** Globals: none
1006
+ ** Operation: This routine sets up the feature extractor to extract
1007
+ ** baseline normalized pico-features.
1008
+ ** The extracted pico-features are converted
1009
+ ** to integer form and placed in IntFeatures. The original
1010
+ ** floating-pt. features are returned in FloatFeatures.
1011
+ ** Return: Number of pico-features returned (0 if an error occurred)
1012
+ ** Exceptions: none
1013
+ ** History: Tue Mar 12 17:55:18 1991, DSJ, Created.
1014
+ */
1015
+ FEATURE_SET Features;
1016
+ int NumFeatures;
1017
+
1018
+ NormMethod = baseline;
1019
+ Features = ExtractPicoFeatures (Blob, LineStats);
1020
+
1021
+ NumFeatures = Features->NumFeatures;
1022
+ if (NumFeatures > UNLIKELY_NUM_FEAT) {
1023
+ FreeFeatureSet(Features);
1024
+ return (0);
1025
+ }
1026
+
1027
+ ComputeIntFeatures(Features, IntFeatures);
1028
+ *FloatFeatures = Features;
1029
+
1030
+ return (NumFeatures);
1031
+
1032
+ } /* GetAdaptiveFeatures */
1033
+
1034
+
1035
+ /**----------------------------------------------------------------------------
1036
+ Private Code
1037
+ ----------------------------------------------------------------------------**/
1038
+ /*---------------------------------------------------------------------------*/
1039
+ int AdaptableWord(TWERD *Word,
1040
+ const char *BestChoice,
1041
+ const char *BestChoice_lengths,
1042
+ const char *BestRawChoice,
1043
+ const char *BestRawChoice_lengths) {
1044
+ /*
1045
+ ** Parameters:
1046
+ ** Word
1047
+ current word
1048
+ ** BestChoice
1049
+ best overall choice for word with context
1050
+ ** BestRawChoice
1051
+ best choice for word without context
1052
+ ** Globals: none
1053
+ ** Operation: Return TRUE if the specified word is acceptable for
1054
+ ** adaptation.
1055
+ ** Return: TRUE or FALSE
1056
+ ** Exceptions: none
1057
+ ** History: Thu May 30 14:25:06 1991, DSJ, Created.
1058
+ */
1059
+ int BestChoiceLength;
1060
+
1061
+ return ( /* rules that apply in general - simplest to compute first */
1062
+ /* EnableLearning && */
1063
+ /* new rules */
1064
+ BestChoice != NULL && BestRawChoice != NULL && Word != NULL &&
1065
+ (BestChoiceLength = strlen (BestChoice_lengths)) > 0 &&
1066
+ BestChoiceLength == NumBlobsIn (Word) &&
1067
+ BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && (
1068
+ (EnableNewAdaptRules
1069
+ &&
1070
+ CurrentBestChoiceAdjustFactor
1071
+ ()
1072
+ <=
1073
+ ADAPTABLE_WERD
1074
+ &&
1075
+ AlternativeChoicesWorseThan
1076
+ (ADAPTABLE_WERD)
1077
+ &&
1078
+ CurrentBestChoiceIs
1079
+ (BestChoice, BestChoice_lengths))
1080
+ ||
1081
+ /* old rules */
1082
+ (!EnableNewAdaptRules
1083
+ &&
1084
+ BestChoiceLength
1085
+ ==
1086
+ strlen
1087
+ (BestRawChoice_lengths)
1088
+ &&
1089
+ ((valid_word (BestChoice) && case_ok (BestChoice, BestChoice_lengths)) || (valid_number (BestChoice, BestChoice_lengths) && pure_number (BestChoice, BestChoice_lengths))) && punctuation_ok (BestChoice, BestChoice_lengths) != -1 && punctuation_ok (BestChoice, BestChoice_lengths) <= 1)));
1090
+
1091
+ } /* AdaptableWord */
1092
+
1093
+
1094
+ /*---------------------------------------------------------------------------*/
1095
+ void AdaptToChar(TBLOB *Blob,
1096
+ LINE_STATS *LineStats,
1097
+ CLASS_ID ClassId,
1098
+ FLOAT32 Threshold) {
1099
+ /*
1100
+ ** Parameters:
1101
+ ** Blob
1102
+ blob to add to templates for ClassId
1103
+ ** LineStats
1104
+ statistics about text line blob is in
1105
+ ** ClassId
1106
+ class to add blob to
1107
+ ** Threshold
1108
+ minimum match rating to existing template
1109
+ ** Globals:
1110
+ ** AdaptedTemplates
1111
+ current set of adapted templates
1112
+ ** AllProtosOn
1113
+ dummy mask to match against all protos
1114
+ ** AllConfigsOn
1115
+ dummy mask to match against all configs
1116
+ ** Operation:
1117
+ ** Return: none
1118
+ ** Exceptions: none
1119
+ ** History: Thu Mar 14 09:36:03 1991, DSJ, Created.
1120
+ */
1121
+ int NumFeatures;
1122
+ INT_FEATURE_ARRAY IntFeatures;
1123
+ INT_RESULT_STRUCT IntResult;
1124
+ CLASS_INDEX ClassIndex;
1125
+ INT_CLASS IClass;
1126
+ ADAPT_CLASS Class;
1127
+ TEMP_CONFIG TempConfig;
1128
+ FEATURE_SET FloatFeatures;
1129
+ int NewTempConfigId;
1130
+
1131
+ NumCharsAdaptedTo++;
1132
+ if (!LegalClassId (ClassId))
1133
+ return;
1134
+
1135
+ if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
1136
+ MakeNewAdaptedClass(Blob, LineStats, ClassId, AdaptedTemplates);
1137
+ }
1138
+ else {
1139
+ IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId);
1140
+ ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
1141
+ Class = AdaptedTemplates->Class[ClassIndex];
1142
+
1143
+ NumFeatures = GetAdaptiveFeatures (Blob, LineStats,
1144
+ IntFeatures, &FloatFeatures);
1145
+ if (NumFeatures <= 0)
1146
+ return;
1147
+
1148
+ SetBaseLineMatch();
1149
+ IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
1150
+ NumFeatures, NumFeatures, IntFeatures, 0,
1151
+ &IntResult, NO_DEBUG);
1152
+
1153
+ SetAdaptiveThreshold(Threshold);
1154
+
1155
+ if (IntResult.Rating <= Threshold) {
1156
+ if (ConfigIsPermanent (Class, IntResult.Config)) {
1157
+ if (LearningDebugLevel >= 1)
1158
+ cprintf ("Found good match to perm config %d = %4.1f%%.\n",
1159
+ IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
1160
+ FreeFeatureSet(FloatFeatures);
1161
+ return;
1162
+ }
1163
+
1164
+ TempConfig = TempConfigFor (Class, IntResult.Config);
1165
+ IncreaseConfidence(TempConfig);
1166
+ if (LearningDebugLevel >= 1)
1167
+ cprintf ("Increasing reliability of temp config %d to %d.\n",
1168
+ IntResult.Config, TempConfig->NumTimesSeen);
1169
+
1170
+ if (TempConfigReliable (TempConfig))
1171
+ MakePermanent (AdaptedTemplates, ClassId, IntResult.Config,
1172
+ Blob, LineStats);
1173
+ }
1174
+ else {
1175
+ if (LearningDebugLevel >= 1)
1176
+ cprintf ("Found poor match to temp config %d = %4.1f%%.\n",
1177
+ IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
1178
+ NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates,
1179
+ ClassId,
1180
+ NumFeatures,
1181
+ IntFeatures,
1182
+ FloatFeatures);
1183
+
1184
+ if (NewTempConfigId >= 0 &&
1185
+ TempConfigReliable (TempConfigFor (Class, NewTempConfigId)))
1186
+ MakePermanent (AdaptedTemplates, ClassId, NewTempConfigId,
1187
+ Blob, LineStats);
1188
+
1189
+ #ifndef GRAPHICS_DISABLED
1190
+ if (LearningDebugLevel >= 1) {
1191
+ IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
1192
+ NumFeatures, NumFeatures, IntFeatures, 0,
1193
+ &IntResult, NO_DEBUG);
1194
+ cprintf ("Best match to temp config %d = %4.1f%%.\n",
1195
+ IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
1196
+ if (LearningDebugLevel >= 2) {
1197
+ uinT32 ConfigMask;
1198
+ ConfigMask = 1 << IntResult.Config;
1199
+ ShowMatchDisplay();
1200
+ IntegerMatcher (IClass, AllProtosOn, (BIT_VECTOR)&ConfigMask,
1201
+ NumFeatures, NumFeatures, IntFeatures, 0,
1202
+ &IntResult, 6 | 0x19);
1203
+ UpdateMatchDisplay();
1204
+ GetClassToDebug ("Adapting");
1205
+ }
1206
+ }
1207
+ #endif // GRAPHICS_DISABLED
1208
+ }
1209
+ FreeFeatureSet(FloatFeatures);
1210
+ }
1211
+ } /* AdaptToChar */
1212
+
1213
+
1214
+ /*---------------------------------------------------------------------------*/
1215
+ void AdaptToPunc(TBLOB *Blob,
1216
+ LINE_STATS *LineStats,
1217
+ CLASS_ID ClassId,
1218
+ FLOAT32 Threshold) {
1219
+ /*
1220
+ ** Parameters:
1221
+ ** Blob
1222
+ blob to add to templates for ClassId
1223
+ ** LineStats
1224
+ statistics about text line blob is in
1225
+ ** ClassId
1226
+ class to add blob to
1227
+ ** Threshold
1228
+ minimum match rating to existing template
1229
+ ** Globals:
1230
+ ** PreTrainedTemplates
1231
+ current set of built-in templates
1232
+ ** Operation:
1233
+ ** Return: none
1234
+ ** Exceptions: none
1235
+ ** History: Thu Mar 14 09:36:03 1991, DSJ, Created.
1236
+ */
1237
+ ADAPT_RESULTS Results;
1238
+ int i;
1239
+
1240
+ Results.BlobLength = MAX_INT32;
1241
+ Results.NumMatches = 0;
1242
+ Results.BestRating = WORST_POSSIBLE_RATING;
1243
+ Results.BestClass = NO_CLASS;
1244
+ Results.BestConfig = 0;
1245
+ InitMatcherRatings (Results.Ratings);
1246
+ CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
1247
+ RemoveBadMatches(&Results);
1248
+
1249
+ if (Results.NumMatches != 1) {
1250
+ if (LearningDebugLevel >= 1) {
1251
+ cprintf ("Rejecting punc = %s (Alternatives = ",
1252
+ unicharset.id_to_unichar(ClassId));
1253
+
1254
+ for (i = 0; i < Results.NumMatches; i++)
1255
+ cprintf ("%s", unicharset.id_to_unichar(Results.Classes[i]));
1256
+ cprintf (")\n");
1257
+ }
1258
+ return;
1259
+ }
1260
+
1261
+ #ifndef SECURE_NAMES
1262
+ if (LearningDebugLevel >= 1)
1263
+ cprintf ("Adapting to punc = %s, thr= %g\n",
1264
+ unicharset.id_to_unichar(ClassId), Threshold);
1265
+ #endif
1266
+ AdaptToChar(Blob, LineStats, ClassId, Threshold);
1267
+
1268
+ } /* AdaptToPunc */
1269
+
1270
+
1271
+ /*---------------------------------------------------------------------------*/
1272
+ void AddNewResult(ADAPT_RESULTS *Results,
1273
+ CLASS_ID ClassId,
1274
+ FLOAT32 Rating,
1275
+ int ConfigId) {
1276
+ /*
1277
+ ** Parameters:
1278
+ ** Results
1279
+ results to add new result to
1280
+ ** ClassId
1281
+ class of new result
1282
+ ** Rating
1283
+ rating of new result
1284
+ ** ConfigId
1285
+ config id of new result
1286
+ ** Globals:
1287
+ ** BadMatchPad
1288
+ defines limits of an acceptable match
1289
+ ** Operation: This routine adds the result of a classification into
1290
+ ** Results. If the new rating is much worse than the current
1291
+ ** best rating, it is not entered into results because it
1292
+ ** would end up being stripped later anyway. If the new rating
1293
+ ** is better than the old rating for the class, it replaces the
1294
+ ** old rating. If this is the first rating for the class, the
1295
+ ** class is added to the list of matched classes in Results.
1296
+ ** If the new rating is better than the best so far, it
1297
+ ** becomes the best so far.
1298
+ ** Return: none
1299
+ ** Exceptions: none
1300
+ ** History: Tue Mar 12 18:19:29 1991, DSJ, Created.
1301
+ */
1302
+ FLOAT32 OldRating;
1303
+ INT_CLASS_STRUCT* CharClass = NULL;
1304
+
1305
+ OldRating = Results->Ratings[ClassId];
1306
+ if (Rating <= Results->BestRating + BadMatchPad && Rating < OldRating) {
1307
+ Results->Ratings[ClassId] = Rating;
1308
+ if (ClassId != NO_CLASS)
1309
+ CharClass = ClassForClassId(PreTrainedTemplates, ClassId);
1310
+ if (CharClass != NULL && CharClass->NumConfigs == 32)
1311
+ Results->Configs[ClassId] = ConfigId;
1312
+ else
1313
+ Results->Configs[ClassId] = ~0;
1314
+
1315
+ if (Rating < Results->BestRating) {
1316
+ Results->BestRating = Rating;
1317
+ Results->BestClass = ClassId;
1318
+ Results->BestConfig = ConfigId;
1319
+ }
1320
+
1321
+ /* if this is first rating for class, add to list of classes matched */
1322
+ if (OldRating == WORST_POSSIBLE_RATING)
1323
+ Results->Classes[Results->NumMatches++] = ClassId;
1324
+ }
1325
+ } /* AddNewResult */
1326
+
1327
+
1328
+ /*---------------------------------------------------------------------------*/
1329
+ void AmbigClassifier(TBLOB *Blob,
1330
+ LINE_STATS *LineStats,
1331
+ INT_TEMPLATES Templates,
1332
+ UNICHAR_ID *Ambiguities,
1333
+ ADAPT_RESULTS *Results) {
1334
+ /*
1335
+ ** Parameters:
1336
+ ** Blob
1337
+ blob to be classified
1338
+ ** LineStats
1339
+ statistics for text line Blob is in
1340
+ ** Templates
1341
+ built-in templates to classify against
1342
+ ** Ambiguities
1343
+ array of class id's to match against
1344
+ ** Results
1345
+ place to put match results
1346
+ ** Globals:
1347
+ ** AllProtosOn
1348
+ mask that enables all protos
1349
+ ** AllConfigsOn
1350
+ mask that enables all configs
1351
+ ** Operation: This routine is identical to CharNormClassifier()
1352
+ ** except that it does no class pruning. It simply matches
1353
+ ** the unknown blob against the classes listed in
1354
+ ** Ambiguities.
1355
+ ** Return: none
1356
+ ** Exceptions: none
1357
+ ** History: Tue Mar 12 19:40:36 1991, DSJ, Created.
1358
+ */
1359
+ int NumFeatures;
1360
+ INT_FEATURE_ARRAY IntFeatures;
1361
+ CLASS_NORMALIZATION_ARRAY CharNormArray;
1362
+ INT_RESULT_STRUCT IntResult;
1363
+ CLASS_ID ClassId;
1364
+ CLASS_INDEX ClassIndex;
1365
+
1366
+ AmbigClassifierCalls++;
1367
+
1368
+ NumFeatures = GetCharNormFeatures (Blob, LineStats,
1369
+ Templates,
1370
+ IntFeatures, CharNormArray,
1371
+ &(Results->BlobLength));
1372
+ if (NumFeatures <= 0)
1373
+ return;
1374
+
1375
+ if (MatcherDebugLevel >= 2)
1376
+ cprintf ("AM Matches = ");
1377
+
1378
+ while (*Ambiguities >= 0) {
1379
+ ClassId = *Ambiguities;
1380
+ ClassIndex = Templates->IndexFor[ClassId];
1381
+
1382
+ SetCharNormMatch();
1383
+ IntegerMatcher (ClassForClassId (Templates, ClassId),
1384
+ AllProtosOn, AllConfigsOn,
1385
+ Results->BlobLength, NumFeatures, IntFeatures,
1386
+ CharNormArray[ClassIndex], &IntResult, NO_DEBUG);
1387
+
1388
+ if (MatcherDebugLevel >= 2)
1389
+ cprintf ("%s-%-2d %2.0f ", unicharset.id_to_unichar(ClassId),
1390
+ IntResult.Config,
1391
+ IntResult.Rating * 100.0);
1392
+
1393
+ AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
1394
+
1395
+ Ambiguities++;
1396
+
1397
+ NumAmbigClassesTried++;
1398
+ }
1399
+ if (MatcherDebugLevel >= 2)
1400
+ cprintf ("\n");
1401
+
1402
+ } /* AmbigClassifier */
1403
+
1404
+ /*---------------------------------------------------------------------------*/
1405
+ // Factored-out calls to IntegerMatcher based on class pruner results.
1406
+ // Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.
1407
+ void MasterMatcher(INT_TEMPLATES templates,
1408
+ inT16 num_features,
1409
+ INT_FEATURE_ARRAY features,
1410
+ CLASS_NORMALIZATION_ARRAY norm_factors,
1411
+ ADAPT_CLASS* classes,
1412
+ int debug,
1413
+ int num_classes,
1414
+ CLASS_PRUNER_RESULTS results,
1415
+ ADAPT_RESULTS* final_results) {
1416
+ for (int c = 0; c < num_classes; c++) {
1417
+ CLASS_ID class_id = results[c].Class;
1418
+ INT_RESULT_STRUCT& int_result = results[c].IMResult;
1419
+ CLASS_INDEX class_index = templates->IndexFor[class_id];
1420
+ BIT_VECTOR protos = classes != NULL ? classes[class_index]->PermProtos
1421
+ : AllProtosOn;
1422
+ BIT_VECTOR configs = classes != NULL ? classes[class_index]->PermConfigs
1423
+ : AllConfigsOn;
1424
+
1425
+ IntegerMatcher(ClassForClassId(templates, class_id),
1426
+ protos, configs, final_results->BlobLength,
1427
+ num_features, features, norm_factors[class_index],
1428
+ &int_result, NO_DEBUG);
1429
+ // Compute class feature corrections.
1430
+ double miss_penalty = tessedit_class_miss_scale *
1431
+ int_result.FeatureMisses;
1432
+ if (MatcherDebugLevel >= 2 || display_ratings > 1) {
1433
+ cprintf("%s-%-2d %2.1f(CP%2.1f, IM%2.1f + MP%2.1f) ",
1434
+ unicharset.id_to_unichar(class_id), int_result.Config,
1435
+ (int_result.Rating + miss_penalty) * 100.0,
1436
+ results[c].Rating * 100.0,
1437
+ int_result.Rating * 100.0, miss_penalty * 100.0);
1438
+ if (c % 4 == 3)
1439
+ cprintf ("\n");
1440
+ }
1441
+ int_result.Rating += miss_penalty;
1442
+ if (int_result.Rating > WORST_POSSIBLE_RATING)
1443
+ int_result.Rating = WORST_POSSIBLE_RATING;
1444
+ AddNewResult(final_results, class_id, int_result.Rating, int_result.Config);
1445
+ }
1446
+ if (MatcherDebugLevel >= 2 || display_ratings > 1)
1447
+ cprintf("\n");
1448
+ }
1449
+
1450
+ /*---------------------------------------------------------------------------*/
1451
+ UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
1452
+ LINE_STATS *LineStats,
1453
+ ADAPT_TEMPLATES Templates,
1454
+ ADAPT_RESULTS *Results) {
1455
+ /*
1456
+ ** Parameters:
1457
+ ** Blob
1458
+ blob to be classified
1459
+ ** LineStats
1460
+ statistics for text line Blob is in
1461
+ ** Templates
1462
+ current set of adapted templates
1463
+ ** Results
1464
+ place to put match results
1465
+ ** Globals:
1466
+ ** BaselineCutoffs
1467
+ expected num features for each class
1468
+ ** Operation: This routine extracts baseline normalized features
1469
+ ** from the unknown character and matches them against the
1470
+ ** specified set of templates. The classes which match
1471
+ ** are added to Results.
1472
+ ** Return: Array of possible ambiguous chars that should be checked.
1473
+ ** Exceptions: none
1474
+ ** History: Tue Mar 12 19:38:03 1991, DSJ, Created.
1475
+ */
1476
+ int NumFeatures;
1477
+ int NumClasses;
1478
+ INT_FEATURE_ARRAY IntFeatures;
1479
+ CLASS_NORMALIZATION_ARRAY CharNormArray;
1480
+ CLASS_ID ClassId;
1481
+ CLASS_INDEX ClassIndex;
1482
+
1483
+ BaselineClassifierCalls++;
1484
+
1485
+ NumFeatures = GetBaselineFeatures (Blob, LineStats,
1486
+ Templates->Templates,
1487
+ IntFeatures, CharNormArray,
1488
+ &(Results->BlobLength));
1489
+ if (NumFeatures <= 0)
1490
+ return NULL;
1491
+
1492
+ NumClasses = ClassPruner (Templates->Templates, NumFeatures,
1493
+ IntFeatures, CharNormArray,
1494
+ BaselineCutoffs, Results->CPResults,
1495
+ MatchDebugFlags);
1496
+
1497
+ NumBaselineClassesTried += NumClasses;
1498
+
1499
+ if (MatcherDebugLevel >= 2 || display_ratings > 1)
1500
+ cprintf ("BL Matches = ");
1501
+
1502
+ SetBaseLineMatch();
1503
+ MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray,
1504
+ Templates->Class, MatchDebugFlags, NumClasses,
1505
+ Results->CPResults, Results);
1506
+
1507
+ ClassId = Results->BestClass;
1508
+ if (ClassId == NO_CLASS)
1509
+ return (NULL);
1510
+ /* this is a bug - maybe should return "" */
1511
+
1512
+ ClassIndex = Templates->Templates->IndexFor[ClassId];
1513
+ return (Templates->Class[ClassIndex]->
1514
+ Config[Results->BestConfig].Perm);
1515
+ } /* BaselineClassifier */
1516
+
1517
+
1518
+ /*---------------------------------------------------------------------------*/
1519
+ void CharNormClassifier(TBLOB *Blob,
1520
+ LINE_STATS *LineStats,
1521
+ INT_TEMPLATES Templates,
1522
+ ADAPT_RESULTS *Results) {
1523
+ /*
1524
+ ** Parameters:
1525
+ ** Blob
1526
+ blob to be classified
1527
+ ** LineStats
1528
+ statistics for text line Blob is in
1529
+ ** Templates
1530
+ templates to classify unknown against
1531
+ ** Results
1532
+ place to put match results
1533
+ ** Globals:
1534
+ ** CharNormCutoffs
1535
+ expected num features for each class
1536
+ ** AllProtosOn
1537
+ mask that enables all protos
1538
+ ** AllConfigsOn
1539
+ mask that enables all configs
1540
+ ** Operation: This routine extracts character normalized features
1541
+ ** from the unknown character and matches them against the
1542
+ ** specified set of templates. The classes which match
1543
+ ** are added to Results.
1544
+ ** Return: none
1545
+ ** Exceptions: none
1546
+ ** History: Tue Mar 12 16:02:52 1991, DSJ, Created.
1547
+ */
1548
+ int NumFeatures;
1549
+ int NumClasses;
1550
+ INT_FEATURE_ARRAY IntFeatures;
1551
+ CLASS_NORMALIZATION_ARRAY CharNormArray;
1552
+
1553
+ CharNormClassifierCalls++;
1554
+
1555
+ NumFeatures = GetCharNormFeatures(Blob, LineStats,
1556
+ Templates,
1557
+ IntFeatures, CharNormArray,
1558
+ &(Results->BlobLength));
1559
+ if (NumFeatures <= 0)
1560
+ return;
1561
+
1562
+ NumClasses = ClassPruner(Templates, NumFeatures,
1563
+ IntFeatures, CharNormArray,
1564
+ CharNormCutoffs, Results->CPResults,
1565
+ MatchDebugFlags);
1566
+
1567
+ if (tessedit_single_match && NumClasses > 1)
1568
+ NumClasses = 1;
1569
+ NumCharNormClassesTried += NumClasses;
1570
+
1571
+ if (MatcherDebugLevel >= 2 || display_ratings > 1)
1572
+ cprintf("CN Matches = ");
1573
+
1574
+ SetCharNormMatch();
1575
+ MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray,
1576
+ NULL, MatchDebugFlags, NumClasses,
1577
+ Results->CPResults, Results);
1578
+ } /* CharNormClassifier */
1579
+
1580
+
1581
+ /*---------------------------------------------------------------------------*/
1582
+ void ClassifyAsNoise(TBLOB *Blob,
1583
+ LINE_STATS *LineStats,
1584
+ ADAPT_RESULTS *Results) {
1585
+ /*
1586
+ ** Parameters:
1587
+ ** Blob
1588
+ blob to be classified
1589
+ ** LineStats
1590
+ statistics for text line Blob is in
1591
+ ** Results
1592
+ results to add noise classification to
1593
+ ** Globals:
1594
+ ** NoiseBlobLength
1595
+ avg. length of a noise blob
1596
+ ** Operation: This routine computes a rating which reflects the
1597
+ ** likelihood that the blob being classified is a noise
1598
+ ** blob. NOTE: assumes that the blob length has already been
1599
+ ** computed and placed into Results.
1600
+ ** Return: none
1601
+ ** Exceptions: none
1602
+ ** History: Tue Mar 12 18:36:52 1991, DSJ, Created.
1603
+ */
1604
+ register FLOAT32 Rating;
1605
+
1606
+ Rating = Results->BlobLength / NoiseBlobLength;
1607
+ Rating *= Rating;
1608
+ Rating /= 1.0 + Rating;
1609
+
1610
+ AddNewResult (Results, NO_CLASS, Rating, 0);
1611
+ } /* ClassifyAsNoise */
1612
+
1613
+
1614
+ /*---------------------------------------------------------------------------*/
1615
+ int CompareCurrentRatings( //CLASS_ID *Class1,
1616
+ const void *arg1,
1617
+ const void *arg2) { //CLASS_ID *Class2)
1618
+ /*
1619
+ ** Parameters:
1620
+ ** Class1, Class2
1621
+ classes whose ratings are to be compared
1622
+ ** Globals:
1623
+ ** CurrentRatings
1624
+ contains actual ratings for each class
1625
+ ** Operation: This routine gets the ratings for the 2 specified classes
1626
+ ** from a global variable (CurrentRatings) and returns:
1627
+ ** -1 if Rating1 < Rating2
1628
+ ** 0 if Rating1 = Rating2
1629
+ ** 1 if Rating1 > Rating2
1630
+ ** Return: Order of classes based on their ratings (see above).
1631
+ ** Exceptions: none
1632
+ ** History: Tue Mar 12 14:18:31 1991, DSJ, Created.
1633
+ */
1634
+ FLOAT32 Rating1, Rating2;
1635
+ CLASS_ID *Class1 = (CLASS_ID *) arg1;
1636
+ CLASS_ID *Class2 = (CLASS_ID *) arg2;
1637
+
1638
+ Rating1 = CurrentRatings[*Class1];
1639
+ Rating2 = CurrentRatings[*Class2];
1640
+
1641
+ if (Rating1 < Rating2)
1642
+ return (-1);
1643
+ else if (Rating1 > Rating2)
1644
+ return (1);
1645
+ else
1646
+ return (0);
1647
+
1648
+ } /* CompareCurrentRatings */
1649
+
1650
+
1651
+ /*---------------------------------------------------------------------------*/
1652
+ LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results) {
1653
+ /*
1654
+ ** Parameters:
1655
+ ** Results
1656
+ adaptive matcher results to convert to choices
1657
+ ** Globals: none
1658
+ ** Operation: This routine creates a choice for each matching class
1659
+ ** in Results (up to MAX_MATCHES) and returns a list of
1660
+ ** these choices. The match
1661
+ ** ratings are converted to be the ratings and certainties
1662
+ ** as used by the context checkers.
1663
+ ** Return: List of choices.
1664
+ ** Exceptions: none
1665
+ ** History: Tue Mar 12 08:55:37 1991, DSJ, Created.
1666
+ */
1667
+ int i;
1668
+ LIST Choices;
1669
+ CLASS_ID NextMatch;
1670
+ FLOAT32 Rating;
1671
+ FLOAT32 Certainty;
1672
+ const char *NextMatch_unichar;
1673
+ char choice_lengths[2] = {0, 0};
1674
+
1675
+ if (Results->NumMatches > MAX_MATCHES)
1676
+ Results->NumMatches = MAX_MATCHES;
1677
+
1678
+ for (Choices = NIL, i = 0; i < Results->NumMatches; i++) {
1679
+ NextMatch = Results->Classes[i];
1680
+ Rating = Certainty = Results->Ratings[NextMatch];
1681
+ Rating *= RatingScale * Results->BlobLength;
1682
+ Certainty *= -CertaintyScale;
1683
+ if (NextMatch != NO_CLASS)
1684
+ NextMatch_unichar = unicharset.id_to_unichar(NextMatch);
1685
+ else
1686
+ NextMatch_unichar = "";
1687
+ choice_lengths[0] = strlen(NextMatch_unichar);
1688
+ Choices = append_choice (Choices,
1689
+ NextMatch_unichar,
1690
+ choice_lengths,
1691
+ Rating, Certainty,
1692
+ Results->Configs[NextMatch],
1693
+ unicharset.get_script(NextMatch));
1694
+ }
1695
+ return (Choices);
1696
+
1697
+ } /* ConvertMatchesToChoices */
1698
+
1699
+
1700
+ /*---------------------------------------------------------------------------*/
1701
+ #ifndef GRAPHICS_DISABLED
1702
+ void DebugAdaptiveClassifier(TBLOB *Blob,
1703
+ LINE_STATS *LineStats,
1704
+ ADAPT_RESULTS *Results) {
1705
+ /*
1706
+ ** Parameters:
1707
+ ** Blob
1708
+ blob whose classification is being debugged
1709
+ ** LineStats
1710
+ statistics for text line blob is in
1711
+ ** Results
1712
+ results of match being debugged
1713
+ ** Globals: none
1714
+ ** Operation:
1715
+ ** Return: none
1716
+ ** Exceptions: none
1717
+ ** History: Wed Mar 13 16:44:41 1991, DSJ, Created.
1718
+ */
1719
+ const char *Prompt =
1720
+ "Left-click in IntegerMatch Window to continue or right click to debug...";
1721
+ const char *DebugMode = "All Templates";
1722
+ CLASS_ID LastClass = Results->BestClass;
1723
+ CLASS_ID ClassId;
1724
+ BOOL8 AdaptiveOn = TRUE;
1725
+ BOOL8 PreTrainedOn = TRUE;
1726
+
1727
+ ShowMatchDisplay();
1728
+ cprintf ("\nDebugging class = %s (%s) ...\n",
1729
+ unicharset.id_to_unichar(LastClass), DebugMode);
1730
+ ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
1731
+ UpdateMatchDisplay();
1732
+
1733
+ while ((ClassId = GetClassToDebug (Prompt)) != 0) {
1734
+ #if 0
1735
+ switch (ClassId) {
1736
+ case 'b':
1737
+ AdaptiveOn = TRUE;
1738
+ PreTrainedOn = FALSE;
1739
+ DebugMode = "Adaptive Templates Only";
1740
+ break;
1741
+
1742
+ case 'c':
1743
+ AdaptiveOn = FALSE;
1744
+ PreTrainedOn = TRUE;
1745
+ DebugMode = "PreTrained Templates Only";
1746
+ break;
1747
+
1748
+ case 'a':
1749
+ AdaptiveOn = TRUE;
1750
+ PreTrainedOn = TRUE;
1751
+ DebugMode = "All Templates";
1752
+ break;
1753
+
1754
+ default:
1755
+ LastClass = ClassId;
1756
+ break;
1757
+ }
1758
+ #endif
1759
+ LastClass = ClassId;
1760
+
1761
+ ShowMatchDisplay();
1762
+ cprintf ("\nDebugging class = %d = %s (%s) ...\n",
1763
+ LastClass, unicharset.id_to_unichar(LastClass), DebugMode);
1764
+ ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
1765
+ UpdateMatchDisplay();
1766
+ }
1767
+ } /* DebugAdaptiveClassifier */
1768
+ #endif
1769
+
1770
+ /*---------------------------------------------------------------------------*/
1771
+ void DoAdaptiveMatch(TBLOB *Blob,
1772
+ LINE_STATS *LineStats,
1773
+ ADAPT_RESULTS *Results) {
1774
+ /*
1775
+ ** Parameters:
1776
+ ** Blob
1777
+ blob to be classified
1778
+ ** LineStats
1779
+ statistics for text line Blob is in
1780
+ ** Results
1781
+ place to put match results
1782
+ ** Globals:
1783
+ ** PreTrainedTemplates
1784
+ built-in training templates
1785
+ ** AdaptedTemplates
1786
+ templates adapted for this page
1787
+ ** GreatAdaptiveMatch
1788
+ rating limit for a great match
1789
+ ** Operation: This routine performs an adaptive classification.
1790
+ ** If we have not yet adapted to enough classes, a simple
1791
+ ** classification to the pre-trained templates is performed.
1792
+ ** Otherwise, we match the blob against the adapted templates.
1793
+ ** If the adapted templates do not match well, we try a
1794
+ ** match against the pre-trained templates. If an adapted
1795
+ ** template match is found, we do a match to any pre-trained
1796
+ ** templates which could be ambiguous. The results from all
1797
+ ** of these classifications are merged together into Results.
1798
+ ** Return: none
1799
+ ** Exceptions: none
1800
+ ** History: Tue Mar 12 08:50:11 1991, DSJ, Created.
1801
+ */
1802
+ UNICHAR_ID *Ambiguities;
1803
+
1804
+ AdaptiveMatcherCalls++;
1805
+ InitIntFX();
1806
+
1807
+ if (AdaptedTemplates->NumPermClasses < MinNumPermClasses
1808
+ || tess_cn_matching) {
1809
+ CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
1810
+ }
1811
+ else {
1812
+ Ambiguities = BaselineClassifier (Blob, LineStats,
1813
+ AdaptedTemplates, Results);
1814
+
1815
+ if ((Results->NumMatches > 0 && MarginalMatch (Results->BestRating)
1816
+ && !tess_bn_matching) || Results->NumMatches == 0) {
1817
+ CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
1818
+ }
1819
+ else if (Ambiguities && *Ambiguities >= 0) {
1820
+ AmbigClassifier(Blob,
1821
+ LineStats,
1822
+ PreTrainedTemplates,
1823
+ Ambiguities,
1824
+ Results);
1825
+ }
1826
+ }
1827
+
1828
+ if (Results->NumMatches == 0)
1829
+ ClassifyAsNoise(Blob, LineStats, Results);
1830
+ /**/} /* DoAdaptiveMatch */
1831
+
1832
+ /*---------------------------------------------------------------------------*/
1833
+ void
1834
+ GetAdaptThresholds (TWERD * Word,
1835
+ LINE_STATS * LineStats,
1836
+ const WERD_CHOICE& BestChoice,
1837
+ const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]) {
1838
+ /*
1839
+ ** Parameters:
1840
+ ** Word
1841
+ current word
1842
+ ** LineStats
1843
+ line stats for row word is in
1844
+ ** BestChoice
1845
+ best choice for current word with context
1846
+ ** BestRawChoice
1847
+ best choice for current word without context
1848
+ ** Thresholds
1849
+ array of thresholds to be filled in
1850
+ ** Globals:
1851
+ ** EnableNewAdaptRules
1852
+ ** GoodAdaptiveMatch
1853
+ ** PerfectRating
1854
+ ** RatingMargin
1855
+ ** Operation: This routine tries to estimate how tight the adaptation
1856
+ ** threshold should be set for each character in the current
1857
+ ** word. In general, the routine tries to set tighter
1858
+ ** thresholds for a character when the current set of templates
1859
+ ** would have made an error on that character. It tries
1860
+ ** to set a threshold tight enough to eliminate the error.
1861
+ ** Two different sets of rules can be used to determine the
1862
+ ** desired thresholds.
1863
+ ** Return: none (results are returned in Thresholds)
1864
+ ** Exceptions: none
1865
+ ** History: Fri May 31 09:22:08 1991, DSJ, Created.
1866
+ */
1867
+ TBLOB *Blob;
1868
+ const char* BestChoice_string = BestChoice.string().string();
1869
+ const char* BestChoice_lengths = BestChoice.lengths().string();
1870
+ const char* BestRawChoice_string = BestRawChoice.string().string();
1871
+ const char* BestRawChoice_lengths = BestRawChoice.lengths().string();
1872
+
1873
+ if (EnableNewAdaptRules && /* new rules */
1874
+ CurrentBestChoiceIs (BestChoice_string, BestChoice_lengths)) {
1875
+ FindClassifierErrors(PerfectRating,
1876
+ GoodAdaptiveMatch,
1877
+ RatingMargin,
1878
+ Thresholds);
1879
+ }
1880
+ else { /* old rules */
1881
+ for (Blob = Word->blobs;
1882
+ Blob != NULL;
1883
+ Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++),
1884
+ BestRawChoice_string += *(BestRawChoice_lengths++), Thresholds++)
1885
+ if (*(BestChoice_lengths) == *(BestRawChoice_lengths) &&
1886
+ strncmp(BestChoice_string, BestRawChoice_string,
1887
+ *(BestChoice_lengths)) == 0)
1888
+ *Thresholds = GoodAdaptiveMatch;
1889
+ else {
1890
+ /* the blob was incorrectly classified - find the rating threshold
1891
+ needed to create a template which will correct the error with
1892
+ some margin. However, don't waste time trying to make
1893
+ templates which are too tight. */
1894
+ *Thresholds = GetBestRatingFor (Blob, LineStats,
1895
+ unicharset.unichar_to_id(
1896
+ BestChoice_string,
1897
+ *BestChoice_lengths));
1898
+ *Thresholds *= (1.0 - RatingMargin);
1899
+ if (*Thresholds > GoodAdaptiveMatch)
1900
+ *Thresholds = GoodAdaptiveMatch;
1901
+ if (*Thresholds < PerfectRating)
1902
+ *Thresholds = PerfectRating;
1903
+ }
1904
+ }
1905
+ } /* GetAdaptThresholds */
1906
+
1907
+ /*---------------------------------------------------------------------------*/
1908
+ UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
1909
+ LINE_STATS *LineStats,
1910
+ CLASS_ID CorrectClass) {
1911
+ /*
1912
+ ** Parameters:
1913
+ ** Blob
1914
+ blob to get classification ambiguities for
1915
+ ** LineStats
1916
+ statistics for text line blob is in
1917
+ ** CorrectClass
1918
+ correct class for Blob
1919
+ ** Globals:
1920
+ ** CurrentRatings
1921
+ used by qsort compare routine
1922
+ ** PreTrainedTemplates
1923
+ built-in templates
1924
+ ** Operation: This routine matches blob to the built-in templates
1925
+ ** to find out if there are any classes other than the correct
1926
+ ** class which are potential ambiguities.
1927
+ ** Return: String containing all possible ambiguous classes.
1928
+ ** Exceptions: none
1929
+ ** History: Fri Mar 15 08:08:22 1991, DSJ, Created.
1930
+ */
1931
+ ADAPT_RESULTS Results;
1932
+ UNICHAR_ID *Ambiguities;
1933
+ int i;
1934
+
1935
+ EnterClassifyMode;
1936
+
1937
+ Results.NumMatches = 0;
1938
+ Results.BestRating = WORST_POSSIBLE_RATING;
1939
+ Results.BestClass = NO_CLASS;
1940
+ Results.BestConfig = 0;
1941
+ InitMatcherRatings (Results.Ratings);
1942
+
1943
+ CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
1944
+ RemoveBadMatches(&Results);
1945
+
1946
+ /* save ratings in a global so that CompareCurrentRatings() can see them */
1947
+ CurrentRatings = Results.Ratings;
1948
+ qsort ((void *) (Results.Classes), Results.NumMatches,
1949
+ sizeof (CLASS_ID), CompareCurrentRatings);
1950
+
1951
+ /* copy the class id's into an string of ambiguities - don't copy if
1952
+ the correct class is the only class id matched */
1953
+ Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) *
1954
+ (Results.NumMatches + 1));
1955
+ if (Results.NumMatches > 1 ||
1956
+ (Results.NumMatches == 1 && Results.Classes[0] != CorrectClass)) {
1957
+ for (i = 0; i < Results.NumMatches; i++)
1958
+ Ambiguities[i] = Results.Classes[i];
1959
+ Ambiguities[i] = -1;
1960
+ }
1961
+ else
1962
+ Ambiguities[0] = -1;
1963
+
1964
+ return (Ambiguities);
1965
+
1966
+ } /* GetAmbiguities */
1967
+
1968
+ /*---------------------------------------------------------------------------*/
1969
+ int GetBaselineFeatures(TBLOB *Blob,
1970
+ LINE_STATS *LineStats,
1971
+ INT_TEMPLATES Templates,
1972
+ INT_FEATURE_ARRAY IntFeatures,
1973
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
1974
+ inT32 *BlobLength) {
1975
+ /*
1976
+ ** Parameters:
1977
+ ** Blob
1978
+ blob to extract features from
1979
+ ** LineStats
1980
+ statistics about text row blob is in
1981
+ ** Templates
1982
+ used to compute char norm adjustments
1983
+ ** IntFeatures
1984
+ array to fill with integer features
1985
+ ** CharNormArray
1986
+ array to fill with dummy char norm adjustments
1987
+ ** BlobLength
1988
+ length of blob in baseline-normalized units
1989
+ ** Globals: none
1990
+ ** Operation: This routine sets up the feature extractor to extract
1991
+ ** baseline normalized pico-features.
1992
+ ** The extracted pico-features are converted
1993
+ ** to integer form and placed in IntFeatures. CharNormArray
1994
+ ** is filled with 0's to indicate to the matcher that no
1995
+ ** character normalization adjustment needs to be done.
1996
+ ** The total length of all blob outlines
1997
+ ** in baseline normalized units is also returned.
1998
+ ** Return: Number of pico-features returned (0 if an error occurred)
1999
+ ** Exceptions: none
2000
+ ** History: Tue Mar 12 17:55:18 1991, DSJ, Created.
2001
+ */
2002
+ FEATURE_SET Features;
2003
+ int NumFeatures;
2004
+
2005
+ if (EnableIntFX)
2006
+ return (GetIntBaselineFeatures (Blob, LineStats, Templates,
2007
+ IntFeatures, CharNormArray, BlobLength));
2008
+
2009
+ NormMethod = baseline;
2010
+ Features = ExtractPicoFeatures (Blob, LineStats);
2011
+
2012
+ NumFeatures = Features->NumFeatures;
2013
+ *BlobLength = NumFeatures;
2014
+ if (NumFeatures > UNLIKELY_NUM_FEAT) {
2015
+ FreeFeatureSet(Features);
2016
+ return (0);
2017
+ }
2018
+
2019
+ ComputeIntFeatures(Features, IntFeatures);
2020
+ ClearCharNormArray(Templates, CharNormArray);
2021
+
2022
+ FreeFeatureSet(Features);
2023
+ return (NumFeatures);
2024
+
2025
+ } /* GetBaselineFeatures */
2026
+
2027
+ /*---------------------------------------------------------------------------*/
2028
+ FLOAT32 GetBestRatingFor(TBLOB *Blob,
2029
+ LINE_STATS *LineStats,
2030
+ CLASS_ID ClassId) {
2031
+ /*
2032
+ ** Parameters:
2033
+ ** Blob
2034
+ blob to get best rating for
2035
+ ** LineStats
2036
+ statistics about text line blob is in
2037
+ ** ClassId
2038
+ class blob is to be compared to
2039
+ ** Globals:
2040
+ ** PreTrainedTemplates
2041
+ built-in templates
2042
+ ** AdaptedTemplates
2043
+ current set of adapted templates
2044
+ ** AllProtosOn
2045
+ dummy mask to enable all protos
2046
+ ** AllConfigsOn
2047
+ dummy mask to enable all configs
2048
+ ** Operation: This routine classifies Blob against both sets of
2049
+ ** templates for the specified class and returns the best
2050
+ ** rating found.
2051
+ ** Return: Best rating for match of Blob to ClassId.
2052
+ ** Exceptions: none
2053
+ ** History: Tue Apr 9 09:01:24 1991, DSJ, Created.
2054
+ */
2055
+ int NumCNFeatures, NumBLFeatures;
2056
+ INT_FEATURE_ARRAY CNFeatures, BLFeatures;
2057
+ INT_RESULT_STRUCT CNResult, BLResult;
2058
+ CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
2059
+ CLASS_INDEX ClassIndex;
2060
+ inT32 BlobLength;
2061
+
2062
+ CNResult.Rating = BLResult.Rating = 1.0;
2063
+
2064
+ if (!LegalClassId (ClassId))
2065
+ return (1.0);
2066
+
2067
+ if (!UnusedClassIdIn (PreTrainedTemplates, ClassId)) {
2068
+ NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
2069
+ PreTrainedTemplates,
2070
+ CNFeatures, CNAdjust, &BlobLength);
2071
+ if (NumCNFeatures > 0) {
2072
+ ClassIndex = PreTrainedTemplates->IndexFor[ClassId];
2073
+
2074
+ SetCharNormMatch();
2075
+ IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
2076
+ AllProtosOn, AllConfigsOn,
2077
+ BlobLength, NumCNFeatures, CNFeatures,
2078
+ CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
2079
+ }
2080
+ }
2081
+
2082
+ if (!UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
2083
+ NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
2084
+ AdaptedTemplates->Templates,
2085
+ BLFeatures, BLAdjust, &BlobLength);
2086
+ if (NumBLFeatures > 0) {
2087
+ ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
2088
+
2089
+ SetBaseLineMatch();
2090
+ IntegerMatcher (ClassForClassId
2091
+ (AdaptedTemplates->Templates, ClassId),
2092
+ AdaptedTemplates->Class[ClassIndex]->PermProtos,
2093
+ AdaptedTemplates->Class[ClassIndex]->PermConfigs,
2094
+ BlobLength, NumBLFeatures, BLFeatures,
2095
+ BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
2096
+ }
2097
+ }
2098
+
2099
+ return (MIN (BLResult.Rating, CNResult.Rating));
2100
+
2101
+ } /* GetBestRatingFor */
2102
+
2103
+ /*---------------------------------------------------------------------------*/
2104
+ int GetCharNormFeatures(TBLOB *Blob,
2105
+ LINE_STATS *LineStats,
2106
+ INT_TEMPLATES Templates,
2107
+ INT_FEATURE_ARRAY IntFeatures,
2108
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
2109
+ inT32 *BlobLength) {
2110
+ /*
2111
+ ** Parameters:
2112
+ ** Blob
2113
+ blob to extract features from
2114
+ ** LineStats
2115
+ statistics about text row blob is in
2116
+ ** Templates
2117
+ used to compute char norm adjustments
2118
+ ** IntFeatures
2119
+ array to fill with integer features
2120
+ ** CharNormArray
2121
+ array to fill with char norm adjustments
2122
+ ** BlobLength
2123
+ length of blob in baseline-normalized units
2124
+ ** Globals: none
2125
+ ** Operation: This routine sets up the feature extractor to extract
2126
+ ** character normalization features and character normalized
2127
+ ** pico-features. The extracted pico-features are converted
2128
+ ** to integer form and placed in IntFeatures. The character
2129
+ ** normalization features are matched to each class in
2130
+ ** templates and the resulting adjustment factors are returned
2131
+ ** in CharNormArray. The total length of all blob outlines
2132
+ ** in baseline normalized units is also returned.
2133
+ ** Return: Number of pico-features returned (0 if an error occurred)
2134
+ ** Exceptions: none
2135
+ ** History: Tue Mar 12 17:55:18 1991, DSJ, Created.
2136
+ */
2137
+ return (GetIntCharNormFeatures (Blob, LineStats, Templates,
2138
+ IntFeatures, CharNormArray, BlobLength));
2139
+ } /* GetCharNormFeatures */
2140
+
2141
+ /*---------------------------------------------------------------------------*/
2142
+ int GetIntBaselineFeatures(TBLOB *Blob,
2143
+ LINE_STATS *LineStats,
2144
+ INT_TEMPLATES Templates,
2145
+ INT_FEATURE_ARRAY IntFeatures,
2146
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
2147
+ inT32 *BlobLength) {
2148
+ /*
2149
+ ** Parameters:
2150
+ ** Blob
2151
+ blob to extract features from
2152
+ ** LineStats
2153
+ statistics about text row blob is in
2154
+ ** Templates
2155
+ used to compute char norm adjustments
2156
+ ** IntFeatures
2157
+ array to fill with integer features
2158
+ ** CharNormArray
2159
+ array to fill with dummy char norm adjustments
2160
+ ** BlobLength
2161
+ length of blob in baseline-normalized units
2162
+ ** Globals:
2163
+ ** FeaturesHaveBeenExtracted
2164
+ TRUE if fx has been done
2165
+ ** BaselineFeatures
2166
+ holds extracted baseline feat
2167
+ ** CharNormFeatures
2168
+ holds extracted char norm feat
2169
+ ** FXInfo
2170
+ holds misc. FX info
2171
+ ** Operation: This routine calls the integer (Hardware) feature
2172
+ ** extractor if it has not been called before for this blob.
2173
+ ** The results from the feature extractor are placed into
2174
+ ** globals so that they can be used in other routines without
2175
+ ** re-extracting the features.
2176
+ ** It then copies the baseline features into the IntFeatures
2177
+ ** array provided by the caller.
2178
+ ** Return: Number of features extracted or 0 if an error occured.
2179
+ ** Exceptions: none
2180
+ ** History: Tue May 28 10:40:52 1991, DSJ, Created.
2181
+ */
2182
+ register INT_FEATURE Src, Dest, End;
2183
+
2184
+ if (!FeaturesHaveBeenExtracted) {
2185
+ FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
2186
+ CharNormFeatures, &FXInfo);
2187
+ FeaturesHaveBeenExtracted = TRUE;
2188
+ }
2189
+
2190
+ if (!FeaturesOK) {
2191
+ *BlobLength = FXInfo.NumBL;
2192
+ return (0);
2193
+ }
2194
+
2195
+ for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures;
2196
+ Src < End; *Dest++ = *Src++);
2197
+
2198
+ ClearCharNormArray(Templates, CharNormArray);
2199
+ *BlobLength = FXInfo.NumBL;
2200
+ return (FXInfo.NumBL);
2201
+
2202
+ } /* GetIntBaselineFeatures */
2203
+
2204
+ /*---------------------------------------------------------------------------*/
2205
+ int GetIntCharNormFeatures(TBLOB *Blob,
2206
+ LINE_STATS *LineStats,
2207
+ INT_TEMPLATES Templates,
2208
+ INT_FEATURE_ARRAY IntFeatures,
2209
+ CLASS_NORMALIZATION_ARRAY CharNormArray,
2210
+ inT32 *BlobLength) {
2211
+ /*
2212
+ ** Parameters:
2213
+ ** Blob
2214
+ blob to extract features from
2215
+ ** LineStats
2216
+ statistics about text row blob is in
2217
+ ** Templates
2218
+ used to compute char norm adjustments
2219
+ ** IntFeatures
2220
+ array to fill with integer features
2221
+ ** CharNormArray
2222
+ array to fill with dummy char norm adjustments
2223
+ ** BlobLength
2224
+ length of blob in baseline-normalized units
2225
+ ** Globals:
2226
+ ** FeaturesHaveBeenExtracted
2227
+ TRUE if fx has been done
2228
+ ** BaselineFeatures
2229
+ holds extracted baseline feat
2230
+ ** CharNormFeatures
2231
+ holds extracted char norm feat
2232
+ ** FXInfo
2233
+ holds misc. FX info
2234
+ ** Operation: This routine calls the integer (Hardware) feature
2235
+ ** extractor if it has not been called before for this blob.
2236
+ ** The results from the feature extractor are placed into
2237
+ ** globals so that they can be used in other routines without
2238
+ ** re-extracting the features.
2239
+ ** It then copies the char norm features into the IntFeatures
2240
+ ** array provided by the caller.
2241
+ ** Return: Number of features extracted or 0 if an error occured.
2242
+ ** Exceptions: none
2243
+ ** History: Tue May 28 10:40:52 1991, DSJ, Created.
2244
+ */
2245
+ register INT_FEATURE Src, Dest, End;
2246
+ FEATURE NormFeature;
2247
+ FLOAT32 Baseline, Scale;
2248
+
2249
+ if (!FeaturesHaveBeenExtracted) {
2250
+ FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
2251
+ CharNormFeatures, &FXInfo);
2252
+ FeaturesHaveBeenExtracted = TRUE;
2253
+ }
2254
+
2255
+ if (!FeaturesOK) {
2256
+ *BlobLength = FXInfo.NumBL;
2257
+ return (0);
2258
+ }
2259
+
2260
+ for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures;
2261
+ Src < End; *Dest++ = *Src++);
2262
+
2263
+ NormFeature = NewFeature (&CharNormDesc);
2264
+ Baseline = BaselineAt (LineStats, FXInfo.Xmean);
2265
+ Scale = ComputeScaleFactor (LineStats);
2266
+ NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
2267
+ NormFeature->Params[CharNormLength] =
2268
+ FXInfo.Length * Scale / LENGTH_COMPRESSION;
2269
+ NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
2270
+ NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
2271
+ ComputeIntCharNormArray(NormFeature, Templates, CharNormArray);
2272
+ FreeFeature(NormFeature);
2273
+
2274
+ *BlobLength = FXInfo.NumBL;
2275
+ return (FXInfo.NumCN);
2276
+
2277
+ } /* GetIntCharNormFeatures */
2278
+
2279
+ /*---------------------------------------------------------------------------*/
2280
+ void InitMatcherRatings(register FLOAT32 *Rating) {
2281
+ /*
2282
+ ** Parameters:
2283
+ ** Rating
2284
+ ptr to array of ratings to be initialized
2285
+ ** Globals: none
2286
+ ** Operation: This routine initializes the best rating for each class
2287
+ ** to be the worst possible rating (1.0).
2288
+ ** Return: none
2289
+ ** Exceptions: none
2290
+ ** History: Tue Mar 12 13:43:28 1991, DSJ, Created.
2291
+ */
2292
+ register FLOAT32 *LastRating;
2293
+ register FLOAT32 WorstRating = WORST_POSSIBLE_RATING;
2294
+
2295
+ for (LastRating = Rating + MAX_CLASS_ID;
2296
+ Rating <= LastRating; *Rating++ = WorstRating);
2297
+
2298
+ } /* InitMatcherRatings */
2299
+
2300
+ /*---------------------------------------------------------------------------*/
2301
+ int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
2302
+ CLASS_ID ClassId,
2303
+ int NumFeatures,
2304
+ INT_FEATURE_ARRAY Features,
2305
+ FEATURE_SET FloatFeatures) {
2306
+ /*
2307
+ ** Parameters:
2308
+ ** Templates
2309
+ adapted templates to add new config to
2310
+ ** ClassId
2311
+ class id to associate with new config
2312
+ ** NumFeatures
2313
+ number of features in IntFeatures
2314
+ ** Features
2315
+ features describing model for new config
2316
+ ** FloatFeatures
2317
+ floating-pt representation of features
2318
+ ** Globals:
2319
+ ** AllProtosOn
2320
+ mask to enable all protos
2321
+ ** AllConfigsOff
2322
+ mask to disable all configs
2323
+ ** TempProtoMask
2324
+ defines old protos matched in new config
2325
+ ** Operation:
2326
+ ** Return: The id of the new config created, a negative integer in
2327
+ ** case of error.
2328
+ ** Exceptions: none
2329
+ ** History: Fri Mar 15 08:49:46 1991, DSJ, Created.
2330
+ */
2331
+ CLASS_INDEX ClassIndex;
2332
+ INT_CLASS IClass;
2333
+ ADAPT_CLASS Class;
2334
+ PROTO_ID OldProtos[MAX_NUM_PROTOS];
2335
+ FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
2336
+ int NumOldProtos;
2337
+ int NumBadFeatures;
2338
+ int MaxProtoId, OldMaxProtoId;
2339
+ int BlobLength = 0;
2340
+ int MaskSize;
2341
+ int ConfigId;
2342
+ TEMP_CONFIG Config;
2343
+ int i;
2344
+ int debug_level = NO_DEBUG;
2345
+
2346
+ if (LearningDebugLevel >= 3)
2347
+ debug_level =
2348
+ PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES;
2349
+
2350
+ ClassIndex = Templates->Templates->IndexFor[ClassId];
2351
+ IClass = ClassForClassId (Templates->Templates, ClassId);
2352
+ Class = Templates->Class[ClassIndex];
2353
+
2354
+ if (IClass->NumConfigs >= MAX_NUM_CONFIGS)
2355
+ {
2356
+ ++NumAdaptationsFailed;
2357
+ if (LearningDebugLevel >= 1)
2358
+ cprintf ("Cannot make new temporary config: maximum number exceeded.\n");
2359
+ return -1;
2360
+ }
2361
+
2362
+ OldMaxProtoId = IClass->NumProtos - 1;
2363
+
2364
+ NumOldProtos = FindGoodProtos (IClass, AllProtosOn, AllConfigsOff,
2365
+ BlobLength, NumFeatures, Features,
2366
+ OldProtos, debug_level);
2367
+
2368
+ MaskSize = WordsInVectorOfSize (MAX_NUM_PROTOS);
2369
+ zero_all_bits(TempProtoMask, MaskSize);
2370
+ for (i = 0; i < NumOldProtos; i++)
2371
+ SET_BIT (TempProtoMask, OldProtos[i]);
2372
+
2373
+ NumBadFeatures = FindBadFeatures (IClass, TempProtoMask, AllConfigsOn,
2374
+ BlobLength, NumFeatures, Features,
2375
+ BadFeatures, debug_level);
2376
+
2377
+ MaxProtoId = MakeNewTempProtos (FloatFeatures, NumBadFeatures, BadFeatures,
2378
+ IClass, Class, TempProtoMask);
2379
+ if (MaxProtoId == NO_PROTO)
2380
+ {
2381
+ ++NumAdaptationsFailed;
2382
+ if (LearningDebugLevel >= 1)
2383
+ cprintf ("Cannot make new temp protos: maximum number exceeded.\n");
2384
+ return -1;
2385
+ }
2386
+
2387
+ ConfigId = AddIntConfig (IClass);
2388
+ ConvertConfig(TempProtoMask, ConfigId, IClass);
2389
+ Config = NewTempConfig (MaxProtoId);
2390
+ TempConfigFor (Class, ConfigId) = Config;
2391
+ copy_all_bits (TempProtoMask, Config->Protos, Config->ProtoVectorSize);
2392
+
2393
+ if (LearningDebugLevel >= 1)
2394
+ cprintf ("Making new temp config %d using %d old and %d new protos.\n",
2395
+ ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId);
2396
+
2397
+ return ConfigId;
2398
+ } /* MakeNewTemporaryConfig */
2399
+
2400
+ /*---------------------------------------------------------------------------*/
2401
+ PROTO_ID
2402
+ MakeNewTempProtos (FEATURE_SET Features,
2403
+ int NumBadFeat,
2404
+ FEATURE_ID BadFeat[],
2405
+ INT_CLASS IClass,
2406
+ ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) {
2407
+ /*
2408
+ ** Parameters:
2409
+ ** Features
2410
+ floating-pt features describing new character
2411
+ ** NumBadFeat
2412
+ number of bad features to turn into protos
2413
+ ** BadFeat
2414
+ feature id's of bad features
2415
+ ** IClass
2416
+ integer class templates to add new protos to
2417
+ ** Class
2418
+ adapted class templates to add new protos to
2419
+ ** TempProtoMask
2420
+ proto mask to add new protos to
2421
+ ** Globals: none
2422
+ ** Operation: This routine finds sets of sequential bad features
2423
+ ** that all have the same angle and converts each set into
2424
+ ** a new temporary proto. The temp proto is added to the
2425
+ ** proto pruner for IClass, pushed onto the list of temp
2426
+ ** protos in Class, and added to TempProtoMask.
2427
+ ** Return: Max proto id in class after all protos have been added.
2428
+ ** Exceptions: none
2429
+ ** History: Fri Mar 15 11:39:38 1991, DSJ, Created.
2430
+ */
2431
+ FEATURE_ID *ProtoStart;
2432
+ FEATURE_ID *ProtoEnd;
2433
+ FEATURE_ID *LastBad;
2434
+ TEMP_PROTO TempProto;
2435
+ PROTO Proto;
2436
+ FEATURE F1, F2;
2437
+ FLOAT32 X1, X2, Y1, Y2;
2438
+ FLOAT32 A1, A2, AngleDelta;
2439
+ FLOAT32 SegmentLength;
2440
+ PROTO_ID Pid;
2441
+
2442
+ for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
2443
+ ProtoStart < LastBad; ProtoStart = ProtoEnd) {
2444
+ F1 = Features->Features[*ProtoStart];
2445
+ X1 = F1->Params[PicoFeatX];
2446
+ Y1 = F1->Params[PicoFeatY];
2447
+ A1 = F1->Params[PicoFeatDir];
2448
+
2449
+ for (ProtoEnd = ProtoStart + 1,
2450
+ SegmentLength = GetPicoFeatureLength ();
2451
+ ProtoEnd < LastBad;
2452
+ ProtoEnd++, SegmentLength += GetPicoFeatureLength ()) {
2453
+ F2 = Features->Features[*ProtoEnd];
2454
+ X2 = F2->Params[PicoFeatX];
2455
+ Y2 = F2->Params[PicoFeatY];
2456
+ A2 = F2->Params[PicoFeatDir];
2457
+
2458
+ AngleDelta = fabs (A1 - A2);
2459
+ if (AngleDelta > 0.5)
2460
+ AngleDelta = 1.0 - AngleDelta;
2461
+
2462
+ if (AngleDelta > MaxAngleDelta ||
2463
+ fabs (X1 - X2) > SegmentLength ||
2464
+ fabs (Y1 - Y2) > SegmentLength)
2465
+ break;
2466
+ }
2467
+
2468
+ F2 = Features->Features[*(ProtoEnd - 1)];
2469
+ X2 = F2->Params[PicoFeatX];
2470
+ Y2 = F2->Params[PicoFeatY];
2471
+ A2 = F2->Params[PicoFeatDir];
2472
+
2473
+ Pid = AddIntProto (IClass);
2474
+ if (Pid == NO_PROTO)
2475
+ return (NO_PROTO);
2476
+
2477
+ TempProto = NewTempProto ();
2478
+ Proto = &(TempProto->Proto);
2479
+
2480
+ /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
2481
+ ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
2482
+ instead of the -0.25 to 0.75 used in baseline normalization */
2483
+ Proto->Length = SegmentLength;
2484
+ Proto->Angle = A1;
2485
+ Proto->X = (X1 + X2) / 2.0;
2486
+ Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
2487
+ FillABC(Proto);
2488
+
2489
+ TempProto->ProtoId = Pid;
2490
+ SET_BIT(TempProtoMask, Pid);
2491
+
2492
+ ConvertProto(Proto, Pid, IClass);
2493
+ AddProtoToProtoPruner(Proto, Pid, IClass);
2494
+
2495
+ Class->TempProtos = push (Class->TempProtos, TempProto);
2496
+ }
2497
+ return (IClass->NumProtos - 1);
2498
+ } /* MakeNewTempProtos */
2499
+
2500
+ /*---------------------------------------------------------------------------*/
2501
+ void MakePermanent(ADAPT_TEMPLATES Templates,
2502
+ CLASS_ID ClassId,
2503
+ int ConfigId,
2504
+ TBLOB *Blob,
2505
+ LINE_STATS *LineStats) {
2506
+ /*
2507
+ ** Parameters:
2508
+ ** Templates
2509
+ current set of adaptive templates
2510
+ ** ClassId
2511
+ class containing config to be made permanent
2512
+ ** ConfigId
2513
+ config to be made permanent
2514
+ ** Blob
2515
+ current blob being adapted to
2516
+ ** LineStats
2517
+ statistics about text line Blob is in
2518
+ ** Globals: none
2519
+ ** Operation:
2520
+ ** Return: none
2521
+ ** Exceptions: none
2522
+ ** History: Thu Mar 14 15:54:08 1991, DSJ, Created.
2523
+ */
2524
+ UNICHAR_ID *Ambigs;
2525
+ TEMP_CONFIG Config;
2526
+ CLASS_INDEX ClassIndex;
2527
+ ADAPT_CLASS Class;
2528
+ PROTO_KEY ProtoKey;
2529
+
2530
+ ClassIndex = Templates->Templates->IndexFor[ClassId];
2531
+ Class = Templates->Class[ClassIndex];
2532
+ Config = TempConfigFor (Class, ConfigId);
2533
+
2534
+ MakeConfigPermanent(Class, ConfigId);
2535
+ if (Class->NumPermConfigs == 0)
2536
+ Templates->NumPermClasses++;
2537
+ Class->NumPermConfigs++;
2538
+
2539
+ ProtoKey.Templates = Templates;
2540
+ ProtoKey.ClassId = ClassId;
2541
+ ProtoKey.ConfigId = ConfigId;
2542
+ Class->TempProtos = delete_d (Class->TempProtos, &ProtoKey,
2543
+ MakeTempProtoPerm);
2544
+ FreeTempConfig(Config);
2545
+
2546
+ Ambigs = GetAmbiguities (Blob, LineStats, ClassId);
2547
+ PermConfigFor (Class, ConfigId) = Ambigs;
2548
+
2549
+ if (LearningDebugLevel >= 1) {
2550
+ cprintf ("Making config %d permanent with ambiguities '",
2551
+ ConfigId, Ambigs);
2552
+ for (UNICHAR_ID *AmbigsPointer = Ambigs;
2553
+ *AmbigsPointer >= 0; ++AmbigsPointer)
2554
+ cprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
2555
+ cprintf("'.\n");
2556
+ }
2557
+
2558
+ } /* MakePermanent */
2559
+
2560
+ /*---------------------------------------------------------------------------*/
2561
+ int MakeTempProtoPerm(void *item1, //TEMP_PROTO TempProto,
2562
+ void *item2) { //PROTO_KEY *ProtoKey)
2563
+ /*
2564
+ ** Parameters:
2565
+ ** TempProto
2566
+ temporary proto to compare to key
2567
+ ** ProtoKey
2568
+ defines which protos to make permanent
2569
+ ** Globals: none
2570
+ ** Operation: This routine converts TempProto to be permanent if
2571
+ ** its proto id is used by the configuration specified in
2572
+ ** ProtoKey.
2573
+ ** Return: TRUE if TempProto is converted, FALSE otherwise
2574
+ ** Exceptions: none
2575
+ ** History: Thu Mar 14 18:49:54 1991, DSJ, Created.
2576
+ */
2577
+ CLASS_INDEX ClassIndex;
2578
+ ADAPT_CLASS Class;
2579
+ TEMP_CONFIG Config;
2580
+ TEMP_PROTO TempProto;
2581
+ PROTO_KEY *ProtoKey;
2582
+
2583
+ TempProto = (TEMP_PROTO) item1;
2584
+ ProtoKey = (PROTO_KEY *) item2;
2585
+
2586
+ ClassIndex = ProtoKey->Templates->Templates->IndexFor[ProtoKey->ClassId];
2587
+ Class = ProtoKey->Templates->Class[ClassIndex];
2588
+ Config = TempConfigFor (Class, ProtoKey->ConfigId);
2589
+
2590
+ if (TempProto->ProtoId > Config->MaxProtoId ||
2591
+ !test_bit (Config->Protos, TempProto->ProtoId))
2592
+ return (FALSE);
2593
+
2594
+ MakeProtoPermanent (Class, TempProto->ProtoId);
2595
+ AddProtoToClassPruner (&(TempProto->Proto), ProtoKey->ClassId,
2596
+ ProtoKey->Templates->Templates);
2597
+ FreeTempProto(TempProto);
2598
+
2599
+ return (TRUE);
2600
+
2601
+ } /* MakeTempProtoPerm */
2602
+
2603
+ /*---------------------------------------------------------------------------*/
2604
+ int NumBlobsIn(TWERD *Word) {
2605
+ /*
2606
+ ** Parameters:
2607
+ ** Word
2608
+ word to count blobs in
2609
+ ** Globals: none
2610
+ ** Operation: This routine returns the number of blobs in Word.
2611
+ ** Return: Number of blobs in Word.
2612
+ ** Exceptions: none
2613
+ ** History: Thu Mar 14 08:30:27 1991, DSJ, Created.
2614
+ */
2615
+ register TBLOB *Blob;
2616
+ register int NumBlobs;
2617
+
2618
+ if (Word == NULL)
2619
+ return (0);
2620
+
2621
+ for (Blob = Word->blobs, NumBlobs = 0;
2622
+ Blob != NULL; Blob = Blob->next, NumBlobs++);
2623
+
2624
+ return (NumBlobs);
2625
+
2626
+ } /* NumBlobsIn */
2627
+
2628
+ /*---------------------------------------------------------------------------*/
2629
+ int NumOutlinesInBlob(TBLOB *Blob) {
2630
+ /*
2631
+ ** Parameters:
2632
+ ** Blob
2633
+ blob to count outlines in
2634
+ ** Globals: none
2635
+ ** Operation: This routine returns the number of OUTER outlines
2636
+ ** in Blob.
2637
+ ** Return: Number of outer outlines in Blob.
2638
+ ** Exceptions: none
2639
+ ** History: Mon Jun 10 15:46:20 1991, DSJ, Created.
2640
+ */
2641
+ register TESSLINE *Outline;
2642
+ register int NumOutlines;
2643
+
2644
+ if (Blob == NULL)
2645
+ return (0);
2646
+
2647
+ for (Outline = Blob->outlines, NumOutlines = 0;
2648
+ Outline != NULL; Outline = Outline->next, NumOutlines++);
2649
+
2650
+ return (NumOutlines);
2651
+
2652
+ } /* NumOutlinesInBlob */
2653
+
2654
+ /*---------------------------------------------------------------------------*/
2655
+ void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) {
2656
+ /*
2657
+ ** Parameters:
2658
+ ** File
2659
+ open text file to write Results to
2660
+ ** Results
2661
+ match results to write to File
2662
+ ** Globals: none
2663
+ ** Operation: This routine writes the matches in Results to File.
2664
+ ** Return: none
2665
+ ** Exceptions: none
2666
+ ** History: Mon Mar 18 09:24:53 1991, DSJ, Created.
2667
+ */
2668
+ for (int i = 0; i < Results->NumMatches; ++i) {
2669
+ cprintf("%s(%d) %.2f ",
2670
+ unicharset.debug_str(Results->Classes[i]).string(),
2671
+ Results->Classes[i],
2672
+ Results->Ratings[Results->Classes[i]] * 100.0);
2673
+ }
2674
+ } /* PrintAdaptiveMatchResults */
2675
+
2676
+ /*---------------------------------------------------------------------------*/
2677
+ void RemoveBadMatches(ADAPT_RESULTS *Results) {
2678
+ /*
2679
+ ** Parameters:
2680
+ ** Results
2681
+ contains matches to be filtered
2682
+ ** Globals:
2683
+ ** BadMatchPad
2684
+ defines a "bad match"
2685
+ ** Operation: This routine steps thru each matching class in Results
2686
+ ** and removes it from the match list if its rating
2687
+ ** is worse than the BestRating plus a pad. In other words,
2688
+ ** all good matches get moved to the front of the classes
2689
+ ** array.
2690
+ ** Return: none
2691
+ ** Exceptions: none
2692
+ ** History: Tue Mar 12 13:51:03 1991, DSJ, Created.
2693
+ */
2694
+ int Next, NextGood;
2695
+ FLOAT32 *Rating = Results->Ratings;
2696
+ CLASS_ID *Match = Results->Classes;
2697
+ FLOAT32 BadMatchThreshold;
2698
+ static const char* romans = "i v x I V X";
2699
+ BadMatchThreshold = Results->BestRating + BadMatchPad;
2700
+
2701
+ if (bln_numericmode) {
2702
+ UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2703
+ unicharset.unichar_to_id("1") : -1;
2704
+ UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2705
+ unicharset.unichar_to_id("0") : -1;
2706
+ for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2707
+ if (Rating[Match[Next]] <= BadMatchThreshold) {
2708
+ if (!unicharset.get_isalpha(Match[Next]) ||
2709
+ strstr(romans, unicharset.id_to_unichar(Match[Next])) != NULL) {
2710
+ Match[NextGood++] = Match[Next];
2711
+ } else if (unichar_id_one >= 0 && unicharset.eq(Match[Next], "l") &&
2712
+ Rating[unichar_id_one] >= BadMatchThreshold) {
2713
+ Match[NextGood++] = unichar_id_one;
2714
+ Rating[unichar_id_one] = Rating[unicharset.unichar_to_id("l")];
2715
+ } else if (unichar_id_zero >= 0 && unicharset.eq(Match[Next], "O") &&
2716
+ Rating[unichar_id_zero] >= BadMatchThreshold) {
2717
+ Match[NextGood++] = unichar_id_zero;
2718
+ Rating[unichar_id_zero] = Rating[unicharset.unichar_to_id("O")];
2719
+ }
2720
+ }
2721
+ }
2722
+ }
2723
+ else {
2724
+ for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2725
+ if (Rating[Match[Next]] <= BadMatchThreshold)
2726
+ Match[NextGood++] = Match[Next];
2727
+ }
2728
+ }
2729
+
2730
+ Results->NumMatches = NextGood;
2731
+
2732
+ } /* RemoveBadMatches */
2733
+
2734
+ /*----------------------------------------------------------------------------------*/
2735
+ void RemoveExtraPuncs(ADAPT_RESULTS *Results) {
2736
+ /*
2737
+ ** Parameters:
2738
+ ** Results
2739
+ contains matches to be filtered
2740
+ ** Globals:
2741
+ ** BadMatchPad
2742
+ defines a "bad match"
2743
+ ** Operation: This routine steps thru each matching class in Results
2744
+ ** and removes it from the match list if its rating
2745
+ ** is worse than the BestRating plus a pad. In other words,
2746
+ ** all good matches get moved to the front of the classes
2747
+ ** array.
2748
+ ** Return: none
2749
+ ** Exceptions: none
2750
+ ** History: Tue Mar 12 13:51:03 1991, DSJ, Created.
2751
+ */
2752
+ int Next, NextGood;
2753
+ int punc_count; /*no of garbage characters */
2754
+ int digit_count;
2755
+ CLASS_ID *Match = Results->Classes;
2756
+ /*garbage characters */
2757
+ static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2758
+ static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2759
+
2760
+ punc_count = 0;
2761
+ digit_count = 0;
2762
+ for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2763
+ if (strstr (punc_chars,
2764
+ unicharset.id_to_unichar(Match[Next])) == NULL) {
2765
+ if (strstr (digit_chars,
2766
+ unicharset.id_to_unichar(Match[Next])) == NULL) {
2767
+ Match[NextGood++] = Match[Next];
2768
+ }
2769
+ else {
2770
+ if (digit_count < 1)
2771
+ Match[NextGood++] = Match[Next];
2772
+ digit_count++;
2773
+ }
2774
+ }
2775
+ else {
2776
+ if (punc_count < 2)
2777
+ Match[NextGood++] = Match[Next];
2778
+ punc_count++; /*count them */
2779
+ }
2780
+ }
2781
+ Results->NumMatches = NextGood;
2782
+ } /* RemoveExtraPuncs */
2783
+
2784
+ /*---------------------------------------------------------------------------*/
2785
+ void SetAdaptiveThreshold(FLOAT32 Threshold) {
2786
+ /*
2787
+ ** Parameters:
2788
+ ** Threshold
2789
+ threshold for creating new templates
2790
+ ** Globals:
2791
+ ** GoodAdaptiveMatch
2792
+ default good match rating
2793
+ ** Operation: This routine resets the internal thresholds inside
2794
+ ** the integer matcher to correspond to the specified
2795
+ ** threshold.
2796
+ ** Return: none
2797
+ ** Exceptions: none
2798
+ ** History: Tue Apr 9 08:33:13 1991, DSJ, Created.
2799
+ */
2800
+ if (Threshold == GoodAdaptiveMatch) {
2801
+ /* the blob was probably classified correctly - use the default rating
2802
+ threshold */
2803
+ SetProtoThresh (0.9);
2804
+ SetFeatureThresh (0.9);
2805
+ }
2806
+ else {
2807
+ /* the blob was probably incorrectly classified */
2808
+ SetProtoThresh (1.0 - Threshold);
2809
+ SetFeatureThresh (1.0 - Threshold);
2810
+ }
2811
+ } /* SetAdaptiveThreshold */
2812
+
2813
+ /*---------------------------------------------------------------------------*/
2814
+ void ShowBestMatchFor(TBLOB *Blob,
2815
+ LINE_STATS *LineStats,
2816
+ CLASS_ID ClassId,
2817
+ BOOL8 AdaptiveOn,
2818
+ BOOL8 PreTrainedOn) {
2819
+ /*
2820
+ ** Parameters:
2821
+ ** Blob
2822
+ blob to show best matching config for
2823
+ ** LineStats
2824
+ statistics for text line Blob is in
2825
+ ** ClassId
2826
+ class whose configs are to be searched
2827
+ ** AdaptiveOn
2828
+ TRUE if adaptive configs are enabled
2829
+ ** PreTrainedOn
2830
+ TRUE if pretrained configs are enabled
2831
+ ** Globals:
2832
+ ** PreTrainedTemplates
2833
+ built-in training
2834
+ ** AdaptedTemplates
2835
+ adaptive templates
2836
+ ** AllProtosOn
2837
+ dummy proto mask
2838
+ ** AllConfigsOn
2839
+ dummy config mask
2840
+ ** Operation: This routine compares Blob to both sets of templates
2841
+ ** (adaptive and pre-trained) and then displays debug
2842
+ ** information for the config which matched best.
2843
+ ** Return: none
2844
+ ** Exceptions: none
2845
+ ** History: Fri Mar 22 08:43:52 1991, DSJ, Created.
2846
+ */
2847
+ int NumCNFeatures = 0, NumBLFeatures = 0;
2848
+ INT_FEATURE_ARRAY CNFeatures, BLFeatures;
2849
+ INT_RESULT_STRUCT CNResult, BLResult;
2850
+ CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
2851
+ CLASS_INDEX ClassIndex;
2852
+ inT32 BlobLength;
2853
+ uinT32 ConfigMask;
2854
+ static int next_config = -1;
2855
+
2856
+ if (PreTrainedOn) next_config = -1;
2857
+
2858
+ CNResult.Rating = BLResult.Rating = 2.0;
2859
+
2860
+ if (!LegalClassId (ClassId)) {
2861
+ cprintf ("%d is not a legal class id!!\n", ClassId);
2862
+ return;
2863
+ }
2864
+
2865
+ if (PreTrainedOn) {
2866
+ if (UnusedClassIdIn (PreTrainedTemplates, ClassId))
2867
+ cprintf ("No built-in templates for class %d = %s\n",
2868
+ ClassId, unicharset.id_to_unichar(ClassId));
2869
+ else {
2870
+ NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
2871
+ PreTrainedTemplates,
2872
+ CNFeatures, CNAdjust,
2873
+ &BlobLength);
2874
+ if (NumCNFeatures <= 0)
2875
+ cprintf ("Illegal blob (char norm features)!\n");
2876
+ else {
2877
+ ClassIndex = PreTrainedTemplates->IndexFor[ClassId];
2878
+
2879
+ SetCharNormMatch();
2880
+ IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
2881
+ AllProtosOn, AllConfigsOn,
2882
+ BlobLength, NumCNFeatures, CNFeatures,
2883
+ CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
2884
+
2885
+ cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n",
2886
+ CNResult.Config, CNResult.Rating * 100.0, CNAdjust[ClassIndex]);
2887
+ }
2888
+ }
2889
+ }
2890
+
2891
+ if (AdaptiveOn) {
2892
+ if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId))
2893
+ cprintf ("No AD templates for class %d = %s\n",
2894
+ ClassId, unicharset.id_to_unichar(ClassId));
2895
+ else {
2896
+ NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
2897
+ AdaptedTemplates->Templates,
2898
+ BLFeatures, BLAdjust,
2899
+ &BlobLength);
2900
+ if (NumBLFeatures <= 0)
2901
+ cprintf ("Illegal blob (baseline features)!\n");
2902
+ else {
2903
+ ClassIndex =AdaptedTemplates->Templates->IndexFor[ClassId];
2904
+
2905
+ SetBaseLineMatch();
2906
+ IntegerMatcher (ClassForClassId
2907
+ (AdaptedTemplates->Templates, ClassId),
2908
+ AllProtosOn, AllConfigsOn,
2909
+ // AdaptedTemplates->Class[ClassIndex]->PermProtos,
2910
+ // AdaptedTemplates->Class[ClassIndex]->PermConfigs,
2911
+ BlobLength, NumBLFeatures, BLFeatures,
2912
+ BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
2913
+
2914
+ #ifndef SECURE_NAMES
2915
+ int ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
2916
+ ADAPT_CLASS Class = AdaptedTemplates->Class[ClassIndex];
2917
+ cprintf ("Best adaptive template match is config %2d (%4.1f) %s\n",
2918
+ BLResult.Config, BLResult.Rating * 100.0,
2919
+ ConfigIsPermanent(Class, BLResult.Config) ? "Perm" : "Temp");
2920
+ #endif
2921
+ }
2922
+ }
2923
+ }
2924
+
2925
+ cprintf ("\n");
2926
+ if (BLResult.Rating < CNResult.Rating) {
2927
+ ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
2928
+ if (next_config < 0) {
2929
+ ConfigMask = 1 << BLResult.Config;
2930
+ next_config = 0;
2931
+ } else {
2932
+ ConfigMask = 1 << next_config;
2933
+ ++next_config;
2934
+ }
2935
+ NormMethod = baseline;
2936
+
2937
+ SetBaseLineMatch();
2938
+ IntegerMatcher (ClassForClassId (AdaptedTemplates->Templates, ClassId),
2939
+ AllProtosOn,
2940
+ // AdaptedTemplates->Class[ClassIndex]->PermProtos,
2941
+ (BIT_VECTOR) & ConfigMask,
2942
+ BlobLength, NumBLFeatures, BLFeatures,
2943
+ BLAdjust[ClassIndex], &BLResult, MatchDebugFlags);
2944
+ cprintf ("Adaptive template match for config %2d is %4.1f\n",
2945
+ BLResult.Config, BLResult.Rating * 100.0);
2946
+ }
2947
+ else {
2948
+ ClassIndex = PreTrainedTemplates->IndexFor[ClassId];
2949
+ ConfigMask = 1 << CNResult.Config;
2950
+ NormMethod = character;
2951
+
2952
+ SetCharNormMatch();
2953
+ //xiaofan
2954
+ IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), AllProtosOn, (BIT_VECTOR) & ConfigMask,
2955
+ BlobLength, NumCNFeatures, CNFeatures,
2956
+ CNAdjust[ClassIndex], &CNResult, MatchDebugFlags);
2957
+ }
2958
+ } /* ShowBestMatchFor */