tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1458 @@
1
+ /******************************************************************************
2
+ ** Filename: stopper.c
3
+ ** Purpose: Stopping criteria for word classifier.
4
+ ** Author: Dan Johnson
5
+ ** History: Mon Apr 29 14:56:49 1991, DSJ, Created.
6
+ **
7
+ ** (c) Copyright Hewlett-Packard Company, 1988.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ ******************************************************************************/
18
+ /**----------------------------------------------------------------------------
19
+ Include Files and Type Defines
20
+ ----------------------------------------------------------------------------**/
21
+ #include "stopper.h"
22
+ #include "emalloc.h"
23
+ #include "matchdefs.h"
24
+ #include "debug.h"
25
+ #include "callcpp.h"
26
+ #include "permute.h"
27
+ #include "context.h"
28
+ #include "permnum.h"
29
+ #include "danerror.h"
30
+ #include "const.h"
31
+ #include "freelist.h"
32
+ #include "efio.h"
33
+ #include "globals.h"
34
+ #include "scanutils.h"
35
+ #include "unichar.h"
36
+
37
+ #include <stdio.h>
38
+ #include <string.h>
39
+ #include <ctype.h>
40
+ #include <math.h>
41
+ #ifdef __UNIX__
42
+ #include <assert.h>
43
+ #endif
44
+
45
+ /* these are kludges - add appropriate .h file later */
46
+ extern float CertaintyScale; /* from subfeat.h */
47
+
48
+ #define MAX_WERD_SIZE 100
49
+ #define MAX_AMBIG_SIZE 3
50
+ #define DANGEROUS_AMBIGS "DangAmbigs"
51
+
52
+ typedef LIST AMBIG_TABLE;
53
+
54
+ typedef struct
55
+ {
56
+ UNICHAR_ID Class;
57
+ uinT16 NumChunks;
58
+ float Certainty;
59
+ }
60
+
61
+
62
+ CHAR_CHOICE;
63
+
64
+ typedef struct
65
+ {
66
+ float Rating;
67
+ float Certainty;
68
+ FLOAT32 AdjustFactor;
69
+ int Length;
70
+ CHAR_CHOICE Blob[1];
71
+ } VIABLE_CHOICE_STRUCT;
72
+ typedef VIABLE_CHOICE_STRUCT *VIABLE_CHOICE;
73
+
74
+ typedef struct
75
+ {
76
+ VIABLE_CHOICE Choice;
77
+ float ChunkCertainty[MAX_NUM_CHUNKS];
78
+ UNICHAR_ID ChunkClass[MAX_NUM_CHUNKS];
79
+ }
80
+
81
+
82
+ EXPANDED_CHOICE;
83
+
84
+ typedef struct
85
+ {
86
+ char ambig[2 * (UNICHAR_LEN * MAX_AMBIG_SIZE) + 2];
87
+ char lengths[2 * (MAX_AMBIG_SIZE) + 2];
88
+ } AMBIG_SPEC;
89
+
90
+ /**----------------------------------------------------------------------------
91
+ Macros
92
+ ----------------------------------------------------------------------------**/
93
/* Accessors for the fields of the first entry of a list of viable choices.
   The caller must pass a non-empty list: the result of first_node is
   dereferenced unconditionally. */
#define BestCertainty(Choices) \
  (((VIABLE_CHOICE) first_node (Choices))->Certainty)
#define BestRating(Choices) \
  (((VIABLE_CHOICE) first_node (Choices))->Rating)
#define BestFactor(Choices) \
  (((VIABLE_CHOICE) first_node (Choices))->AdjustFactor)

/* Certainty threshold derived from two adjust factors:
   (F2 - F1) * AmbigThresholdGain - AmbigThresholdOffset. */
#define AmbigThreshold(F1,F2)	(((F2) - (F1)) * AmbigThresholdGain - \
				AmbigThresholdOffset)
99
+
100
+ /*---------------------------------------------------------------------------
101
+ Private Function Prototypes
102
+ ----------------------------------------------------------------------------*/
103
+ void AddNewChunk(VIABLE_CHOICE Choice, int Blob);
104
+
105
+ int AmbigsFound(char *Word,
106
+ char *CurrentChar,
107
+ const char *Tail,
108
+ const char *Tail_lengths,
109
+ LIST Ambigs,
110
+ DANGERR *fixpt);
111
+
112
+ int ChoiceSameAs(A_CHOICE *Choice, VIABLE_CHOICE ViableChoice);
113
+
114
+ int CmpChoiceRatings(void *arg1, //VIABLE_CHOICE Choice1,
115
+ void *arg2); //VIABLE_CHOICE Choice2);
116
+
117
+ void ExpandChoice(VIABLE_CHOICE Choice, EXPANDED_CHOICE *ExpandedChoice);
118
+
119
+ AMBIG_TABLE *FillAmbigTable();
120
+
121
+ int FreeBadChoice(void *item1, //VIABLE_CHOICE Choice,
122
+ void *item2); //EXPANDED_CHOICE *BestChoice);
123
+
124
+ int LengthOfShortestAlphaRun(register char *Word, const char *Word_lengths);
125
+
126
+ VIABLE_CHOICE NewViableChoice (A_CHOICE * Choice,
127
+ FLOAT32 AdjustFactor, float Certainties[]);
128
+
129
+ void PrintViableChoice(FILE *File, const char *Label, VIABLE_CHOICE Choice);
130
+
131
+ void ReplaceDuplicateChoice (VIABLE_CHOICE OldChoice,
132
+ A_CHOICE * NewChoice,
133
+ FLOAT32 AdjustFactor, float Certainties[]);
134
+
135
+ int StringSameAs(const char *String,
136
+ const char *String_lengths,
137
+ VIABLE_CHOICE ViableChoice);
138
+
139
+ int UniformCertainties(CHOICES_LIST Choices, A_CHOICE *BestChoice);
140
+
141
+ /**----------------------------------------------------------------------------
142
+ Global Data Definitions and Declarations
143
+ ----------------------------------------------------------------------------**/
144
+ /* Name of file containing potentially dangerous ambiguities */
145
+ static const char *DangerousAmbigs = DANGEROUS_AMBIGS;
146
+
147
+ /* Word for which stopper debug information should be printed to stdout */
148
+ static char *WordToDebug = NULL;
149
+ static char *WordToDebug_lengths = NULL;
150
+
151
+ /* flag used to disable accumulation of word choices during compound word
152
+ permutation */
153
+ BOOL8 KeepWordChoices = TRUE;
154
+
155
+ /* additional certainty padding allowed before a word is rejected */
156
+ static FLOAT32 RejectOffset = 0.0;
157
+
158
+ /* structures to keep track of viable word choices */
159
+ static VIABLE_CHOICE BestRawChoice = NULL;
160
+ static LIST BestChoices = NIL;
161
+ static PIECES_STATE CurrentSegmentation;
162
+
163
+ make_float_var (NonDictCertainty, -2.50, MakeNonDictCertainty,
164
+ 17, 2, SetNonDictCertainty,
165
+ "Certainty threshold for non-dict words");
166
+
167
+ make_float_var (RejectCertaintyOffset, 1.0, MakeRejectCertaintyOffset,
168
+ 17, 3, SetRejectCertaintyOffset, "Reject certainty offset");
169
+
170
+ make_int_var (SmallWordSize, 2, MakeSmallWordSize,
171
+ 17, 4, SetSmallWordSize,
172
+ "Size of dict word to be treated as non-dict word");
173
+
174
+ make_float_var (CertaintyPerChar, -0.50, MakeCertaintyPerChar,
175
+ 17, 5, SetCertaintyPerChar,
176
+ "Certainty to add for each dict char above SmallWordSize");
177
+
178
+ make_float_var (CertaintyVariation, 3.0, MakeCertaintyVariation,
179
+ 17, 6, SetCertaintyVariation,
180
+ "Max certaintly variation allowed in a word (in sigma)");
181
+
182
+ make_int_var (StopperDebugLevel, 0, MakeStopperDebugLevel,
183
+ 17, 7, SetStopperDebugLevel, "Stopper debug level");
184
+
185
+ make_float_var (AmbigThresholdGain, 8.0, MakeAmbigThresholdGain,
186
+ 17, 8, SetAmbigThresholdGain,
187
+ "Gain factor for ambiguity threshold");
188
+
189
+ make_float_var (AmbigThresholdOffset, 1.5, MakeAmbigThresholdOffset,
190
+ 17, 9, SetAmbigThresholdOffset,
191
+ "Certainty offset for ambiguity threshold");
192
+
193
+ extern int first_pass;
194
+ INT_VAR (tessedit_truncate_wordchoice_log, 10, "Max words to keep in list");
195
+
196
+ /**----------------------------------------------------------------------------
197
+ Public Code
198
+ ----------------------------------------------------------------------------**/
199
+ /*---------------------------------------------------------------------------*/
200
+ int AcceptableChoice(CHOICES_LIST Choices,
201
+ A_CHOICE *BestChoice,
202
+ A_CHOICE *RawChoice,
203
+ DANGERR *fixpt) {
204
+ /*
205
+ ** Parameters:
206
+ ** Choices choices for current segmentation
207
+ ** BestChoice best choice for current segmentation
208
+ ** RawChoice best raw choice for current segmentation
209
+ ** Globals:
210
+ ** NonDictCertainty certainty for a non-dict word
211
+ ** SmallWordSize size of word to be treated as non-word
212
+ ** CertaintyPerChar certainty to add for each dict char
213
+ ** Operation: Return TRUE if the results from this segmentation are
214
+ ** good enough to stop. Otherwise return FALSE.
215
+ ** Return: TRUE or FALSE.
216
+ ** Exceptions: none
217
+ ** History: Mon Apr 29 14:57:32 1991, DSJ, Created.
218
+ */
219
+ float CertaintyThreshold = NonDictCertainty;
220
+ int WordSize;
221
+
222
+ if (fixpt != NULL)
223
+ fixpt->index = -1;
224
+ if ((BestChoice == NULL) || (class_string (BestChoice) == NULL))
225
+ return (FALSE);
226
+
227
+ if (StopperDebugLevel >= 1)
228
+ cprintf ("\nStopper: %s (word=%c, case=%c, punct=%c)\n",
229
+ class_string (BestChoice),
230
+ (valid_word (class_string (BestChoice)) ? 'y' : 'n'),
231
+ (case_ok (class_string (BestChoice),
232
+ class_lengths (BestChoice)) ? 'y' : 'n'),
233
+ ((punctuation_ok (class_string (BestChoice),
234
+ class_lengths (BestChoice)) !=
235
+ -1) ? 'y' : 'n'));
236
+
237
+ if (valid_word (class_string (BestChoice)) &&
238
+ case_ok (class_string (BestChoice), class_lengths (BestChoice)) &&
239
+ punctuation_ok (class_string (BestChoice),
240
+ class_lengths (BestChoice)) != -1) {
241
+ WordSize = LengthOfShortestAlphaRun (class_string (BestChoice),
242
+ class_lengths (BestChoice));
243
+ WordSize -= SmallWordSize;
244
+ if (WordSize < 0)
245
+ WordSize = 0;
246
+ CertaintyThreshold += WordSize * CertaintyPerChar;
247
+ }
248
+ else if (stopper_numbers_on && valid_number (class_string (BestChoice),
249
+ class_lengths (BestChoice))) {
250
+ CertaintyThreshold += stopper_numbers_on * CertaintyPerChar;
251
+ }
252
+
253
+ if (StopperDebugLevel >= 1)
254
+ cprintf ("Stopper: Certainty = %4.1f, Threshold = %4.1f\n",
255
+ class_certainty (BestChoice), CertaintyThreshold);
256
+
257
+ if (NoDangerousAmbig (class_string (BestChoice),
258
+ class_lengths (BestChoice), fixpt)
259
+ && class_certainty (BestChoice) > CertaintyThreshold &&
260
+ UniformCertainties (Choices, BestChoice))
261
+ return (TRUE);
262
+ else
263
+ return (FALSE);
264
+
265
+ } /* AcceptableChoice */
266
+
267
+
268
+ /*---------------------------------------------------------------------------*/
269
+ int AcceptableResult(A_CHOICE *BestChoice, A_CHOICE *RawChoice) {
270
+ /*
271
+ ** Parameters:
272
+ ** BestChoice best choice for current word
273
+ ** RawChoice best raw choice for current word
274
+ ** Globals:
275
+ ** NonDictCertainty certainty for a non-dict word
276
+ ** SmallWordSize size of word to be treated as non-word
277
+ ** CertaintyPerChar certainty to add for each dict char
278
+ ** BestChoices list of all good choices found
279
+ ** RejectOffset allowed offset before a word is rejected
280
+ ** Operation: Return FALSE if the best choice for the current word
281
+ ** is questionable and should be tried again on the second
282
+ ** pass or should be flagged to the user.
283
+ ** Return: TRUE or FALSE.
284
+ ** Exceptions: none
285
+ ** History: Thu May 9 14:05:05 1991, DSJ, Created.
286
+ */
287
+ float CertaintyThreshold = NonDictCertainty - RejectOffset;
288
+ int WordSize;
289
+
290
+ if (StopperDebugLevel >= 1)
291
+ cprintf ("\nRejecter: %s (word=%c, case=%c, punct=%c, unambig=%c)\n",
292
+ class_string (BestChoice),
293
+ (valid_word (class_string (BestChoice)) ? 'y' : 'n'),
294
+ (case_ok (class_string (BestChoice),
295
+ class_lengths (BestChoice)) ? 'y' : 'n'),
296
+ ((punctuation_ok (class_string (BestChoice),
297
+ class_lengths (BestChoice)) != -1) ? 'y' : 'n'),
298
+ ((rest (BestChoices) != NIL) ? 'n' : 'y'));
299
+
300
+ if ((BestChoice == NULL) ||
301
+ (class_string (BestChoice) == NULL) || CurrentWordAmbig ())
302
+ return (FALSE);
303
+
304
+ if (valid_word (class_string (BestChoice)) &&
305
+ case_ok (class_string (BestChoice), class_lengths (BestChoice)) &&
306
+ punctuation_ok (class_string (BestChoice),
307
+ class_lengths (BestChoice)) != -1) {
308
+ WordSize = LengthOfShortestAlphaRun (class_string (BestChoice),
309
+ class_lengths (BestChoice));
310
+ WordSize -= SmallWordSize;
311
+ if (WordSize < 0)
312
+ WordSize = 0;
313
+ CertaintyThreshold += WordSize * CertaintyPerChar;
314
+ }
315
+
316
+ if (StopperDebugLevel >= 1)
317
+ cprintf ("Rejecter: Certainty = %4.1f, Threshold = %4.1f ",
318
+ class_certainty (BestChoice), CertaintyThreshold);
319
+
320
+ if (class_certainty (BestChoice) > CertaintyThreshold) {
321
+ if (StopperDebugLevel >= 1)
322
+ cprintf ("ACCEPTED\n");
323
+ return (TRUE);
324
+ }
325
+ else {
326
+ if (StopperDebugLevel >= 1)
327
+ cprintf ("REJECTED\n");
328
+ return (FALSE);
329
+ }
330
+ } /* AcceptableResult */
331
+
332
+
333
+ /*---------------------------------------------------------------------------*/
334
+ int AlternativeChoicesWorseThan(FLOAT32 Threshold) {
335
+ /*
336
+ ** Parameters:
337
+ ** Threshold minimum adjust factor for alternative choices
338
+ ** Globals:
339
+ ** BestChoices alternative choices for current word
340
+ ** Operation: This routine returns TRUE if there are no alternative
341
+ ** choices for the current word OR if all alternatives have
342
+ ** an adjust factor worse than Threshold.
343
+ ** Return: TRUE or FALSE.
344
+ ** Exceptions: none
345
+ ** History: Mon Jun 3 09:36:31 1991, DSJ, Created.
346
+ */
347
+ LIST Alternatives;
348
+ VIABLE_CHOICE Choice;
349
+
350
+ Alternatives = rest (BestChoices);
351
+ iterate(Alternatives) {
352
+ Choice = (VIABLE_CHOICE) first_node (Alternatives);
353
+ if (Choice->AdjustFactor <= Threshold)
354
+ return (FALSE);
355
+ }
356
+
357
+ return (TRUE);
358
+
359
+ } /* AlternativeChoicesWorseThan */
360
+
361
+
362
+ /*---------------------------------------------------------------------------*/
363
+ int CurrentBestChoiceIs(const char *Word, const char *Word_lengths) {
364
+ /*
365
+ ** Parameters:
366
+ ** Word string to compare to current best choice
367
+ ** Word_lengths lengths of unichars in Word
368
+ ** Globals:
369
+ ** BestChoices set of best choices for current word
370
+ ** Operation: Returns TRUE if Word is the same as the current best
371
+ ** choice, FALSE otherwise.
372
+ ** Return: TRUE or FALSE
373
+ ** Exceptions: none
374
+ ** History: Thu May 30 14:44:22 1991, DSJ, Created.
375
+ */
376
+ return (BestChoices != NIL &&
377
+ StringSameAs (Word, Word_lengths,
378
+ (VIABLE_CHOICE) first_node (BestChoices)));
379
+
380
+ } /* CurrentBestChoiceIs */
381
+
382
+
383
+ /*---------------------------------------------------------------------------*/
384
+ FLOAT32 CurrentBestChoiceAdjustFactor() {
385
+ /*
386
+ ** Parameters: none
387
+ ** Globals:
388
+ ** BestChoices set of best choices for current word
389
+ ** Operation: Return the adjustment factor for the best choice for
390
+ ** the current word.
391
+ ** Return: Adjust factor for current best choice.
392
+ ** Exceptions: none
393
+ ** History: Thu May 30 14:48:24 1991, DSJ, Created.
394
+ */
395
+ VIABLE_CHOICE BestChoice;
396
+
397
+ if (BestChoices == NIL)
398
+ return (MAX_FLOAT32);
399
+
400
+ BestChoice = (VIABLE_CHOICE) first_node (BestChoices);
401
+ return (BestChoice->AdjustFactor);
402
+
403
+ } /* CurrentBestChoiceAdjustFactor */
404
+
405
+
406
+ /*---------------------------------------------------------------------------*/
407
+ int CurrentWordAmbig() {
408
+ /*
409
+ ** Parameters: none
410
+ ** Globals:
411
+ ** BestChoices set of best choices for current word
412
+ ** Operation: This routine returns TRUE if there are multiple good
413
+ ** choices for the current word and FALSE otherwise.
414
+ ** Return: TRUE or FALSE
415
+ ** Exceptions: none
416
+ ** History: Wed May 22 15:38:38 1991, DSJ, Created.
417
+ */
418
+ return (rest (BestChoices) != NIL);
419
+
420
+ } /* CurrentWordAmbig */
421
+
422
+
423
+ /*---------------------------------------------------------------------------*/
424
+ void DebugWordChoices() {
425
+ /*
426
+ ** Parameters: none
427
+ ** Globals:
428
+ ** BestRawChoice
429
+ ** BestChoices
430
+ ** Operation: Print the current choices for this word to stdout.
431
+ ** Return: none
432
+ ** Exceptions: none
433
+ ** History: Wed May 15 13:52:08 1991, DSJ, Created.
434
+ */
435
+ LIST Choices;
436
+ int i;
437
+ char LabelString[80];
438
+
439
+ if (StopperDebugLevel >= 1 ||
440
+ (WordToDebug && BestChoices &&
441
+ StringSameAs (WordToDebug, WordToDebug_lengths,
442
+ (VIABLE_CHOICE) first_node (BestChoices)))) {
443
+ if (BestRawChoice)
444
+ PrintViableChoice (stderr, "\nBest Raw Choice: ", BestRawChoice);
445
+
446
+ i = 1;
447
+ Choices = BestChoices;
448
+ if (Choices)
449
+ cprintf ("\nBest Cooked Choices:\n");
450
+ iterate(Choices) {
451
+ sprintf (LabelString, "Cooked Choice #%d: ", i);
452
+ PrintViableChoice (stderr, LabelString,
453
+ (VIABLE_CHOICE) first_node (Choices));
454
+ i++;
455
+ }
456
+ }
457
+ } /* DebugWordChoices */
458
+
459
+
460
+ /*---------------------------------------------------------------------------*/
461
+ void FilterWordChoices() {
462
+ /*
463
+ ** Parameters: none
464
+ ** Globals:
465
+ ** BestChoices set of choices for current word
466
+ ** Operation: This routine removes from BestChoices all choices which
467
+ ** are not within a reasonable range of the best choice.
468
+ ** Return: none
469
+ ** Exceptions: none
470
+ ** History: Wed May 15 13:08:24 1991, DSJ, Created.
471
+ */
472
+ EXPANDED_CHOICE BestChoice;
473
+
474
+ if (BestChoices == NIL || second_node (BestChoices) == NIL)
475
+ return;
476
+
477
+ /* compute certainties and class for each chunk in best choice */
478
+ ExpandChoice ((VIABLE_CHOICE_STRUCT *) first_node (BestChoices), &BestChoice);
479
+
480
+ set_rest (BestChoices, delete_d (rest (BestChoices),
481
+ &BestChoice, FreeBadChoice));
482
+
483
+ } /* FilterWordChoices */
484
+
485
+
486
+ /*---------------------------------------------------------------------------*/
487
+ void
488
+ FindClassifierErrors (FLOAT32 MinRating,
489
+ FLOAT32 MaxRating,
490
+ FLOAT32 RatingMargin, FLOAT32 Thresholds[]) {
491
+ /*
492
+ ** Parameters:
493
+ ** MinRating limits how tight to make a template
494
+ ** MaxRating limits how loose to make a template
495
+ ** RatingMargin amount of margin to put in template
496
+ ** Thresholds[] place to put error thresholds
497
+ ** Globals: none
498
+ ** Operation: This routine compares the best choice for the current
499
+ ** word to the best raw choice to determine which characters
500
+ ** were classified incorrectly by the classifier. It then
501
+ ** places a separate threshold into Thresholds for each
502
+ ** character in the word. If the classifier was correct,
503
+ ** MaxRating is placed into Thresholds. If the
504
+ ** classifier was incorrect, the avg. match rating (error
505
+ ** percentage) of the classifier's incorrect choice minus
506
+ ** some margin is
507
+ ** placed into thresholds. This can then be used by the
508
+ ** caller to try to create a new template for the desired
509
+ ** class that will classify the character with a rating better
510
+ ** than the threshold value. The match rating placed into
511
+ ** Thresholds is never allowed to be below MinRating in order
512
+ ** to prevent trying to make overly tight templates.
513
+ ** Return: none (results are placed in Thresholds)
514
+ ** Exceptions: none
515
+ ** History: Fri May 31 16:02:57 1991, DSJ, Created.
516
+ */
517
+ EXPANDED_CHOICE BestRaw;
518
+ VIABLE_CHOICE Choice;
519
+ int i, j, Chunk;
520
+ FLOAT32 AvgRating;
521
+ int NumErrorChunks;
522
+
523
+ assert (BestChoices != NIL);
524
+ assert (BestRawChoice != NULL);
525
+
526
+ ExpandChoice(BestRawChoice, &BestRaw);
527
+ Choice = (VIABLE_CHOICE) first_node (BestChoices);
528
+
529
+ for (i = 0, Chunk = 0; i < Choice->Length; i++, Thresholds++) {
530
+ AvgRating = 0.0;
531
+ NumErrorChunks = 0;
532
+
533
+ for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++)
534
+ if (Choice->Blob[i].Class != BestRaw.ChunkClass[Chunk]) {
535
+ AvgRating += BestRaw.ChunkCertainty[Chunk];
536
+ NumErrorChunks++;
537
+ }
538
+
539
+ if (NumErrorChunks > 0) {
540
+ AvgRating /= NumErrorChunks;
541
+ *Thresholds = (AvgRating / -CertaintyScale) * (1.0 - RatingMargin);
542
+ }
543
+ else
544
+ *Thresholds = MaxRating;
545
+
546
+ if (*Thresholds > MaxRating)
547
+ *Thresholds = MaxRating;
548
+ if (*Thresholds < MinRating)
549
+ *Thresholds = MinRating;
550
+ }
551
+ } /* FindClassifierErrors */
552
+
553
+
554
+ /*---------------------------------------------------------------------------*/
555
+ void InitStopperVars() {
556
+ /*
557
+ ** Parameters: none
558
+ ** Globals: none
559
+ ** Operation: Initializes the control variables used by the stopper.
560
+ ** Return: none
561
+ ** Exceptions: none
562
+ ** History: Thu May 9 10:06:04 1991, DSJ, Created.
563
+ */
564
+ VALUE dummy;
565
+
566
+ string_variable (DangerousAmbigs, "DangerousAmbigs", DANGEROUS_AMBIGS);
567
+ string_variable (WordToDebug, "WordToDebug", "");
568
+ string_variable (WordToDebug_lengths, "WordToDebug_lengths", "");
569
+
570
+ MakeNonDictCertainty();
571
+ MakeRejectCertaintyOffset();
572
+ MakeSmallWordSize();
573
+ MakeCertaintyPerChar();
574
+ MakeCertaintyVariation();
575
+ MakeStopperDebugLevel();
576
+ MakeAmbigThresholdGain();
577
+ MakeAmbigThresholdOffset();
578
+ } /* InitStopperVars */
579
+
580
+
581
+ /*---------------------------------------------------------------------------*/
582
+ void InitChoiceAccum() {
583
+ /*
584
+ ** Parameters: none
585
+ ** Globals: none
586
+ ** Operation: This routine initializes the data structures used to
587
+ ** keep track the good word choices found for a word.
588
+ ** Return: none
589
+ ** Exceptions: none
590
+ ** History: Fri May 17 07:59:00 1991, DSJ, Created.
591
+ */
592
+ BLOB_WIDTH *BlobWidth, *End;
593
+
594
+ if (BestRawChoice)
595
+ memfree(BestRawChoice);
596
+
597
+ if (BestChoices)
598
+ destroy_nodes(BestChoices, memfree);
599
+
600
+ BestRawChoice = NULL;
601
+ BestChoices = NIL;
602
+ EnableChoiceAccum();
603
+
604
+ for (BlobWidth = CurrentSegmentation,
605
+ End = CurrentSegmentation + MAX_NUM_CHUNKS;
606
+ BlobWidth < End; *BlobWidth++ = 1);
607
+
608
+ } /* InitChoiceAccum */
609
+
610
+
611
+ /*---------------------------------------------------------------------------*/
612
+ void
613
+ LogNewRawChoice (A_CHOICE * Choice, FLOAT32 AdjustFactor, float Certainties[]) {
614
+ /*
615
+ ** Parameters:
616
+ ** Choice new raw choice for current word
617
+ ** AdjustFactor adjustment factor which was applied to choice
618
+ ** Certainties certainties for each char in new choice
619
+ ** Globals:
620
+ ** BestRawChoice best raw choice so far for current word
621
+ ** Operation: This routine compares Choice to the best raw (non-dict)
622
+ ** choice so far and replaces it if the new choice is better.
623
+ ** Return: none
624
+ ** Exceptions: none
625
+ ** History: Wed May 15 09:57:19 1991, DSJ, Created.
626
+ */
627
+ if (!KeepWordChoices)
628
+ return;
629
+
630
+ if (!BestRawChoice)
631
+ BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties);
632
+ else if (class_probability (Choice) < BestRawChoice->Rating) {
633
+ if (ChoiceSameAs (Choice, BestRawChoice))
634
+ ReplaceDuplicateChoice(BestRawChoice, Choice, AdjustFactor, Certainties);
635
+ else {
636
+ memfree(BestRawChoice);
637
+ BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties);
638
+ }
639
+ }
640
+ } /* LogNewRawChoice */
641
+
642
+
643
+ /*---------------------------------------------------------------------------*/
644
+ void LogNewSegmentation(PIECES_STATE BlobWidth) {
645
+ /*
646
+ ** Parameters:
647
+ ** BlobWidth[] number of chunks in each blob in segmentation
648
+ ** Globals:
649
+ ** CurrentSegmentation blob widths for current segmentation
650
+ ** Operation: This routine updates the blob widths in CurrentSegmentation
651
+ ** to be the same as provided in BlobWidth.
652
+ ** Return: none
653
+ ** Exceptions: none
654
+ ** History: Mon May 20 11:52:26 1991, DSJ, Created.
655
+ */
656
+ BLOB_WIDTH *Segmentation;
657
+
658
+ for (Segmentation = CurrentSegmentation; *BlobWidth != 0;
659
+ BlobWidth++, Segmentation++)
660
+ *Segmentation = *BlobWidth;
661
+ *Segmentation = 0;
662
+
663
+ } /* LogNewSegmentation */
664
+
665
+
666
+ /*---------------------------------------------------------------------------*/
667
+ void LogNewSplit(int Blob) {
668
+ /*
669
+ ** Parameters:
670
+ ** Blob index of blob that was split
671
+ ** Globals:
672
+ ** BestRawChoice current best raw choice
673
+ ** BestChoices list of best choices found so far
674
+ ** Operation: This routine adds 1 chunk to the specified blob for each
675
+ ** choice in BestChoices and for the BestRawChoice.
676
+ ** Return: none
677
+ ** Exceptions: none
678
+ ** History: Mon May 20 11:38:56 1991, DSJ, Created.
679
+ */
680
+ LIST Choices;
681
+
682
+ if (BestRawChoice) {
683
+ AddNewChunk(BestRawChoice, Blob);
684
+ }
685
+
686
+ Choices = BestChoices;
687
+ iterate(Choices) {
688
+ AddNewChunk ((VIABLE_CHOICE) first_node (Choices), Blob);
689
+ }
690
+
691
+ } /* LogNewSplit */
692
+
693
+
694
+ /*---------------------------------------------------------------------------*/
695
+ void
696
+ LogNewWordChoice (A_CHOICE * Choice,
697
+ FLOAT32 AdjustFactor, float Certainties[]) {
698
+ /*
699
+ ** Parameters:
700
+ ** Choice new choice for current word
701
+ ** AdjustFactor adjustment factor which was applied to choice
702
+ ** Certainties certainties for each char in new choice
703
+ ** Globals:
704
+ ** BestChoices best choices so far for current word
705
+ ** Operation: This routine adds Choice to BestChoices if the
706
+ ** adjusted certainty for Choice is within a reasonable range
707
+ ** of the best choice in BestChoices. The BestChoices
708
+ ** list is kept in sorted order by rating. Duplicates are
709
+ ** removed.
710
+ ** Return: none
711
+ ** Exceptions: none
712
+ ** History: Wed May 15 09:57:19 1991, DSJ, Created.
713
+ */
714
+ VIABLE_CHOICE NewChoice;
715
+ LIST Choices;
716
+ FLOAT32 Threshold;
717
+
718
+ if (!KeepWordChoices)
719
+ return;
720
+
721
+ /* throw out obviously bad choices to save some work */
722
+ if (BestChoices != NIL) {
723
+ Threshold = AmbigThreshold (BestFactor (BestChoices), AdjustFactor);
724
+ if (Threshold > -AmbigThresholdOffset)
725
+ Threshold = -AmbigThresholdOffset;
726
+ if (class_certainty (Choice) - BestCertainty (BestChoices) < Threshold)
727
+ return;
728
+ }
729
+
730
+ /* see if a choice with the same text string has already been found */
731
+ NewChoice = NULL;
732
+ Choices = BestChoices;
733
+ iterate(Choices) {
734
+ if (ChoiceSameAs (Choice, (VIABLE_CHOICE) first_node (Choices))) {
735
+ if (class_probability (Choice) < BestRating (Choices))
736
+ NewChoice = (VIABLE_CHOICE) first_node (Choices);
737
+ else
738
+ return;
739
+ }
740
+ }
741
+
742
+ if (NewChoice) {
743
+ ReplaceDuplicateChoice(NewChoice, Choice, AdjustFactor, Certainties);
744
+ BestChoices = delete_d (BestChoices, NewChoice, is_same_node);
745
+ }
746
+ else {
747
+ NewChoice = NewViableChoice (Choice, AdjustFactor, Certainties);
748
+ }
749
+
750
+ BestChoices = s_adjoin (BestChoices, NewChoice, CmpChoiceRatings);
751
+ if (StopperDebugLevel >= 2)
752
+ PrintViableChoice (stderr, "New Word Choice: ", NewChoice);
753
+ if (count (BestChoices) > tessedit_truncate_wordchoice_log) {
754
+ Choices =
755
+ (LIST) nth_cell (BestChoices, tessedit_truncate_wordchoice_log);
756
+ destroy_nodes (rest (Choices), Efree);
757
+ set_rest(Choices, NIL);
758
+ }
759
+
760
+ } /* LogNewWordChoice */
761
+
762
+
763
+ /*---------------------------------------------------------------------------*/
764
+ static AMBIG_TABLE *AmbigFor = NULL;
765
+
766
+ int NoDangerousAmbig(const char *Word,
767
+ const char *Word_lengths,
768
+ DANGERR *fixpt) {
769
+ /*
770
+ ** Parameters:
771
+ ** Word word to check for dangerous ambiguities
772
+ ** Word_lengths lengths of unichars in Word
773
+ ** Globals: none
774
+ ** Operation: This word checks each letter in word against a list
775
+ ** of potentially ambiguous characters. If a match is found
776
+ ** that letter is replaced with its ambiguity and tested in
777
+ ** the dictionary. If the ambiguous word is found in the
778
+ ** dictionary, FALSE is returned. Otherwise, the search
779
+ ** continues for other ambiguities. If no ambiguities that
780
+ ** match in the dictionary are found, TRUE is returned.
781
+ ** Return: TRUE if Word contains no dangerous ambiguities.
782
+ ** Exceptions: none
783
+ ** History: Mon May 6 16:28:56 1991, DSJ, Created.
784
+ */
785
+
786
+ char NewWord[MAX_WERD_SIZE * UNICHAR_LEN + 1];
787
+ char *NextNewChar;
788
+ int bad_index = 0;
789
+
790
+ if (!AmbigFor)
791
+ AmbigFor = FillAmbigTable ();
792
+
793
+ NextNewChar = NewWord;
794
+ while (*Word)
795
+ if (AmbigsFound (NewWord, NextNewChar,
796
+ Word + *Word_lengths, Word_lengths + 1,
797
+ AmbigFor[unicharset.unichar_to_id(Word, *Word_lengths)],
798
+ fixpt)) {
799
+ if (fixpt != NULL)
800
+ fixpt->index = bad_index;
801
+ return (FALSE);
802
+ }
803
+ else {
804
+ strncpy(NextNewChar, Word, *Word_lengths);
805
+ NextNewChar += *Word_lengths;
806
+ Word += *Word_lengths;
807
+ Word_lengths++;
808
+ bad_index++;
809
+ }
810
+
811
+ return (TRUE);
812
+
813
+ } /* NoDangerousAmbig */
814
+
815
+ void EndDangerousAmbigs() {
816
+ if (AmbigFor != NULL) {
817
+ for (int i = 0; i <= MAX_CLASS_ID; ++i) {
818
+ destroy_nodes(AmbigFor[i], Efree);
819
+ }
820
+ Efree(AmbigFor);
821
+ AmbigFor = NULL;
822
+ }
823
+ }
824
+
825
+ /*---------------------------------------------------------------------------*/
826
+ void SettupStopperPass1() {
827
+ /*
828
+ ** Parameters: none
829
+ ** Globals:
830
+ ** RejectOffset offset allowed before word is rejected
831
+ ** Operation: This routine performs any settup of stopper variables
832
+ ** that is needed in preparation for the first pass.
833
+ ** Return: none
834
+ ** Exceptions: none
835
+ ** History: Mon Jun 3 12:32:00 1991, DSJ, Created.
836
+ */
837
+ RejectOffset = 0.0;
838
+ } /* SettupStopperPass1 */
839
+
840
+
841
+ /*---------------------------------------------------------------------------*/
842
+ void SettupStopperPass2() {
843
+ /*
844
+ ** Parameters: none
845
+ ** Globals:
846
+ ** RejectOffset offset allowed before word is rejected
847
+ ** Operation: This routine performs any settup of stopper variables
848
+ ** that is needed in preparation for the second pass.
849
+ ** Return: none
850
+ ** Exceptions: none
851
+ ** History: Mon Jun 3 12:32:00 1991, DSJ, Created.
852
+ */
853
+ RejectOffset = RejectCertaintyOffset;
854
+ } /* SettupStopperPass2 */
855
+
856
+
857
+ /**----------------------------------------------------------------------------
858
+ Private Code
859
+ ----------------------------------------------------------------------------**/
860
+ /*---------------------------------------------------------------------------*/
861
+ void AddNewChunk(VIABLE_CHOICE Choice, int Blob) {
862
+ /*
863
+ ** Parameters:
864
+ ** Choice choice to add a new chunk to
865
+ ** Blob index of blob being split
866
+ ** Globals: none
867
+ ** Operation: This routine increments the chunk count of the character
868
+ ** in Choice which corresponds to Blob.
869
+ ** Return: none
870
+ ** Exceptions: none
871
+ ** History: Mon May 20 11:43:27 1991, DSJ, Created.
872
+ */
873
+ int i, LastChunk;
874
+
875
+ for (i = 0, LastChunk = 0; i < Choice->Length; i++) {
876
+ LastChunk += Choice->Blob[i].NumChunks;
877
+ if (Blob < LastChunk) {
878
+ (Choice->Blob[i].NumChunks)++;
879
+ return;
880
+ }
881
+ }
882
+ mem_tidy (1);
883
+ cprintf ("AddNewChunk failed:Choice->Length=%d, LastChunk=%d, Blob=%d\n",
884
+ Choice->Length, LastChunk, Blob);
885
+ assert(FALSE); /* this should never get executed */
886
+
887
+ } /* AddNewChunk */
888
+
889
+
890
+ /*---------------------------------------------------------------------------*/
891
+ int AmbigsFound(char *Word,
892
+ char *CurrentChar,
893
+ const char *Tail,
894
+ const char *Tail_lengths,
895
+ LIST Ambigs,
896
+ DANGERR *fixpt) {
897
+ /*
898
+ ** Parameters:
899
+ ** Word word being tested for ambiguities
900
+ ** CurrentChar position in Word to put ambig replacement
901
+ ** Tail end of word to place after ambiguity
902
+ ** Tail_lengths lengths of the unichars in Tail
903
+ ** Ambigs list of ambiguities to test at this position
904
+ ** Globals: none
905
+ ** Operation: For each ambiguity in Ambigs, see if the remainder of
906
+ ** the test string matches the start of Tail. If it does,
907
+ ** construct a word consisting of the contents of Word up to,
908
+ ** but not including, CurrentChar followed by the replacement
909
+ ** string for the ambiguity followed by the unmatched
910
+ ** contents of Tail. Then test this word to see if it
911
+ ** is a dictionary word. If it is return TRUE. If none of
912
+ ** the ambiguities result in a dictionary word, return FALSE.
913
+ ** Return: TRUE if the Word is ambiguous at the specified position
914
+ ** Exceptions: none
915
+ ** History: Thu May 9 10:10:28 1991, DSJ, Created.
916
+ */
917
+ AMBIG_SPEC *AmbigSpec;
918
+ char *ambig;
919
+ char *ambig_lengths;
920
+ const char *UnmatchedTail;
921
+ const char *UnmatchedTail_lengths;
922
+ int Matches;
923
+ int bad_length;
924
+
925
+ iterate(Ambigs) {
926
+ AmbigSpec = (AMBIG_SPEC *) first_node (Ambigs);
927
+ ambig = AmbigSpec->ambig;
928
+ ambig_lengths = AmbigSpec->lengths;
929
+ bad_length = 1;
930
+ UnmatchedTail = Tail;
931
+ UnmatchedTail_lengths = Tail_lengths;
932
+ Matches = TRUE;
933
+
934
+ while (*ambig != ' ' && Matches)
935
+ if (*UnmatchedTail_lengths == *ambig_lengths &&
936
+ strncmp(ambig, UnmatchedTail, *ambig_lengths) == 0) {
937
+ ambig += *(ambig_lengths++);
938
+ UnmatchedTail += *(UnmatchedTail_lengths++);
939
+ bad_length++;
940
+ }
941
+ else
942
+ Matches = FALSE;
943
+
944
+ if (Matches) {
945
+ ambig += *(ambig_lengths++); /* skip over the space */
946
+ /* insert replacement string */
947
+ strcpy(CurrentChar, ambig);
948
+ /* add tail */
949
+ strcat(Word, UnmatchedTail);
950
+ if (valid_word (Word)) {
951
+ if (StopperDebugLevel >= 1)
952
+ cprintf ("Stopper: Possible ambiguous word = %s\n", Word);
953
+ if (fixpt != NULL) {
954
+ fixpt->good_length = strlen (ambig_lengths);
955
+ fixpt->bad_length = bad_length;
956
+ }
957
+ return (TRUE);
958
+ }
959
+ }
960
+ }
961
+ return (FALSE);
962
+
963
+ } /* AmbigsFound */
964
+
965
+
966
+ /*---------------------------------------------------------------------------*/
967
+ int ChoiceSameAs(A_CHOICE *Choice, VIABLE_CHOICE ViableChoice) {
968
+ /*
969
+ ** Parameters:
970
+ ** Choice choice to compare to ViableChoice
971
+ ** ViableChoice viable choice to compare to Choice
972
+ ** Globals: none
973
+ ** Operation: This routine compares the corresponding strings of
974
+ ** Choice and ViableChoice and returns TRUE if they are the
975
+ ** same, FALSE otherwise.
976
+ ** Return: TRUE or FALSE.
977
+ ** Exceptions: none
978
+ ** History: Fri May 17 08:48:04 1991, DSJ, Created.
979
+ */
980
+ return (StringSameAs (class_string (Choice), class_lengths (Choice),
981
+ ViableChoice));
982
+
983
+ } /* ChoiceSameAs */
984
+
985
+
986
+ /*---------------------------------------------------------------------------*/
987
+ int CmpChoiceRatings(void *arg1, //VIABLE_CHOICE Choice1,
988
+ void *arg2) { //VIABLE_CHOICE Choice2)
989
+ /*
990
+ ** Parameters:
991
+ ** Choice1, Choice2 choices to compare ratings for
992
+ ** Globals: none
993
+ ** Operation: Return -1 if the rating for Choice1 is less than the
994
+ ** rating for Choice2, otherwise return (1).
995
+ ** Return: -1 or 1
996
+ ** Exceptions: none
997
+ ** History: Wed May 15 13:02:37 1991, DSJ, Created.
998
+ */
999
+ float R1, R2;
1000
+ VIABLE_CHOICE Choice1 = (VIABLE_CHOICE) arg1;
1001
+ VIABLE_CHOICE Choice2 = (VIABLE_CHOICE) arg2;
1002
+
1003
+ R1 = Choice1->Rating;
1004
+ R2 = Choice2->Rating;
1005
+
1006
+ if (R1 < R2)
1007
+ return (-1);
1008
+ else
1009
+ return (1);
1010
+
1011
+ } /* CmpChoiceRatings */
1012
+
1013
+
1014
+ /*---------------------------------------------------------------------------*/
1015
+ void ExpandChoice(VIABLE_CHOICE Choice, EXPANDED_CHOICE *ExpandedChoice) {
1016
+ /*
1017
+ ** Parameters:
1018
+ ** Choice choice to be expanded
1019
+ ** ExpandedChoice place to put resulting expanded choice
1020
+ ** Globals: none
1021
+ ** Operation: This routine expands Choice and places the results
1022
+ ** in ExpandedChoice. The primary function of expansion
1023
+ ** is to create an two arrays, one which holds the corresponding
1024
+ ** certainty for each chunk in Choice, and one which holds
1025
+ ** the class for each chunk.
1026
+ ** Return: none (results are placed in ExpandedChoice)
1027
+ ** Exceptions: none
1028
+ ** History: Fri May 31 15:21:57 1991, DSJ, Created.
1029
+ */
1030
+ int i, j, Chunk;
1031
+
1032
+ ExpandedChoice->Choice = Choice;
1033
+ for (i = 0, Chunk = 0; i < Choice->Length; i++)
1034
+ for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) {
1035
+ ExpandedChoice->ChunkCertainty[Chunk] = Choice->Blob[i].Certainty;
1036
+ ExpandedChoice->ChunkClass[Chunk] = Choice->Blob[i].Class;
1037
+ }
1038
+ } /* ExpandChoice */
1039
+
1040
+
1041
+ /*---------------------------------------------------------------------------*/
1042
+ AMBIG_TABLE *FillAmbigTable() {
1043
+ /*
1044
+ ** Parameters: none
1045
+ ** Globals:
1046
+ ** DangerousAmbigs filename of dangerous ambig info
1047
+ ** Operation: This routine allocates a new ambiguity table and fills
1048
+ ** it in from the file specified by DangerousAmbigs. An
1049
+ ** ambiguity table is an array of lists. The array is indexed
1050
+ ** by a class id. Therefore, each entry in the table provides
1051
+ ** a list of potential ambiguities which can start with the
1052
+ ** corresponding character. Each potential ambiguity is
1053
+ ** described by a string which contains the remainder of the
1054
+ ** test string followed by a space followed by the replacement
1055
+ ** string. For example the ambiguity "rn -> m", would be
1056
+ ** located in the table at index 'r'. The string corresponding
1057
+ ** to this ambiguity would be "n m".
1058
+ ** Return: Pointer to new ambiguity table.
1059
+ ** Exceptions: none
1060
+ ** History: Thu May 9 09:20:57 1991, DSJ, Created.
1061
+ */
1062
+ FILE *AmbigFile;
1063
+ AMBIG_TABLE *NewTable;
1064
+ int i;
1065
+ int AmbigPartSize;
1066
+ char buffer[256 * UNICHAR_LEN];
1067
+ char TestString[256 * UNICHAR_LEN];
1068
+ char TestString_lengths[256];
1069
+ char ReplacementString[256 * UNICHAR_LEN];
1070
+ char ReplacementString_lengths[256];
1071
+ STRING name;
1072
+ char lengths[2];
1073
+ AMBIG_SPEC *AmbigSpec;
1074
+ UNICHAR_ID unichar_id;
1075
+
1076
+ lengths[1] = 0;
1077
+
1078
+ name = language_data_path_prefix;
1079
+ name += DangerousAmbigs;
1080
+ AmbigFile = Efopen (name.string(), "r");
1081
+ NewTable = (AMBIG_TABLE *) Emalloc (sizeof (LIST) * (MAX_CLASS_ID + 1));
1082
+
1083
+ for (i = 0; i <= MAX_CLASS_ID; i++)
1084
+ NewTable[i] = NIL;
1085
+
1086
+ while (fscanf (AmbigFile, "%d", &AmbigPartSize) == 1) {
1087
+ TestString[0] = '\0';
1088
+ TestString_lengths[0] = 0;
1089
+ ReplacementString[0] = '\0';
1090
+ ReplacementString_lengths[0] = 0;
1091
+ bool illegal_char = false;
1092
+ for (i = 0; i < AmbigPartSize; ++i) {
1093
+ fscanf (AmbigFile, "%s", buffer);
1094
+ strcat(TestString, buffer);
1095
+ lengths[0] = strlen(buffer);
1096
+ strcat(TestString_lengths, lengths);
1097
+ if (!unicharset.contains_unichar(buffer))
1098
+ illegal_char = true;
1099
+ }
1100
+ fscanf (AmbigFile, "%d", &AmbigPartSize);
1101
+ for (i = 0; i < AmbigPartSize; ++i) {
1102
+ fscanf (AmbigFile, "%s", buffer);
1103
+ strcat(ReplacementString, buffer);
1104
+ lengths[0] = strlen(buffer);
1105
+ strcat(ReplacementString_lengths, lengths);
1106
+ if (!unicharset.contains_unichar(buffer))
1107
+ illegal_char = true;
1108
+ }
1109
+
1110
+ if (strlen (TestString_lengths) > MAX_AMBIG_SIZE ||
1111
+ strlen (ReplacementString_lengths) > MAX_AMBIG_SIZE)
1112
+ DoError (0, "Illegal ambiguity specification!");
1113
+ if (illegal_char) {
1114
+ continue;
1115
+ }
1116
+
1117
+ AmbigSpec = (AMBIG_SPEC *) Emalloc (sizeof (AMBIG_SPEC));
1118
+
1119
+ strcpy(AmbigSpec->ambig, TestString + TestString_lengths[0]);
1120
+ strcat(AmbigSpec->ambig, " ");
1121
+ strcat(AmbigSpec->ambig, ReplacementString);
1122
+
1123
+ strcpy(AmbigSpec->lengths, TestString_lengths + 1);
1124
+ lengths[0] = 1;
1125
+ strcat(AmbigSpec->lengths, lengths);
1126
+ strcat(AmbigSpec->lengths, ReplacementString_lengths);
1127
+ unichar_id = unicharset.unichar_to_id(TestString, TestString_lengths[0]);
1128
+ NewTable[unichar_id] = push_last (NewTable[unichar_id], AmbigSpec);
1129
+ }
1130
+
1131
+ fclose(AmbigFile);
1132
+ return (NewTable);
1133
+
1134
+ } /* FillAmbigTable */
1135
+
1136
+
1137
+ /*---------------------------------------------------------------------------*/
1138
+ int FreeBadChoice(void *item1, //VIABLE_CHOICE Choice,
1139
+ void *item2) { //EXPANDED_CHOICE *BestChoice)
1140
+ /*
1141
+ ** Parameters:
1142
+ ** Choice choice to be tested
1143
+ ** BestChoice best choice found
1144
+ ** Globals:
1145
+ ** AmbigThresholdGain
1146
+ ** AmbigThresholdOffset
1147
+ ** Operation: If the certainty of any chunk in Choice is not ambiguous
1148
+ ** with the corresponding chunk in the best choice, free
1149
+ ** Choice and return TRUE. Otherwise, return FALSE.
1150
+ ** Return: TRUE or FALSE.
1151
+ ** Exceptions: none
1152
+ ** History: Wed May 15 13:20:26 1991, DSJ, Created.
1153
+ */
1154
+ int i, j, Chunk;
1155
+ FLOAT32 Threshold;
1156
+ VIABLE_CHOICE Choice;
1157
+ EXPANDED_CHOICE *BestChoice;
1158
+
1159
+ Choice = (VIABLE_CHOICE) item1;
1160
+ BestChoice = (EXPANDED_CHOICE *) item2;
1161
+
1162
+ Threshold = AmbigThreshold (BestChoice->Choice->AdjustFactor,
1163
+ Choice->AdjustFactor);
1164
+
1165
+ for (i = 0, Chunk = 0; i < Choice->Length; i++)
1166
+ for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++)
1167
+ if (Choice->Blob[i].Class != BestChoice->ChunkClass[Chunk] &&
1168
+ Choice->Blob[i].Certainty - BestChoice->ChunkCertainty[Chunk] <
1169
+ Threshold) {
1170
+ memfree(Choice);
1171
+ return (TRUE);
1172
+ }
1173
+
1174
+ return (FALSE);
1175
+
1176
+ } /* FreeBadChoice */
1177
+
1178
+
1179
+ /*---------------------------------------------------------------------------*/
1180
+ int LengthOfShortestAlphaRun(register char *Word, const char *Word_lengths) {
1181
+ /*
1182
+ ** Parameters:
1183
+ ** Word word to be tested
1184
+ ** Word_lengths lengths of the unichars in Word
1185
+ ** Globals: none
1186
+ ** Operation: Return the length of the shortest alpha run in Word.
1187
+ ** Return: Return the length of the shortest alpha run in Word.
1188
+ ** Exceptions: none
1189
+ ** History: Tue May 14 07:50:45 1991, DSJ, Created.
1190
+ */
1191
+ register int Shortest = MAX_INT32;
1192
+ register int Length;
1193
+
1194
+ for (; *Word; Word += *(Word_lengths++))
1195
+ if (unicharset.get_isalpha(Word, *Word_lengths)) {
1196
+ for (Length = 1, Word += *(Word_lengths++);
1197
+ *Word && unicharset.get_isalpha(Word, *Word_lengths);
1198
+ Word += *(Word_lengths++), Length++);
1199
+ if (Length < Shortest)
1200
+ Shortest = Length;
1201
+
1202
+ if (*Word == 0)
1203
+ break;
1204
+ }
1205
+ if (Shortest == MAX_INT32)
1206
+ Shortest = 0;
1207
+
1208
+ return (Shortest);
1209
+
1210
+ } /* LengthOfShortestAlphaRun */
1211
+
1212
+
1213
+ /*---------------------------------------------------------------------------*/
1214
+ VIABLE_CHOICE
1215
+ NewViableChoice (A_CHOICE * Choice, FLOAT32 AdjustFactor, float Certainties[]) {
1216
+ /*
1217
+ ** Parameters:
1218
+ ** Choice choice to be converted to a viable choice
1219
+ ** AdjustFactor factor used to adjust ratings for Choice
1220
+ ** Certainties certainty for each character in Choice
1221
+ ** Globals:
1222
+ ** CurrentSegmentation segmentation corresponding to Choice
1223
+ ** Operation: Allocate a new viable choice data structure, copy
1224
+ ** Choice, Certainties, and CurrentSegmentation into it,
1225
+ ** and return a pointer to it.
1226
+ ** Return: Ptr to new viable choice.
1227
+ ** Exceptions: none
1228
+ ** History: Thu May 16 15:28:29 1991, DSJ, Created.
1229
+ */
1230
+ VIABLE_CHOICE NewChoice;
1231
+ int Length;
1232
+ char *Word;
1233
+ char *Word_lengths;
1234
+ CHAR_CHOICE *NewChar;
1235
+ BLOB_WIDTH *BlobWidth;
1236
+
1237
+ Length = strlen (class_lengths (Choice));
1238
+ assert (Length <= MAX_NUM_CHUNKS && Length > 0);
1239
+
1240
+ NewChoice = (VIABLE_CHOICE) Emalloc (sizeof (VIABLE_CHOICE_STRUCT) +
1241
+ (Length - 1) * sizeof (CHAR_CHOICE));
1242
+
1243
+ NewChoice->Rating = class_probability (Choice);
1244
+ NewChoice->Certainty = class_certainty (Choice);
1245
+ NewChoice->AdjustFactor = AdjustFactor;
1246
+ NewChoice->Length = Length;
1247
+ for (Word = class_string (Choice),
1248
+ Word_lengths = class_lengths (Choice),
1249
+ NewChar = &(NewChoice->Blob[0]),
1250
+ BlobWidth = CurrentSegmentation;
1251
+ *Word;
1252
+ Word += *(Word_lengths++), NewChar++, Certainties++, BlobWidth++) {
1253
+ NewChar->Class = unicharset.unichar_to_id(Word, *Word_lengths);
1254
+ NewChar->NumChunks = *BlobWidth;
1255
+ NewChar->Certainty = *Certainties;
1256
+ }
1257
+
1258
+ return (NewChoice);
1259
+
1260
+ } /* NewViableChoice */
1261
+
1262
+
1263
+ /*---------------------------------------------------------------------------*/
1264
+ void PrintViableChoice(FILE *File, const char *Label, VIABLE_CHOICE Choice) {
1265
+ /*
1266
+ ** Parameters:
1267
+ ** File open text file to print Choice to
1268
+ ** Label text label to be printed with Choice
1269
+ ** Choice choice to be printed
1270
+ ** Globals: none
1271
+ ** Operation: This routine dumps a text representation of the
1272
+ ** specified Choice to File.
1273
+ ** Return: none
1274
+ ** Exceptions: none
1275
+ ** History: Mon May 20 11:16:44 1991, DSJ, Created.
1276
+ */
1277
+ int i, j;
1278
+
1279
+ fprintf (File, "%s", Label);
1280
+
1281
+ fprintf (File, "(R=%5.1f, C=%4.1f, F=%4.2f) ",
1282
+ Choice->Rating, Choice->Certainty, Choice->AdjustFactor);
1283
+
1284
+ for (i = 0; i < Choice->Length; i++)
1285
+ fprintf (File, "%s", unicharset.id_to_unichar(Choice->Blob[i].Class));
1286
+ fprintf (File, "\n");
1287
+
1288
+ for (i = 0; i < Choice->Length; i++) {
1289
+ fprintf (File, " %s", unicharset.id_to_unichar(Choice->Blob[i].Class));
1290
+ for (j = 0; j < Choice->Blob[i].NumChunks - 1; j++)
1291
+ fprintf (File, " ");
1292
+ }
1293
+ fprintf (File, "\n");
1294
+
1295
+ for (i = 0; i < Choice->Length; i++) {
1296
+ for (j = 0; j < Choice->Blob[i].NumChunks; j++)
1297
+ fprintf (File, "%3d", (int) (Choice->Blob[i].Certainty * -10.0));
1298
+ }
1299
+ fprintf (File, "\n");
1300
+
1301
+ } /* PrintViableChoice */
1302
+
1303
+
1304
+ /*---------------------------------------------------------------------------*/
1305
+ void
1306
+ ReplaceDuplicateChoice (VIABLE_CHOICE OldChoice,
1307
+ A_CHOICE * NewChoice,
1308
+ FLOAT32 AdjustFactor, float Certainties[]) {
1309
+ /*
1310
+ ** Parameters:
1311
+ ** OldChoice existing viable choice to be replaced
1312
+ ** NewChoice choice to replace OldChoice with
1313
+ ** AdjustFactor factor used to adjust ratings for OldChoice
1314
+ ** Certainties certainty for each character in OldChoice
1315
+ ** Globals:
1316
+ ** CurrentSegmentation segmentation for NewChoice
1317
+ ** Operation: This routine is used whenever a better segmentation (or
1318
+ ** contextual interpretation) is found for a word which already
1319
+ ** exists. The OldChoice is updated with the relevant
1320
+ ** information from the new choice. The text string itself
1321
+ ** does not need to be copied since, by definition, has not
1322
+ ** changed.
1323
+ ** Return: none
1324
+ ** Exceptions: none
1325
+ ** History: Fri May 17 13:35:58 1991, DSJ, Created.
1326
+ */
1327
+ char *Word;
1328
+ char *Word_lengths;
1329
+ CHAR_CHOICE *NewChar;
1330
+ BLOB_WIDTH *BlobWidth;
1331
+
1332
+ OldChoice->Rating = class_probability (NewChoice);
1333
+ OldChoice->Certainty = class_certainty (NewChoice);
1334
+ OldChoice->AdjustFactor = AdjustFactor;
1335
+
1336
+ for (Word = class_string (NewChoice),
1337
+ Word_lengths = class_lengths (NewChoice),
1338
+ NewChar = &(OldChoice->Blob[0]),
1339
+ BlobWidth = CurrentSegmentation;
1340
+ *Word;
1341
+ Word += *(Word_lengths++), NewChar++, Certainties++, BlobWidth++) {
1342
+ NewChar->NumChunks = *BlobWidth;
1343
+ NewChar->Certainty = *Certainties;
1344
+ }
1345
+ } /* ReplaceDuplicateChoice */
1346
+
1347
+
1348
+ /*---------------------------------------------------------------------------*/
1349
+ int StringSameAs(const char *String,
1350
+ const char *String_lengths,
1351
+ VIABLE_CHOICE ViableChoice) {
1352
+ /*
1353
+ ** Parameters:
1354
+ ** String string to compare to ViableChoice
1355
+ ** String_lengths lengths of unichars in String
1356
+ ** ViableChoice viable choice to compare to String
1357
+ ** Globals: none
1358
+ ** Operation: This routine compares String to ViableChoice and
1359
+ ** returns TRUE if they are the same, FALSE otherwise.
1360
+ ** Return: TRUE or FALSE.
1361
+ ** Exceptions: none
1362
+ ** History: Fri May 17 08:48:04 1991, DSJ, Created.
1363
+ */
1364
+ CHAR_CHOICE *Char;
1365
+ int i;
1366
+ int current_unichar_length;
1367
+
1368
+ for (Char = &(ViableChoice->Blob[0]), i = 0;
1369
+ i < ViableChoice->Length;
1370
+ String += *(String_lengths++), Char++, i++) {
1371
+ current_unichar_length = strlen(unicharset.id_to_unichar(Char->Class));
1372
+ if (current_unichar_length != *String_lengths ||
1373
+ strncmp(String, unicharset.id_to_unichar(Char->Class),
1374
+ current_unichar_length) != 0)
1375
+ return (FALSE);
1376
+ }
1377
+
1378
+ if (*String == 0)
1379
+ return (TRUE);
1380
+ else
1381
+ return (FALSE);
1382
+
1383
+ } /* StringSameAs */
1384
+
1385
+
1386
+ /*---------------------------------------------------------------------------*/
1387
+ int UniformCertainties(CHOICES_LIST Choices, A_CHOICE *BestChoice) {
1388
+ /*
1389
+ ** Parameters:
1390
+ ** Choices choices for current segmentation
1391
+ ** BestChoice best choice for current segmentation
1392
+ ** Globals:
1393
+ ** CertaintyVariation max allowed certainty variation
1394
+ ** Operation: This routine returns TRUE if the certainty of the
1395
+ ** BestChoice word is within a reasonable range of the average
1396
+ ** certainties for the best choices for each character in
1397
+ ** the segmentation. This test is used to catch words in which
1398
+ ** one character is much worse than the other characters in
1399
+ ** the word (i.e. FALSE will be returned in that case).
1400
+ ** The algorithm computes the mean and std deviation of the
1401
+ ** certainties in the word with the worst certainty thrown out.
1402
+ ** Return: TRUE or FALSE.
1403
+ ** Exceptions: none
1404
+ ** History: Tue May 14 08:23:21 1991, DSJ, Created.
1405
+ */
1406
+ int i;
1407
+ CHOICES CharChoices;
1408
+ float Certainty;
1409
+ float WorstCertainty = MAX_FLOAT32;
1410
+ float CertaintyThreshold;
1411
+ FLOAT64 TotalCertainty;
1412
+ FLOAT64 TotalCertaintySquared;
1413
+ FLOAT64 Variance;
1414
+ FLOAT32 Mean, StdDev;
1415
+ int WordLength;
1416
+
1417
+ WordLength = array_count (Choices);
1418
+ if (WordLength < 3)
1419
+ return (TRUE);
1420
+
1421
+ TotalCertainty = TotalCertaintySquared = 0.0;
1422
+ for_each_choice(Choices, i) {
1423
+ CharChoices = (CHOICES) array_index (Choices, i);
1424
+ Certainty = best_certainty (CharChoices);
1425
+ TotalCertainty += Certainty;
1426
+ TotalCertaintySquared += Certainty * Certainty;
1427
+ if (Certainty < WorstCertainty)
1428
+ WorstCertainty = Certainty;
1429
+ }
1430
+
1431
+ /* subtract off worst certainty from statistics */
1432
+ WordLength--;
1433
+ TotalCertainty -= WorstCertainty;
1434
+ TotalCertaintySquared -= WorstCertainty * WorstCertainty;
1435
+
1436
+ Mean = TotalCertainty / WordLength;
1437
+ Variance = ((WordLength * TotalCertaintySquared -
1438
+ TotalCertainty * TotalCertainty) /
1439
+ (WordLength * (WordLength - 1)));
1440
+ if (Variance < 0.0)
1441
+ Variance = 0.0;
1442
+ StdDev = sqrt (Variance);
1443
+
1444
+ CertaintyThreshold = Mean - CertaintyVariation * StdDev;
1445
+ if (CertaintyThreshold > NonDictCertainty)
1446
+ CertaintyThreshold = NonDictCertainty;
1447
+
1448
+ if (class_certainty (BestChoice) < CertaintyThreshold) {
1449
+ if (StopperDebugLevel >= 1)
1450
+ cprintf
1451
+ ("Stopper: Non-uniform certainty = %4.1f (m=%4.1f, s=%4.1f, t=%4.1f)\n",
1452
+ class_certainty (BestChoice), Mean, StdDev, CertaintyThreshold);
1453
+ return (FALSE);
1454
+ }
1455
+ else
1456
+ return (TRUE);
1457
+
1458
+ } /* UniformCertainties */