tesseract_bin 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (612) hide show
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,1704 @@
1
+ /* -*-C-*-
2
+ ********************************************************************************
3
+ *
4
+ * File: permute.c (Formerly permute.c)
5
+ * Description: Handle the new ratings choices for Wise Owl
6
+ * Author: Mark Seaman, OCR Technology
7
+ * Created: Fri Sep 22 14:05:51 1989
8
+ * Modified: Thu Jan 3 16:38:46 1991 (Mark Seaman) marks@hpgrlt
9
+ * Language: C
10
+ * Package: N/A
11
+ * Status: Experimental (Do Not Distribute)
12
+ *
13
+ * (c) Copyright 1989, Hewlett-Packard Company.
14
+ ** Licensed under the Apache License, Version 2.0 (the "License");
15
+ ** you may not use this file except in compliance with the License.
16
+ ** You may obtain a copy of the License at
17
+ ** http://www.apache.org/licenses/LICENSE-2.0
18
+ ** Unless required by applicable law or agreed to in writing, software
19
+ ** distributed under the License is distributed on an "AS IS" BASIS,
20
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ ** See the License for the specific language governing permissions and
22
+ ** limitations under the License.
23
+ *
24
+ *********************************************************************************/
25
+ /*----------------------------------------------------------------------
26
+ I n c l u d e s
27
+ ---------------------------------------------------------------------*/
28
+ #include "permute.h"
29
+ #include "globals.h"
30
+ #include "permdawg.h"
31
+ #include "debug.h"
32
+ #include "tordvars.h"
33
+ #include "hyphen.h"
34
+ #include "stopper.h"
35
+ #include "trie.h"
36
+ #include "context.h"
37
+ #include "permnum.h"
38
+ #include "freelist.h"
39
+ #include "callcpp.h"
40
+ #include "permngram.h"
41
+
42
+ #include <math.h>
43
+
44
+ int permutation_count; // Used in metrics.cpp.
45
+ /*----------------------------------------------------------------------
46
+ V a r i a b l e s
47
+ ----------------------------------------------------------------------*/
48
+ // TODO(tkielbus) Choose a value for the MAX_NUM_EDGES constant
49
+ // (or make it dynamic)
50
+ #define MAX_NUM_EDGES 2000000
51
+ #define MAX_DOC_EDGES 250000 // edge capacity of both document_words and pending_words
52
+ #define RESERVED_DOC_EDGES 10000 // passed with MAX_DOC_EDGES to add_word_to_dawg
53
+ #define MAX_USER_EDGES 50000 // edge capacity of user_words
54
+ #define USER_RESERVED_EDGES 2000 // passed with MAX_USER_EDGES to read_word_list
55
+ /* Weights for adjustment */
56
+ #define NON_WERD 1.25 // default value of non_word
57
+ #define GARBAGE_STRING 1.5 // default value of garbage
58
+ #define MAX_PERM_LENGTH 128
59
+
60
+ EDGE_ARRAY pending_words; // words waiting for a second sighting (see add_document_word)
61
+ EDGE_ARRAY document_words; // words accepted into the per-document dictionary
62
+ EDGE_ARRAY user_words; // loaded from the "user-words" file by init_permute
63
+ EDGE_ARRAY word_dawg; // loaded from the "word-dawg" file; NULL means not initialised
64
+
65
+ make_toggle_var (adjust_debug, 0, make_adjust_debug,
66
+ 8, 13, set_adjust_debug, "Adjustment Debug"); // traces adjust_non_word via cprintf
67
+
68
+ make_toggle_var (compound_debug, 0, make_compound_debug,
69
+ 8, 14, set_compound_debug, "Compound Debug"); // traces the compound-word permuter
70
+
71
+ make_float_var (non_word, NON_WERD, make_non_word,
72
+ 8, 20, set_non_word, "Non-word adjustment"); // factor used when case and punctuation are ok
73
+
74
+ make_float_var (garbage, GARBAGE_STRING, make_garbage,
75
+ 8, 21, set_garbage, "Garbage adjustment"); // factor used when case or punctuation is bad
76
+
77
+ make_toggle_var (save_doc_words, 0, make_doc_words,
78
+ 8, 22, set_doc_words, "Save Document Words "); // append accepted words to <imagefile>.doc
79
+
80
+ make_toggle_var (doc_dict_enable, 1, make_doc_dict,
81
+ 8, 25, set_doc_dict, "Enable Document Dictionary "); // when 0, add_document_word does nothing
82
+ /* PREV DEFAULT 0 */
83
+
84
+ BOOL_VAR(ngram_permuter_activated, FALSE,
85
+ "Activate character-level n-gram-based permuter"); // permute_all then returns only the n-gram result
86
+
87
+ int permute_only_top = 0; // non-zero: permute_all returns the top-choice result untouched
88
+
89
+ #if 0
90
+ //0x0=.
91
+ static inT32 bigram_counts[256][3] = { {
92
+ 0, 0, 0
93
+ },
94
+ { //0x1=.
95
+ 0, 0, 0
96
+ },
97
+ { //0x2=.
98
+ 0, 0, 0
99
+ },
100
+ { //0x3=.
101
+ 0, 0, 0
102
+ },
103
+ { //0x4=.
104
+ 0, 0, 0
105
+ },
106
+ { //0x5=.
107
+ 0, 0, 0
108
+ },
109
+ { //0x6=.
110
+ 0, 0, 0
111
+ },
112
+ { //0x7=.
113
+ 0, 0, 0
114
+ },
115
+ { //0x8=.
116
+ 0, 0, 0
117
+ },
118
+ { //0x9=.
119
+ 0, 0, 0
120
+ },
121
+ { //0xa=.
122
+ 93, 28, 0
123
+ },
124
+ { //0xb=.
125
+ 0, 0, 0
126
+ },
127
+ { //0xc=.
128
+ 0, 0, 0
129
+ },
130
+ { //0xd=.
131
+ 0, 0, 0
132
+ },
133
+ { //0xe=.
134
+ 0, 0, 0
135
+ },
136
+ { //0xf=.
137
+ 0, 0, 0
138
+ },
139
+ { //0x10=.
140
+ 0, 0, 0
141
+ },
142
+ { //0x11=.
143
+ 0, 0, 0
144
+ },
145
+ { //0x12=.
146
+ 0, 0, 0
147
+ },
148
+ { //0x13=.
149
+ 0, 0, 0
150
+ },
151
+ { //0x14=.
152
+ 0, 0, 0
153
+ },
154
+ { //0x15=.
155
+ 0, 0, 0
156
+ },
157
+ { //0x16=.
158
+ 0, 0, 0
159
+ },
160
+ { //0x17=.
161
+ 0, 0, 0
162
+ },
163
+ { //0x18=.
164
+ 0, 0, 0
165
+ },
166
+ { //0x19=.
167
+ 0, 0, 0
168
+ },
169
+ { //0x1a=.
170
+ 0, 0, 0
171
+ },
172
+ { //0x1b=.
173
+ 0, 0, 0
174
+ },
175
+ { //0x1c=.
176
+ 0, 0, 0
177
+ },
178
+ { //0x1d=.
179
+ 0, 0, 0
180
+ },
181
+ { //0x1e=.
182
+ 0, 0, 0
183
+ },
184
+ { //0x1f=.
185
+ 0, 0, 0
186
+ },
187
+ { //0x20=
188
+ 324, 377, 2
189
+ },
190
+ { //0x21=!
191
+ 2, 1, 0
192
+ },
193
+ { //0x22="
194
+ 2, 1, 0
195
+ },
196
+ { //0x23=#
197
+ 1, 0, 1
198
+ },
199
+ { //0x24=$
200
+ 2, 1, 0
201
+ },
202
+ { //0x25=%
203
+ 2, 0, 0
204
+ },
205
+ { //0x26=&
206
+ 2, 1, 0
207
+ },
208
+ { //0x27='
209
+ 1, 21, 8
210
+ },
211
+ { //0x28=(
212
+ 2, 1, 0
213
+ },
214
+ { //0x29=)
215
+ 19, 0, 0
216
+ },
217
+ { //0x2a=*
218
+ 2, 1, 0
219
+ },
220
+ { //0x2b=+
221
+ 1, 0, 0
222
+ },
223
+ { //0x2c=,
224
+ 75, 4, 0
225
+ },
226
+ { //0x2d=-
227
+ 52, 7, 0
228
+ },
229
+ { //0x2e=.
230
+ 190, 16, 3
231
+ },
232
+ { //0x2f=/
233
+ 53, 2, 0
234
+ },
235
+ { //0x30=0
236
+ 399, 0, 0
237
+ },
238
+ { //0x31=1
239
+ 220, 0, 0
240
+ },
241
+ { //0x32=2
242
+ 226, 0, 0
243
+ },
244
+ { //0x33=3
245
+ 128, 0, 0
246
+ },
247
+ { //0x34=4
248
+ 147, 0, 0
249
+ },
250
+ { //0x35=5
251
+ 179, 0, 1
252
+ },
253
+ { //0x36=6
254
+ 173, 0, 0
255
+ },
256
+ { //0x37=7
257
+ 115, 0, 0
258
+ },
259
+ { //0x38=8
260
+ 107, 0, 0
261
+ },
262
+ { //0x39=9
263
+ 934, 0, 1
264
+ },
265
+ { //0x3a=:
266
+ 27, 0, 1
267
+ },
268
+ { //0x3b=;
269
+ 2, 1, 0
270
+ },
271
+ { //0x3c=<
272
+ 2, 1, 0
273
+ },
274
+ { //0x3d==
275
+ 2, 1, 0
276
+ },
277
+ { //0x3e=>
278
+ 2, 1, 0
279
+ },
280
+ { //0x3f=?
281
+ 2, 1, 0
282
+ },
283
+ { //0x40=@
284
+ 2, 1, 0
285
+ },
286
+ { //0x41=A
287
+ 3, 1, 0
288
+ },
289
+ { //0x42=B
290
+ 1, 73, 0
291
+ },
292
+ { //0x43=C
293
+ 1, 6, 0
294
+ },
295
+ { //0x44=D
296
+ 1, 24, 0
297
+ },
298
+ { //0x45=E
299
+ 1, 2, 0
300
+ },
301
+ { //0x46=F
302
+ 1, 19, 0
303
+ },
304
+ { //0x47=G
305
+ 1, 2, 0
306
+ },
307
+ { //0x48=H
308
+ 3, 2, 1
309
+ },
310
+ { //0x49=I
311
+ 0, 68, 0
312
+ },
313
+ { //0x4a=J
314
+ 1, 2, 0
315
+ },
316
+ { //0x4b=K
317
+ 1, 2, 0
318
+ },
319
+ { //0x4c=L
320
+ 1, 82, 0
321
+ },
322
+ { //0x4d=M
323
+ 10, 10, 0
324
+ },
325
+ { //0x4e=N
326
+ 3, 239, 0
327
+ },
328
+ { //0x4f=O
329
+ 1, 10, 0
330
+ },
331
+ { //0x50=P
332
+ 0, 1, 3
333
+ },
334
+ { //0x51=Q
335
+ 2, 3, 0
336
+ },
337
+ { //0x52=R
338
+ 1, 43, 0
339
+ },
340
+ { //0x53=S
341
+ 1, 53, 0
342
+ },
343
+ { //0x54=T
344
+ 2, 18, 0
345
+ },
346
+ { //0x55=U
347
+ 1, 2, 0
348
+ },
349
+ { //0x56=V
350
+ 1, 17, 0
351
+ },
352
+ { //0x57=W
353
+ 1, 5, 0
354
+ },
355
+ { //0x58=X
356
+ 1, 6, 0
357
+ },
358
+ { //0x59=Y
359
+ 1, 2, 0
360
+ },
361
+ { //0x5a=Z
362
+ 1, 2, 0
363
+ },
364
+ { //0x5b=[
365
+ 2, 1, 0
366
+ },
367
+ { //0x5c=backslash
368
+ 2, 1, 0
369
+ },
370
+ { //0x5d=]
371
+ 2, 1, 0
372
+ },
373
+ { //0x5e=^
374
+ 2, 1, 0
375
+ },
376
+ { //0x5f=_
377
+ 2, 1, 0
378
+ },
379
+ { //0x60=`
380
+ 1, 0, 2
381
+ },
382
+ { //0x61=a
383
+ 0, 0, 671
384
+ },
385
+ { //0x62=b
386
+ 0, 1, 16
387
+ },
388
+ { //0x63=c
389
+ 0, 2, 1
390
+ },
391
+ { //0x64=d
392
+ 0, 14, 0
393
+ },
394
+ { //0x65=e
395
+ 0, 0, 763
396
+ },
397
+ { //0x66=f
398
+ 0, 186, 0
399
+ },
400
+ { //0x67=g
401
+ 0, 2, 1
402
+ },
403
+ { //0x68=h
404
+ 0, 2, 1
405
+ },
406
+ { //0x69=i
407
+ 0, 0, 818
408
+ },
409
+ { //0x6a=j
410
+ 0, 2, 1
411
+ },
412
+ { //0x6b=k
413
+ 0, 4, 1
414
+ },
415
+ { //0x6c=l
416
+ 0, 26, 3
417
+ },
418
+ { //0x6d=m
419
+ 0, 69, 0
420
+ },
421
+ { //0x6e=n
422
+ 0, 885, 0
423
+ },
424
+ { //0x6f=o
425
+ 0, 17, 722
426
+ },
427
+ { //0x70=p
428
+ 0, 1, 5
429
+ },
430
+ { //0x71=q
431
+ 2, 1, 0
432
+ },
433
+ { //0x72=r
434
+ 0, 21, 0
435
+ },
436
+ { //0x73=s
437
+ 3, 49, 0
438
+ },
439
+ { //0x74=t
440
+ 0, 219, 5
441
+ },
442
+ { //0x75=u
443
+ 0, 0, 56
444
+ },
445
+ { //0x76=v
446
+ 0, 4, 0
447
+ },
448
+ { //0x77=w
449
+ 0, 2, 1
450
+ },
451
+ { //0x78=x
452
+ 0, 2, 1
453
+ },
454
+ { //0x79=y
455
+ 0, 1, 23
456
+ },
457
+ { //0x7a=z
458
+ 0, 2, 1
459
+ },
460
+ { //0x7b={
461
+ 2, 1, 0
462
+ },
463
+ { //0x7c=|
464
+ 59, 0, 3
465
+ },
466
+ { //0x7d=}
467
+ 2, 1, 0
468
+ },
469
+ { //0x7e=~
470
+ 2, 1, 0
471
+ },
472
+ { //0x7f=.
473
+ 0, 0, 0
474
+ },
475
+ { //0x80=.
476
+ 0, 0, 0
477
+ },
478
+ { //0x81=.
479
+ 0, 0, 0
480
+ },
481
+ { //0x82=.
482
+ 0, 0, 0
483
+ },
484
+ { //0x83=.
485
+ 0, 0, 0
486
+ },
487
+ { //0x84=.
488
+ 0, 0, 0
489
+ },
490
+ { //0x85=.
491
+ 0, 0, 0
492
+ },
493
+ { //0x86=.
494
+ 0, 0, 0
495
+ },
496
+ { //0x87=.
497
+ 0, 0, 0
498
+ },
499
+ { //0x88=.
500
+ 0, 0, 0
501
+ },
502
+ { //0x89=.
503
+ 0, 0, 0
504
+ },
505
+ { //0x8a=.
506
+ 0, 0, 0
507
+ },
508
+ { //0x8b=.
509
+ 0, 0, 0
510
+ },
511
+ { //0x8c=.
512
+ 0, 0, 0
513
+ },
514
+ { //0x8d=.
515
+ 0, 0, 0
516
+ },
517
+ { //0x8e=.
518
+ 0, 0, 0
519
+ },
520
+ { //0x8f=.
521
+ 0, 0, 0
522
+ },
523
+ { //0x90=.
524
+ 0, 0, 0
525
+ },
526
+ { //0x91=.
527
+ 0, 0, 0
528
+ },
529
+ { //0x92=.
530
+ 0, 0, 0
531
+ },
532
+ { //0x93=.
533
+ 0, 0, 0
534
+ },
535
+ { //0x94=.
536
+ 0, 0, 0
537
+ },
538
+ { //0x95=.
539
+ 0, 0, 0
540
+ },
541
+ { //0x96=.
542
+ 0, 0, 0
543
+ },
544
+ { //0x97=.
545
+ 0, 0, 0
546
+ },
547
+ { //0x98=.
548
+ 0, 0, 0
549
+ },
550
+ { //0x99=.
551
+ 0, 0, 0
552
+ },
553
+ { //0x9a=.
554
+ 0, 0, 0
555
+ },
556
+ { //0x9b=.
557
+ 0, 0, 0
558
+ },
559
+ { //0x9c=.
560
+ 0, 0, 0
561
+ },
562
+ { //0x9d=.
563
+ 0, 0, 0
564
+ },
565
+ { //0x9e=.
566
+ 0, 0, 0
567
+ },
568
+ { //0x9f=.
569
+ 0, 0, 0
570
+ },
571
+ { //0xa0=.
572
+ 0, 0, 0
573
+ },
574
+ { //0xa1=.
575
+ 0, 0, 0
576
+ },
577
+ { //0xa2=.
578
+ 0, 0, 0
579
+ },
580
+ { //0xa3=.
581
+ 0, 0, 0
582
+ },
583
+ { //0xa4=.
584
+ 0, 0, 0
585
+ },
586
+ { //0xa5=.
587
+ 0, 0, 0
588
+ },
589
+ { //0xa6=.
590
+ 0, 0, 0
591
+ },
592
+ { //0xa7=.
593
+ 0, 0, 0
594
+ },
595
+ { //0xa8=.
596
+ 0, 0, 0
597
+ },
598
+ { //0xa9=.
599
+ 0, 0, 0
600
+ },
601
+ { //0xaa=.
602
+ 0, 0, 0
603
+ },
604
+ { //0xab=.
605
+ 0, 0, 0
606
+ },
607
+ { //0xac=.
608
+ 0, 0, 0
609
+ },
610
+ { //0xad=.
611
+ 0, 0, 0
612
+ },
613
+ { //0xae=.
614
+ 0, 0, 0
615
+ },
616
+ { //0xaf=.
617
+ 0, 0, 0
618
+ },
619
+ { //0xb0=.
620
+ 0, 0, 0
621
+ },
622
+ { //0xb1=.
623
+ 0, 0, 0
624
+ },
625
+ { //0xb2=.
626
+ 0, 0, 0
627
+ },
628
+ { //0xb3=.
629
+ 0, 0, 0
630
+ },
631
+ { //0xb4=.
632
+ 0, 0, 0
633
+ },
634
+ { //0xb5=.
635
+ 0, 0, 0
636
+ },
637
+ { //0xb6=.
638
+ 0, 0, 0
639
+ },
640
+ { //0xb7=.
641
+ 0, 0, 0
642
+ },
643
+ { //0xb8=.
644
+ 0, 0, 0
645
+ },
646
+ { //0xb9=.
647
+ 0, 0, 0
648
+ },
649
+ { //0xba=.
650
+ 0, 0, 0
651
+ },
652
+ { //0xbb=.
653
+ 0, 0, 0
654
+ },
655
+ { //0xbc=.
656
+ 0, 0, 0
657
+ },
658
+ { //0xbd=.
659
+ 0, 0, 0
660
+ },
661
+ { //0xbe=.
662
+ 0, 0, 0
663
+ },
664
+ { //0xbf=.
665
+ 0, 0, 0
666
+ },
667
+ { //0xc0=.
668
+ 0, 0, 0
669
+ },
670
+ { //0xc1=.
671
+ 0, 0, 0
672
+ },
673
+ { //0xc2=.
674
+ 0, 0, 0
675
+ },
676
+ { //0xc3=.
677
+ 0, 0, 0
678
+ },
679
+ { //0xc4=.
680
+ 0, 0, 0
681
+ },
682
+ { //0xc5=.
683
+ 0, 0, 0
684
+ },
685
+ { //0xc6=.
686
+ 0, 0, 0
687
+ },
688
+ { //0xc7=.
689
+ 0, 0, 0
690
+ },
691
+ { //0xc8=.
692
+ 0, 0, 0
693
+ },
694
+ { //0xc9=.
695
+ 0, 0, 0
696
+ },
697
+ { //0xca=.
698
+ 0, 0, 0
699
+ },
700
+ { //0xcb=.
701
+ 0, 0, 0
702
+ },
703
+ { //0xcc=.
704
+ 0, 0, 0
705
+ },
706
+ { //0xcd=.
707
+ 0, 0, 0
708
+ },
709
+ { //0xce=.
710
+ 0, 0, 0
711
+ },
712
+ { //0xcf=.
713
+ 0, 0, 0
714
+ },
715
+ { //0xd0=.
716
+ 0, 0, 0
717
+ },
718
+ { //0xd1=.
719
+ 0, 0, 0
720
+ },
721
+ { //0xd2=.
722
+ 0, 0, 0
723
+ },
724
+ { //0xd3=.
725
+ 0, 0, 0
726
+ },
727
+ { //0xd4=.
728
+ 0, 0, 0
729
+ },
730
+ { //0xd5=.
731
+ 0, 0, 0
732
+ },
733
+ { //0xd6=.
734
+ 0, 0, 0
735
+ },
736
+ { //0xd7=.
737
+ 0, 0, 0
738
+ },
739
+ { //0xd8=.
740
+ 0, 0, 0
741
+ },
742
+ { //0xd9=.
743
+ 0, 0, 0
744
+ },
745
+ { //0xda=.
746
+ 0, 0, 0
747
+ },
748
+ { //0xdb=.
749
+ 0, 0, 0
750
+ },
751
+ { //0xdc=.
752
+ 0, 0, 0
753
+ },
754
+ { //0xdd=.
755
+ 0, 0, 0
756
+ },
757
+ { //0xde=.
758
+ 0, 0, 0
759
+ },
760
+ { //0xdf=.
761
+ 0, 0, 0
762
+ },
763
+ { //0xe0=.
764
+ 0, 0, 0
765
+ },
766
+ { //0xe1=.
767
+ 0, 0, 0
768
+ },
769
+ { //0xe2=.
770
+ 0, 0, 0
771
+ },
772
+ { //0xe3=.
773
+ 0, 0, 0
774
+ },
775
+ { //0xe4=.
776
+ 0, 0, 0
777
+ },
778
+ { //0xe5=.
779
+ 0, 0, 0
780
+ },
781
+ { //0xe6=.
782
+ 0, 0, 0
783
+ },
784
+ { //0xe7=.
785
+ 0, 0, 0
786
+ },
787
+ { //0xe8=.
788
+ 0, 0, 0
789
+ },
790
+ { //0xe9=.
791
+ 0, 0, 0
792
+ },
793
+ { //0xea=.
794
+ 0, 0, 0
795
+ },
796
+ { //0xeb=.
797
+ 0, 0, 0
798
+ },
799
+ { //0xec=.
800
+ 0, 0, 0
801
+ },
802
+ { //0xed=.
803
+ 0, 0, 0
804
+ },
805
+ { //0xee=.
806
+ 0, 0, 0
807
+ },
808
+ { //0xef=.
809
+ 0, 0, 0
810
+ },
811
+ { //0xf0=.
812
+ 0, 0, 0
813
+ },
814
+ { //0xf1=.
815
+ 0, 0, 0
816
+ },
817
+ { //0xf2=.
818
+ 0, 0, 0
819
+ },
820
+ { //0xf3=.
821
+ 0, 0, 0
822
+ },
823
+ { //0xf4=.
824
+ 0, 0, 0
825
+ },
826
+ { //0xf5=.
827
+ 0, 0, 0
828
+ },
829
+ { //0xf6=.
830
+ 0, 0, 0
831
+ },
832
+ { //0xf7=.
833
+ 0, 0, 0
834
+ },
835
+ { //0xf8=.
836
+ 0, 0, 0
837
+ },
838
+ { //0xf9=.
839
+ 0, 0, 0
840
+ },
841
+ { //0xfa=.
842
+ 0, 0, 0
843
+ },
844
+ { //0xfb=.
845
+ 0, 0, 0
846
+ },
847
+ { //0xfc=.
848
+ 0, 0, 0
849
+ },
850
+ { //0xfd=.
851
+ 0, 0, 0
852
+ },
853
+ { //0xfe=.
854
+ 0, 0, 0
855
+ },
856
+ { //0xff=.
857
+ 0, 0, 0
858
+ },
859
+ };
860
+ #endif
861
+
862
+ //extern "C" double permuter_pending_threshold;
863
+
864
+ /* Similarity matcher: factor the rating is multiplied by to derive a certainty */
865
+ #define SIM_CERTAINTY_SCALE -10.0
866
+ /* Similarity matcher: constant added to the scaled rating to derive a certainty */
867
+ #define SIM_CERTAINTY_OFFSET -10.0
868
+ /* Worst E*L product to stop on */
869
+ #define SIMILARITY_FLOOR 100.0
870
+ /*----------------------------------------------------------------------
871
+ F u n c t i o n s
872
+ ----------------------------------------------------------------------*/
873
+
874
+ /**********************************************************************
875
+ * good_choice
876
+ *
877
+ * Return TRUE if a good answer is found for the unknown blob rating.
878
+ **********************************************************************/
879
+ int good_choice(A_CHOICE *choice) {
880
+ register float certainty;
881
+ if (choice == NULL)
882
+ return (FALSE);
883
+ if (similarity_enable) {
884
+ if ((class_probability (choice) + 1) * class_certainty (choice) >
885
+ SIMILARITY_FLOOR)
886
+ return (FALSE);
887
+ certainty =
888
+ SIM_CERTAINTY_OFFSET +
889
+ class_probability (choice) * SIM_CERTAINTY_SCALE;
890
+ }
891
+
892
+ else {
893
+ certainty = class_certainty (choice);
894
+ }
895
+ if (certainty > certainty_threshold) {
896
+ return (TRUE);
897
+ }
898
+
899
+ else {
900
+ return (FALSE);
901
+ }
902
+ }
903
+
904
+
905
+ /**********************************************************************
906
+ * add_document_word
907
+ *
908
+ * Add a word found on this document to the document specific
909
+ * dictionary.
910
+ **********************************************************************/
911
+ void add_document_word(A_CHOICE *best_choice) {
912
+ char filename[CHARS_PER_LINE];
913
+ FILE *doc_word_file;
914
+ char *string;
915
+ char *lengths;
916
+ int stringlen; //length of word
917
+
918
+ string = class_string (best_choice);
919
+ lengths = class_lengths (best_choice);
920
+ stringlen = strlen (lengths);
921
+
922
+ // Skip if using external dictionary.
923
+ if (letter_is_okay != &def_letter_is_okay) return;
924
+
925
+ if (!doc_dict_enable
926
+ || valid_word (string) || CurrentWordAmbig () || stringlen < 2)
927
+ return;
928
+
929
+ if (!good_choice (best_choice) || stringlen == 2) {
930
+ if (class_certainty (best_choice) < permuter_pending_threshold)
931
+ return;
932
+ if (!word_in_dawg (pending_words, string)) {
933
+ if (stringlen > 2 ||
934
+ (stringlen >= 2 && unicharset.get_isupper (string, lengths[0]) &&
935
+ unicharset.get_isupper (string + lengths[0], lengths[1])))
936
+ add_word_to_dawg(pending_words,
937
+ string,
938
+ MAX_DOC_EDGES,
939
+ RESERVED_DOC_EDGES);
940
+ return;
941
+ }
942
+ }
943
+
944
+ if (save_doc_words) {
945
+ strcpy(filename, imagefile);
946
+ strcat (filename, ".doc");
947
+ doc_word_file = open_file (filename, "a");
948
+ fprintf (doc_word_file, "%s\n", string);
949
+ fclose(doc_word_file);
950
+ }
951
+ add_word_to_dawg(document_words, string, MAX_DOC_EDGES, RESERVED_DOC_EDGES);
952
+ }
953
+
954
+
955
+ /**********************************************************************
956
+ * adjust_non_word
957
+ *
958
+ * Assign an adjusted value to a string that is a non-word. The value
959
+ * that this word choice has is based on case and punctuation rules.
960
+ **********************************************************************/
961
+ void
962
+ adjust_non_word (A_CHOICE * best_choice, float certainties[]) {
963
+ char *this_word;
964
+ float adjust_factor;
965
+
966
+ if (adjust_debug)
967
+ cprintf ("%s %4.2f ",
968
+ class_string (best_choice), class_probability (best_choice));
969
+
970
+ this_word = class_string (best_choice);
971
+
972
+ class_probability (best_choice) += RATING_PAD;
973
+ if (case_ok (this_word, class_lengths (best_choice))
974
+ && punctuation_ok (this_word, class_lengths (best_choice)) != -1) {
975
+ class_probability (best_choice) *= non_word;
976
+ adjust_factor = non_word;
977
+ if (adjust_debug)
978
+ cprintf (", %4.2f ", non_word);
979
+ }
980
+ else {
981
+ class_probability (best_choice) *= garbage;
982
+ adjust_factor = garbage;
983
+ if (adjust_debug) {
984
+ if (!case_ok (this_word, class_lengths (best_choice)))
985
+ cprintf (", C");
986
+ if (punctuation_ok (this_word, class_lengths (best_choice)) == -1)
987
+ cprintf (", P");
988
+ cprintf (", %4.2f ", garbage);
989
+ }
990
+ }
991
+
992
+ class_probability (best_choice) -= RATING_PAD;
993
+
994
+ LogNewWordChoice(best_choice, adjust_factor, certainties);
995
+
996
+ if (adjust_debug)
997
+ cprintf (" --> %4.2f\n", class_probability (best_choice));
998
+ }
999
+
1000
+
1001
+ /**********************************************************************
1002
+ * init_permute
1003
+ *
1004
+ * Initialize anything that needs to be set up for the permute
1005
+ * functions.
1006
+ **********************************************************************/
1007
+ void init_permute_vars() {
1008
+ make_adjust_debug();
1009
+ make_compound_debug();
1010
+ make_non_word();
1011
+ make_garbage();
1012
+ make_doc_words();
1013
+ make_doc_dict();
1014
+
1015
+ init_permdawg_vars();
1016
+ init_permnum();
1017
+ }
1018
+
1019
+ void init_permute() {
1020
+ if (word_dawg != NULL)
1021
+ end_permute();
1022
+ init_permdawg();
1023
+ STRING name;
1024
+ name = language_data_path_prefix;
1025
+ name += "word-dawg";
1026
+ word_dawg = read_squished_dawg(name.string());
1027
+
1028
+ document_words =
1029
+ (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_DOC_EDGES);
1030
+ initialize_dawg(document_words, MAX_DOC_EDGES);
1031
+
1032
+ pending_words =
1033
+ (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_DOC_EDGES);
1034
+ initialize_dawg(pending_words, MAX_DOC_EDGES);
1035
+
1036
+ user_words = (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_USER_EDGES);
1037
+ name = language_data_path_prefix;
1038
+ name += "user-words";
1039
+ read_word_list(name.string(), user_words, MAX_USER_EDGES, USER_RESERVED_EDGES);
1040
+ }
1041
+
1042
+ void end_permute() {
1043
+ if (word_dawg == NULL)
1044
+ return; // Not safe to call twice.
1045
+ memfree(word_dawg);
1046
+ word_dawg = NULL;
1047
+ memfree(document_words);
1048
+ document_words = NULL;
1049
+ memfree(pending_words);
1050
+ pending_words = NULL;
1051
+ memfree(user_words);
1052
+ user_words = NULL;
1053
+ end_permdawg();
1054
+ }
1055
+
1056
+ /**********************************************************************
1057
+ * permute_all
1058
+ *
1059
+ * Permute all the characters together using all of the different types
1060
+ * of permuters/selectors available. Each of the characters must have
1061
+ * a non-NIL choice list.
1062
+ **********************************************************************/
1063
+ A_CHOICE *permute_all(CHOICES_LIST char_choices,
1064
+ float rating_limit,
1065
+ A_CHOICE *raw_choice) {
1066
+ A_CHOICE *result_1;
1067
+ A_CHOICE *result_2 = NULL;
1068
+ BOOL8 any_alpha;
1069
+
1070
+ result_1 = permute_top_choice (char_choices, rating_limit, raw_choice,
1071
+ &any_alpha);
1072
+
1073
+ if (ngram_permuter_activated)
1074
+ return ngram_permute_and_select(char_choices, rating_limit, word_dawg);
1075
+
1076
+ if (result_1 == NULL)
1077
+ return (NULL);
1078
+ if (permute_only_top)
1079
+ return result_1;
1080
+ if (any_alpha && array_count (char_choices) <= MAX_WERD_LENGTH) {
1081
+ result_2 = permute_words (char_choices, rating_limit);
1082
+ if (class_probability (result_1) < class_probability (result_2)
1083
+ || class_string (result_2) == NULL) {
1084
+ free_choice(result_2);
1085
+ }
1086
+ else {
1087
+ free_choice(result_1);
1088
+ result_1 = result_2;
1089
+ }
1090
+ }
1091
+
1092
+ result_2 = number_permute_and_select (char_choices, rating_limit);
1093
+
1094
+ if (class_probability (result_1) < class_probability (result_2)
1095
+ || class_string (result_2) == NULL) {
1096
+ free_choice(result_2);
1097
+ }
1098
+ else {
1099
+ free_choice(result_1);
1100
+ result_1 = result_2;
1101
+ }
1102
+
1103
+ result_2 = permute_compound_words (char_choices, rating_limit);
1104
+
1105
+ if (!result_2 ||
1106
+ class_probability (result_1) < class_probability (result_2)
1107
+ || class_string (result_2) == NULL) {
1108
+ free_choice(result_2);
1109
+ }
1110
+ else {
1111
+ free_choice(result_1);
1112
+ result_1 = result_2;
1113
+ }
1114
+
1115
+ return (result_1);
1116
+ }
1117
+
1118
+
1119
+ /**********************************************************************
1120
+ * permute_characters
1121
+ *
1122
+ * Permute these characters together according to each of the different
1123
+ * permuters that are enabled.
1124
+ **********************************************************************/
1125
+ void permute_characters(CHOICES_LIST char_choices,
1126
+ float limit,
1127
+ A_CHOICE *best_choice,
1128
+ A_CHOICE *raw_choice) {
1129
+ A_CHOICE *this_choice;
1130
+
1131
+ permutation_count++; /* Global counter */
1132
+
1133
+ this_choice = permute_all (char_choices, limit, raw_choice);
1134
+
1135
+ if (this_choice &&
1136
+ class_probability (this_choice) < class_probability (best_choice)) {
1137
+ clone_choice(best_choice, this_choice);
1138
+ }
1139
+ free_choice(this_choice);
1140
+
1141
+ if (display_ratings)
1142
+ print_word_choice("permute_characters", best_choice);
1143
+ }
1144
+
1145
+
1146
+ /**********************************************************************
1147
+ * permute_compound_word
1148
+ *
1149
+ * Return the top choice for each character as the choice for the word.
1150
+ **********************************************************************/
1151
+ A_CHOICE *permute_compound_words(CHOICES_LIST character_choices,
1152
+ float rating_limit) {
1153
+ A_CHOICE *first_choice;
1154
+ A_CHOICE *best_choice = NULL;
1155
+ char word[UNICHAR_LEN * MAX_WERD_LENGTH + 1];
1156
+ char unichar_lengths[MAX_WERD_LENGTH + 1];
1157
+ float rating = 0;
1158
+ float certainty = 10000;
1159
+ char char_choice;
1160
+ int x;
1161
+ int first_index = 0;
1162
+ char *ptr;
1163
+
1164
+ word[0] = '\0';
1165
+ unichar_lengths[0] = 0;
1166
+
1167
+ if (array_count (character_choices) > MAX_WERD_LENGTH) {
1168
+ return (new_choice (NULL, NULL, MAX_FLOAT32, -MAX_FLOAT32, -1, NO_PERM));
1169
+ }
1170
+
1171
+ array_loop(character_choices, x) {
1172
+
1173
+ first_choice =
1174
+ (A_CHOICE *) first_node ((CHOICES) array_value (character_choices, x));
1175
+
1176
+ ptr = class_string (first_choice);
1177
+ char_choice = ptr != NULL ? *ptr : '\0';
1178
+ if (x > first_index && (char_choice == '-' || char_choice == '/')) {
1179
+ if (compound_debug)
1180
+ cprintf ("Hyphenated word found\n");
1181
+
1182
+ permute_subword (character_choices, rating_limit,
1183
+ first_index, x - 1, word, unichar_lengths,
1184
+ &rating, &certainty);
1185
+
1186
+ if (rating > rating_limit)
1187
+ break;
1188
+ first_index = x + 1;
1189
+
1190
+ strcat(word, class_string (first_choice));
1191
+ char length[] = {strlen(class_string (first_choice)), 0};
1192
+ strcat(unichar_lengths + x, length);
1193
+ rating += class_probability (first_choice);
1194
+ certainty = min (class_certainty (first_choice), certainty);
1195
+ }
1196
+ }
1197
+
1198
+ if (first_index > 0 && first_index < x && rating <= rating_limit) {
1199
+ permute_subword (character_choices, rating_limit,
1200
+ first_index, x - 1, word, unichar_lengths,
1201
+ &rating, &certainty);
1202
+
1203
+ best_choice = new_choice (word, unichar_lengths, rating,
1204
+ certainty, -1, COMPOUND_PERM);
1205
+ }
1206
+ return (best_choice);
1207
+ }
1208
+
1209
+
1210
+ /**********************************************************************
1211
+ * permute_subword
1212
+ *
1213
+ * Permute a part of a compound word this subword is bounded by hyphens
1214
+ * and the start and end of the word. Call the standard word permute
1215
+ * function on a set of choices covering only part of the original
1216
+ * word. When it is done reclaim the memory that was used in the
1217
+ * excercise.
1218
+ **********************************************************************/
1219
+ void permute_subword(CHOICES_LIST character_choices,
1220
+ float rating_limit,
1221
+ int start,
1222
+ int end,
1223
+ char *word,
1224
+ char unichar_lengths[],
1225
+ float *rating,
1226
+ float *certainty) {
1227
+ int x;
1228
+ A_CHOICE *best_choice = NULL;
1229
+ A_CHOICE raw_choice;
1230
+ CHOICES_LIST subchoices;
1231
+ CHOICES choices;
1232
+ char this_char;
1233
+ char *ptr;
1234
+
1235
+ DisableChoiceAccum();
1236
+ raw_choice.string = NULL;
1237
+ raw_choice.lengths = NULL;
1238
+ raw_choice.rating = MAX_INT16;
1239
+ raw_choice.certainty = -MAX_INT16;
1240
+
1241
+ subchoices = new_choice_list ();
1242
+ for (x = start; x <= end; x++) {
1243
+ choices = (CHOICES) array_value (character_choices, x);
1244
+ ptr = best_string (choices);
1245
+ this_char = ptr != NULL ? *ptr : '\0';
1246
+ if (this_char != '-' && this_char != '/') {
1247
+ subchoices = array_push (subchoices, choices);
1248
+ } else {
1249
+ const char* str = best_string(choices);
1250
+ strcat(word, str);
1251
+ char length[] = {strlen(str), 0};
1252
+ strcat(unichar_lengths + x, length);
1253
+ }
1254
+ }
1255
+
1256
+ if (array_count (subchoices)) {
1257
+ if (compound_debug)
1258
+ dawg_debug = TRUE;
1259
+ best_choice = permute_all (subchoices, rating_limit, &raw_choice);
1260
+ if (compound_debug)
1261
+ dawg_debug = FALSE;
1262
+
1263
+ if (best_choice && class_string (best_choice)) {
1264
+ strcat (word, class_string (best_choice));
1265
+ strcat (unichar_lengths, class_lengths (best_choice));
1266
+ *rating += class_probability (best_choice);
1267
+ *certainty = min (class_certainty (best_choice), *certainty);
1268
+ }
1269
+ else {
1270
+ *rating = MAX_FLOAT32;
1271
+ }
1272
+ }
1273
+ else {
1274
+ *rating = MAX_FLOAT32;
1275
+ }
1276
+
1277
+ free_choice_list(subchoices);
1278
+ if (best_choice)
1279
+ free_choice(best_choice);
1280
+
1281
+ if (compound_debug && *rating < MAX_FLOAT32) {
1282
+ cprintf ("Subword permuted = %s, %5.2f, %5.2f\n\n",
1283
+ word, *rating, *certainty);
1284
+ }
1285
+ if (raw_choice.string)
1286
+ strfree(raw_choice.string);
1287
+ if (raw_choice.lengths)
1288
+ strfree(raw_choice.lengths);
1289
+
1290
+ EnableChoiceAccum();
1291
+ }
1292
+
1293
+
1294
+ /**********************************************************************
1295
+ * permute_top_choice
1296
+ *
1297
+ * Return the top choice for each character as the choice for the word.
1298
+ * In addition a choice is created for the best lower and upper case
1299
+ * non-words. In each character position the best lower (or upper) case
1300
+ * character is substituted for the best overall character.
1301
+ **********************************************************************/
1302
+ A_CHOICE *permute_top_choice(CHOICES_LIST character_choices,
1303
+ float rating_limit,
1304
+ A_CHOICE *raw_choice,
1305
+ BOOL8 *any_alpha) {
1306
+ CHOICES char_list;
1307
+ A_CHOICE *first_choice;
1308
+ A_CHOICE *best_choice;
1309
+ A_CHOICE *other_choice;
1310
+ const char *ptr;
1311
+ const char *first_char; //first choice
1312
+ const char *second_char; //second choice
1313
+ const char *third_char; //third choice
1314
+ char prev_char[UNICHAR_LEN + 1]; //prev in word
1315
+ const char *next_char = ""; //next in word
1316
+ const char *next_next_char = ""; //after next next in word
1317
+
1318
+ char word[UNICHAR_LEN * MAX_PERM_LENGTH + 1];
1319
+ char capital_word[UNICHAR_LEN * MAX_PERM_LENGTH + 1];
1320
+ char lower_word[UNICHAR_LEN * MAX_PERM_LENGTH + 1];
1321
+
1322
+ char word_lengths[MAX_PERM_LENGTH + 1];
1323
+ char capital_word_lengths[MAX_PERM_LENGTH + 1];
1324
+ char lower_word_lengths[MAX_PERM_LENGTH + 1];
1325
+
1326
+ int x;
1327
+ int x_word = 0;
1328
+ int x_capital_word = 0;
1329
+ int x_lower_word = 0;
1330
+ BOOL8 char_alpha;
1331
+
1332
+ float rating = 0;
1333
+ float upper_rating = 0;
1334
+ float lower_rating = 0;
1335
+ float first_rating = 0;
1336
+
1337
+ float certainty = 10000;
1338
+ float upper_certainty = 10000;
1339
+ float lower_certainty = 10000;
1340
+
1341
+ float certainties[MAX_PERM_LENGTH + 1];
1342
+ float lower_certainties[MAX_PERM_LENGTH + 1];
1343
+ float upper_certainties[MAX_PERM_LENGTH + 1];
1344
+
1345
+ register CHOICES this_char;
1346
+ register const char* ch;
1347
+ register inT8 lower_done;
1348
+ register inT8 upper_done;
1349
+
1350
+ prev_char[0] = '\0';
1351
+
1352
+ if (any_alpha != NULL)
1353
+ *any_alpha = FALSE;
1354
+
1355
+ if (array_count (character_choices) > MAX_PERM_LENGTH) {
1356
+ return (NULL);
1357
+ }
1358
+
1359
+ array_loop(character_choices, x) {
1360
+ if (x + 1 < array_count (character_choices)) {
1361
+ char_list = (CHOICES) array_value (character_choices, x + 1);
1362
+ first_choice = (A_CHOICE *) first_node (char_list);
1363
+
1364
+ ptr = class_string (first_choice);
1365
+ next_char = (ptr != NULL && *ptr != '\0') ? ptr : " ";
1366
+ }
1367
+ else
1368
+ next_char = "";
1369
+ if (x + 2 < array_count (character_choices)) {
1370
+ char_list = (CHOICES) array_value (character_choices, x + 2);
1371
+ first_choice = (A_CHOICE *) first_node (char_list);
1372
+
1373
+ ptr = class_string (first_choice);
1374
+ next_next_char = (ptr != NULL && *ptr != '\0') ? ptr : " ";
1375
+ }
1376
+ else
1377
+ next_next_char = "";
1378
+
1379
+ char_list = (CHOICES) array_value (character_choices, x);
1380
+ first_choice = (A_CHOICE *) first_node (char_list);
1381
+
1382
+ ptr = class_string (first_choice);
1383
+ if (ptr != NULL && *ptr != '\0')
1384
+ {
1385
+ strcpy(word + x_word, ptr);
1386
+ word_lengths[x] = strlen(ptr);
1387
+
1388
+ strcpy(capital_word + x_capital_word, ptr);
1389
+ capital_word_lengths[x] = strlen(ptr);
1390
+
1391
+ strcpy(lower_word + x_lower_word, ptr);
1392
+ lower_word_lengths[x] = strlen(ptr);
1393
+ }
1394
+ else
1395
+ {
1396
+ word[x_word] = ' ';
1397
+ word_lengths[x] = 1;
1398
+
1399
+ capital_word[x_capital_word] = ' ';
1400
+ capital_word_lengths[x] = 1;
1401
+
1402
+ lower_word[x_lower_word] = ' ';
1403
+ lower_word_lengths[x] = 1;
1404
+ }
1405
+
1406
+ first_char = (ptr != NULL && *ptr != '\0') ? ptr : " ";
1407
+ first_rating = class_probability (first_choice);
1408
+ upper_rating += class_probability (first_choice);
1409
+ lower_rating += class_probability (first_choice);
1410
+ lower_certainty = min (class_certainty (first_choice), lower_certainty);
1411
+ upper_certainty = min (class_certainty (first_choice), upper_certainty);
1412
+
1413
+ certainties[x] = class_certainty (first_choice);
1414
+ lower_certainties[x] = class_certainty (first_choice);
1415
+ upper_certainties[x] = class_certainty (first_choice);
1416
+
1417
+ lower_done = FALSE;
1418
+ upper_done = FALSE;
1419
+ char_alpha = FALSE;
1420
+ second_char = "";
1421
+ third_char = "";
1422
+ iterate_list(this_char, char_list) {
1423
+ ptr = best_string (this_char);
1424
+ ch = ptr != NULL ? ptr : "";
1425
+ if (strcmp(ch, "l") == 0 && rest (this_char) != NULL
1426
+ && best_probability (rest (this_char)) == first_rating) {
1427
+ ptr = best_string (rest (this_char));
1428
+ if (ptr != NULL && (strcmp(ptr, "1") == 0 || strcmp(ptr, "I") == 0)) {
1429
+ second_char = ptr;
1430
+ this_char = rest (this_char);
1431
+ if (rest (this_char) != NULL
1432
+ && best_probability (rest (this_char)) == first_rating) {
1433
+ ptr = best_string (rest (this_char));
1434
+ if (ptr != NULL && (strcmp(ptr, "1") == 0 || strcmp(ptr, "I") == 0)) {
1435
+ third_char = ptr;
1436
+ this_char = rest (this_char);
1437
+ }
1438
+ }
1439
+ ch = choose_il1 (first_char, second_char, third_char,
1440
+ prev_char, next_char, next_next_char);
1441
+ if (strcmp(ch, "l") != 0 && word_lengths[x] == 1 &&
1442
+ word[x_word] == 'l') {
1443
+ word[x_word] = *ch;
1444
+ lower_word[x_lower_word] = *ch;
1445
+ capital_word[x_capital_word] = *ch;
1446
+ }
1447
+ }
1448
+ }
1449
+ if (ch != NULL && *ch != '\0') {
1450
+ /* Find lower case */
1451
+ if (!lower_done && (unicharset.get_islower(ch) ||
1452
+ (unicharset.get_isupper(ch) && x == 0))) {
1453
+ strcpy(lower_word + x_lower_word, ch);
1454
+ lower_word_lengths[x] = strlen(ch);
1455
+ lower_rating += best_probability (this_char);
1456
+ lower_rating -= class_probability (first_choice);
1457
+ lower_certainty = min (best_certainty (this_char), lower_certainty);
1458
+ lower_certainties[x] = best_certainty (this_char);
1459
+ lower_done = TRUE;
1460
+ }
1461
+ /* Find upper case */
1462
+ if (!upper_done && unicharset.get_isupper(ch)) {
1463
+ strcpy(capital_word + x_capital_word, ch);
1464
+ capital_word_lengths[x] = strlen(ch);
1465
+ upper_rating += best_probability (this_char);
1466
+ upper_rating -= class_probability (first_choice);
1467
+ upper_certainty = min (best_certainty (this_char), upper_certainty);
1468
+ upper_certainties[x] = best_certainty (this_char);
1469
+ upper_done = TRUE;
1470
+ }
1471
+ if (!char_alpha && unicharset.get_isalpha(ch))
1472
+ char_alpha = TRUE;
1473
+ if (lower_done && upper_done)
1474
+ break;
1475
+ }
1476
+ }
1477
+ if (char_alpha && any_alpha != NULL)
1478
+ *any_alpha = TRUE;
1479
+
1480
+ if (first_choice == NULL) {
1481
+ cprintf ("Permuter giving up due to null choices list");
1482
+ word[x_word + 1] = '$';
1483
+ word[x_word + 2] = '\0';
1484
+ word_lengths[x + 1] = 1;
1485
+ word_lengths[x + 2] = 0;
1486
+ cprintf (" word=%s\n", word);
1487
+ return (NULL);
1488
+ }
1489
+
1490
+ rating += class_probability (first_choice);
1491
+ if (rating > rating_limit)
1492
+ return (NULL);
1493
+
1494
+ certainty = min (class_certainty (first_choice), certainty);
1495
+
1496
+ strncpy(prev_char, word + x_word, word_lengths[x]);
1497
+ prev_char[word_lengths[x]] = '\0';
1498
+
1499
+ x_word += word_lengths[x];
1500
+ x_capital_word += capital_word_lengths[x];
1501
+ x_lower_word += lower_word_lengths[x];
1502
+ }
1503
+
1504
+ word[x_word] = '\0';
1505
+ word_lengths[x] = 0;
1506
+
1507
+ capital_word[x_capital_word] = '\0';
1508
+ capital_word_lengths[x] = 0;
1509
+
1510
+ lower_word[x_lower_word] = '\0';
1511
+ lower_word_lengths[x] = 0;
1512
+
1513
+ if (rating < class_probability (raw_choice)) {
1514
+ if (class_string (raw_choice))
1515
+ strfree (class_string (raw_choice));
1516
+ if (class_lengths (raw_choice))
1517
+ strfree (class_lengths (raw_choice));
1518
+
1519
+ class_probability (raw_choice) = rating;
1520
+ class_certainty (raw_choice) = certainty;
1521
+ class_string (raw_choice) = strsave (word);
1522
+ class_lengths (raw_choice) = strsave (word_lengths);
1523
+ class_permuter (raw_choice) = TOP_CHOICE_PERM;
1524
+
1525
+ LogNewRawChoice (raw_choice, 1.0, certainties);
1526
+ }
1527
+
1528
+ if (ngram_permuter_activated)
1529
+ return NULL;
1530
+
1531
+ best_choice = new_choice (word, word_lengths,
1532
+ rating, certainty, -1, TOP_CHOICE_PERM);
1533
+ adjust_non_word(best_choice, certainties);
1534
+
1535
+ other_choice = new_choice (lower_word, lower_word_lengths,
1536
+ lower_rating, lower_certainty,
1537
+ -1, LOWER_CASE_PERM);
1538
+ adjust_non_word(other_choice, lower_certainties);
1539
+ if (class_probability (best_choice) > class_probability (other_choice)) {
1540
+ clone_choice(best_choice, other_choice);
1541
+ }
1542
+ free_choice(other_choice);
1543
+
1544
+ other_choice = new_choice (capital_word, capital_word_lengths,
1545
+ upper_rating, upper_certainty,
1546
+ -1, UPPER_CASE_PERM);
1547
+ adjust_non_word(other_choice, upper_certainties);
1548
+ if (class_probability (best_choice) > class_probability (other_choice)) {
1549
+ clone_choice(best_choice, other_choice);
1550
+ }
1551
+ free_choice(other_choice);
1552
+ return (best_choice);
1553
+ }
1554
+
1555
+
1556
+ /**********************************************************************
1557
+ * choose_il1
1558
+ *
1559
+ * Choose between the candidate il1 chars.
1560
+ **********************************************************************/
1561
+ const char* choose_il1(const char *first_char, //first choice
1562
+ const char *second_char, //second choice
1563
+ const char *third_char, //third choice
1564
+ const char *prev_char, //prev in word
1565
+ const char *next_char, //next in word
1566
+ const char *next_next_char) { //after next next in word
1567
+ inT32 type1; //1/I/l type of first choice
1568
+ inT32 type2; //1/I/l type of second choice
1569
+ inT32 type3; //1/I/l type of third choice
1570
+
1571
+ int first_char_length = strlen(first_char);
1572
+ int prev_char_length = strlen(prev_char);
1573
+ int next_char_length = strlen(next_char);
1574
+ int next_next_char_length = strlen(next_next_char);
1575
+
1576
+ if (*first_char == 'l' && *second_char != '\0') {
1577
+ if (*second_char == 'I'
1578
+ && (((prev_char_length != 0 &&
1579
+ unicharset.get_isupper (prev_char, prev_char_length)) &&
1580
+ (next_char_length == 0 ||
1581
+ !unicharset.get_islower (next_char, next_char_length)) &&
1582
+ (next_char_length == 0 ||
1583
+ !unicharset.get_isdigit (next_char, next_char_length))) ||
1584
+ ((next_char_length != 0 &&
1585
+ unicharset.get_isupper (next_char, next_char_length)) &&
1586
+ (prev_char_length == 0 ||
1587
+ !unicharset.get_islower (prev_char, prev_char_length)) &&
1588
+ (prev_char_length == 0 ||
1589
+ !unicharset.get_isdigit (prev_char, prev_char_length)))))
1590
+ first_char = second_char; //override
1591
+ else if (*second_char == '1' || *third_char == '1') {
1592
+ if ((next_char_length != 0 &&
1593
+ unicharset.get_isdigit (next_char, next_char_length)) ||
1594
+ (prev_char_length != 0 &&
1595
+ unicharset.get_isdigit (prev_char, prev_char_length))
1596
+ || (*next_char == 'l' &&
1597
+ (next_next_char_length != 0 &&
1598
+ unicharset.get_isdigit (next_next_char, next_next_char_length)))) {
1599
+ first_char = "1";
1600
+ first_char_length = 1;
1601
+ }
1602
+ else if ((prev_char_length == 0 ||
1603
+ !unicharset.get_islower (prev_char, prev_char_length)) &&
1604
+ ((next_char_length == 0 ||
1605
+ !unicharset.get_islower (next_char, next_char_length)) ||
1606
+ (*next_char == 's' &&
1607
+ *next_next_char == 't'))) {
1608
+ if (((*prev_char != '\'' && *prev_char != '`') || *next_char != '\0')
1609
+ && ((*next_char != '\'' && *next_char != '`')
1610
+ || *prev_char != '\0')) {
1611
+ first_char = "1";
1612
+ first_char_length = 1;
1613
+ }
1614
+ }
1615
+ }
1616
+ if (*first_char == 'l' && *next_char != '\0' &&
1617
+ (prev_char_length == 0 ||
1618
+ !unicharset.get_isalpha (prev_char, prev_char_length))) {
1619
+ type1 = 2;
1620
+
1621
+ if (*second_char == '1')
1622
+ type2 = 0;
1623
+ else if (*second_char == 'I')
1624
+ type2 = 1;
1625
+ else if (*second_char == 'l')
1626
+ type2 = 2;
1627
+ else
1628
+ type2 = type1;
1629
+
1630
+ if (*third_char == '1')
1631
+ type3 = 0;
1632
+ else if (*third_char == 'I')
1633
+ type3 = 1;
1634
+ else if (*third_char == 'l')
1635
+ type3 = 2;
1636
+ else
1637
+ type3 = type1;
1638
+
1639
+ #if 0
1640
+ if (bigram_counts[*next_char][type2] >
1641
+ bigram_counts[*next_char][type1]) {
1642
+ first_char = second_char;
1643
+ type1 = type2;
1644
+ }
1645
+ if (bigram_counts[*next_char][type3] >
1646
+ bigram_counts[*next_char][type1]) {
1647
+ first_char = third_char;
1648
+ }
1649
+ #endif
1650
+ }
1651
+ }
1652
+ return first_char;
1653
+ }
1654
+
1655
+
1656
+ /**********************************************************************
1657
+ * permute_words
1658
+ *
1659
+ * Permute all the characters together using the dawg to prune all
1660
+ * but the valid words.
1661
+ **********************************************************************/
1662
+ A_CHOICE *permute_words(CHOICES_LIST char_choices, float rating_limit) {
1663
+ A_CHOICE *best_choice;
1664
+
1665
+ best_choice = new_choice (NULL, NULL, rating_limit, -MAX_FLOAT32, -1, NO_PERM);
1666
+
1667
+ if (hyphen_base_size() + array_count (char_choices) > MAX_WERD_LENGTH) {
1668
+ class_probability (best_choice) = MAX_FLOAT32;
1669
+ }
1670
+ else {
1671
+
1672
+ dawg_permute_and_select ("system words:", word_dawg, SYSTEM_DAWG_PERM,
1673
+ char_choices, best_choice);
1674
+
1675
+ dawg_permute_and_select ("document_words", document_words,
1676
+ DOC_DAWG_PERM, char_choices, best_choice);
1677
+
1678
+ dawg_permute_and_select ("user words", user_words, USER_DAWG_PERM,
1679
+ char_choices, best_choice);
1680
+ }
1681
+
1682
+ return (best_choice);
1683
+ }
1684
+
1685
+
1686
+ /**********************************************************************
1687
+ * valid_word
1688
+ *
1689
+ * Check all the DAWGs to see if this word is in any of them.
1690
+ **********************************************************************/
1691
+ int valid_word(const char *string) {
1692
+ int result = NO_PERM;
1693
+
1694
+ if (word_in_dawg (word_dawg, string))
1695
+ result = SYSTEM_DAWG_PERM;
1696
+ else {
1697
+ if (word_in_dawg (document_words, string))
1698
+ result = DOC_DAWG_PERM;
1699
+ else if (word_in_dawg (user_words, string))
1700
+ result = USER_DAWG_PERM;
1701
+ }
1702
+ return (result);
1703
+ }
1704
+