tesseract_bin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (612)
  1. data/.document +5 -0
  2. data/Gemfile +14 -0
  3. data/Gemfile.lock +23 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +46 -0
  7. data/VERSION +1 -0
  8. data/ext/tesseract_bin/extconf.rb +17 -0
  9. data/lib/tesseract_bin.rb +12 -0
  10. data/tesseract_bin.gemspec +660 -0
  11. data/test/helper.rb +18 -0
  12. data/test/test_tesseract_bin.rb +7 -0
  13. data/vendor/tesseract-2.04/AUTHORS +8 -0
  14. data/vendor/tesseract-2.04/COPYING +23 -0
  15. data/vendor/tesseract-2.04/ChangeLog +71 -0
  16. data/vendor/tesseract-2.04/INSTALL +229 -0
  17. data/vendor/tesseract-2.04/Makefile.am +20 -0
  18. data/vendor/tesseract-2.04/Makefile.in +641 -0
  19. data/vendor/tesseract-2.04/NEWS +1 -0
  20. data/vendor/tesseract-2.04/README +138 -0
  21. data/vendor/tesseract-2.04/ReleaseNotes +213 -0
  22. data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
  23. data/vendor/tesseract-2.04/StdAfx.h +24 -0
  24. data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
  25. data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
  26. data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
  27. data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
  28. data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
  29. data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
  30. data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
  31. data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
  32. data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
  33. data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
  34. data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
  35. data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
  36. data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
  37. data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
  38. data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
  39. data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
  40. data/vendor/tesseract-2.04/ccmain/control.h +198 -0
  41. data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
  42. data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
  43. data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
  44. data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
  45. data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
  46. data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
  47. data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
  48. data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
  49. data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
  50. data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
  51. data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
  52. data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
  53. data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
  54. data/vendor/tesseract-2.04/ccmain/output.h +116 -0
  55. data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
  56. data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
  57. data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
  58. data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
  59. data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
  60. data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
  61. data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
  62. data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
  63. data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
  64. data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
  65. data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
  66. data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
  67. data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
  68. data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
  69. data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
  70. data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
  71. data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
  72. data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
  73. data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
  74. data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
  75. data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
  76. data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
  77. data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
  78. data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
  79. data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
  80. data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
  81. data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
  82. data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
  83. data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
  84. data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
  85. data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
  86. data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
  87. data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
  88. data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
  89. data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
  90. data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
  91. data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
  92. data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
  93. data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
  94. data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
  95. data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
  96. data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
  97. data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
  98. data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
  99. data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
  100. data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
  101. data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
  102. data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
  103. data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
  104. data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
  105. data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
  106. data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
  107. data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
  108. data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
  109. data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
  110. data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
  111. data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
  112. data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
  113. data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
  114. data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
  115. data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
  116. data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
  117. data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
  118. data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
  119. data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
  120. data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
  121. data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
  122. data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
  123. data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
  124. data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
  125. data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
  126. data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
  127. data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
  128. data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
  129. data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
  130. data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
  131. data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
  132. data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
  133. data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
  134. data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
  135. data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
  136. data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
  137. data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
  138. data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
  139. data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
  140. data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
  141. data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
  142. data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
  143. data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
  144. data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
  145. data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
  146. data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
  147. data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
  148. data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
  149. data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
  150. data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
  151. data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
  152. data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
  153. data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
  154. data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
  155. data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
  156. data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
  157. data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
  158. data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
  159. data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
  160. data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
  161. data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
  162. data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
  163. data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
  164. data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
  165. data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
  166. data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
  167. data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
  168. data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
  169. data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
  170. data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
  171. data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
  172. data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
  173. data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
  174. data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
  175. data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
  176. data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
  177. data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
  178. data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
  179. data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
  180. data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
  181. data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
  182. data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
  183. data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
  184. data/vendor/tesseract-2.04/ccutil/host.h +180 -0
  185. data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
  186. data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
  187. data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
  188. data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
  189. data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
  190. data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
  191. data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
  192. data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
  193. data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
  194. data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
  195. data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
  196. data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
  197. data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
  198. data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
  199. data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
  200. data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
  201. data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
  202. data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
  203. data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
  204. data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
  205. data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
  206. data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
  207. data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
  208. data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
  209. data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
  210. data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
  211. data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
  212. data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
  213. data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
  214. data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
  215. data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
  216. data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
  217. data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
  218. data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
  219. data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
  220. data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
  221. data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
  222. data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
  223. data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
  224. data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
  225. data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
  226. data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
  227. data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
  228. data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
  229. data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
  230. data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
  231. data/vendor/tesseract-2.04/classify/baseline.h +91 -0
  232. data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
  233. data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
  234. data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
  235. data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
  236. data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
  237. data/vendor/tesseract-2.04/classify/cluster.h +158 -0
  238. data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
  239. data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
  240. data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
  241. data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
  242. data/vendor/tesseract-2.04/classify/extern.h +32 -0
  243. data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
  244. data/vendor/tesseract-2.04/classify/extract.h +36 -0
  245. data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
  246. data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
  247. data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
  248. data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
  249. data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
  250. data/vendor/tesseract-2.04/classify/float2int.h +65 -0
  251. data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
  252. data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
  253. data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
  254. data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
  255. data/vendor/tesseract-2.04/classify/fxid.h +69 -0
  256. data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
  257. data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
  258. data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
  259. data/vendor/tesseract-2.04/classify/intfx.h +63 -0
  260. data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
  261. data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
  262. data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
  263. data/vendor/tesseract-2.04/classify/intproto.h +320 -0
  264. data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
  265. data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
  266. data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
  267. data/vendor/tesseract-2.04/classify/mf.h +43 -0
  268. data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
  269. data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
  270. data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
  271. data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
  272. data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
  273. data/vendor/tesseract-2.04/classify/mfx.h +52 -0
  274. data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
  275. data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
  276. data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
  277. data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
  278. data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
  279. data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
  280. data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
  281. data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
  282. data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
  283. data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
  284. data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
  285. data/vendor/tesseract-2.04/classify/protos.h +258 -0
  286. data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
  287. data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
  288. data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
  289. data/vendor/tesseract-2.04/classify/speckle.h +69 -0
  290. data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
  291. data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
  292. data/vendor/tesseract-2.04/config/config.guess +1466 -0
  293. data/vendor/tesseract-2.04/config/config.h.in +188 -0
  294. data/vendor/tesseract-2.04/config/config.sub +1579 -0
  295. data/vendor/tesseract-2.04/config/depcomp +530 -0
  296. data/vendor/tesseract-2.04/config/install-sh +269 -0
  297. data/vendor/tesseract-2.04/config/missing +198 -0
  298. data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
  299. data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
  300. data/vendor/tesseract-2.04/configure +10424 -0
  301. data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
  302. data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
  303. data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
  304. data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
  305. data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
  306. data/vendor/tesseract-2.04/cutil/const.h +108 -0
  307. data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
  308. data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
  309. data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
  310. data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
  311. data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
  312. data/vendor/tesseract-2.04/cutil/debug.h +348 -0
  313. data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
  314. data/vendor/tesseract-2.04/cutil/efio.h +32 -0
  315. data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
  316. data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
  317. data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
  318. data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
  319. data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
  320. data/vendor/tesseract-2.04/cutil/general.h +33 -0
  321. data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
  322. data/vendor/tesseract-2.04/cutil/globals.h +70 -0
  323. data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
  324. data/vendor/tesseract-2.04/cutil/listio.h +43 -0
  325. data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
  326. data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
  327. data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
  328. data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
  329. data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
  330. data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
  331. data/vendor/tesseract-2.04/cutil/structures.h +112 -0
  332. data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
  333. data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
  334. data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
  335. data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
  336. data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
  337. data/vendor/tesseract-2.04/cutil/variables.h +170 -0
  338. data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
  339. data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
  340. data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
  341. data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
  342. data/vendor/tesseract-2.04/dict/choices.h +241 -0
  343. data/vendor/tesseract-2.04/dict/context.cpp +270 -0
  344. data/vendor/tesseract-2.04/dict/context.h +82 -0
  345. data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
  346. data/vendor/tesseract-2.04/dict/dawg.h +394 -0
  347. data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
  348. data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
  349. data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
  350. data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
  351. data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
  352. data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
  353. data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
  354. data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
  355. data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
  356. data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
  357. data/vendor/tesseract-2.04/dict/permngram.h +33 -0
  358. data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
  359. data/vendor/tesseract-2.04/dict/permnum.h +83 -0
  360. data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
  361. data/vendor/tesseract-2.04/dict/permute.h +93 -0
  362. data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
  363. data/vendor/tesseract-2.04/dict/reduce.h +112 -0
  364. data/vendor/tesseract-2.04/dict/states.cpp +382 -0
  365. data/vendor/tesseract-2.04/dict/states.h +111 -0
  366. data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
  367. data/vendor/tesseract-2.04/dict/stopper.h +103 -0
  368. data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
  369. data/vendor/tesseract-2.04/dict/trie.h +190 -0
  370. data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
  371. data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
  372. data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
  373. data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
  374. data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
  375. data/vendor/tesseract-2.04/eurotext.tif +0 -0
  376. data/vendor/tesseract-2.04/image/Makefile.am +10 -0
  377. data/vendor/tesseract-2.04/image/Makefile.in +596 -0
  378. data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
  379. data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
  380. data/vendor/tesseract-2.04/image/img.h +336 -0
  381. data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
  382. data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
  383. data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
  384. data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
  385. data/vendor/tesseract-2.04/image/imgio.h +22 -0
  386. data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
  387. data/vendor/tesseract-2.04/image/imgs.h +102 -0
  388. data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
  389. data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
  390. data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
  391. data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
  392. data/vendor/tesseract-2.04/image/svshowim.h +25 -0
  393. data/vendor/tesseract-2.04/java/Makefile.am +4 -0
  394. data/vendor/tesseract-2.04/java/Makefile.in +473 -0
  395. data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
  396. data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
  397. data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
  398. data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
  399. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
  400. data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
  401. data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
  402. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
  403. data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
  404. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
  405. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
  406. data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
  407. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
  408. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
  409. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
  410. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
  411. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
  412. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
  413. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
  414. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
  415. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
  416. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
  417. data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
  418. data/vendor/tesseract-2.04/java/makefile +55 -0
  419. data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
  420. data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
  421. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
  422. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
  423. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
  424. data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
  425. data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
  426. data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
  427. data/vendor/tesseract-2.04/phototest.tif +0 -0
  428. data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
  429. data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
  430. data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
  431. data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
  432. data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
  433. data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
  434. data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
  435. data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
  436. data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
  437. data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
  438. data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
  439. data/vendor/tesseract-2.04/tessdata/confsets +3 -0
  440. data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
  441. data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
  442. data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
  443. data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
  444. data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
  445. data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
  446. data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
  447. data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
  448. data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
  449. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
  450. data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
  451. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
  452. data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
  453. data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
  454. data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
  455. data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
  456. data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
  457. data/vendor/tesseract-2.04/tessdll.cpp +351 -0
  458. data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
  459. data/vendor/tesseract-2.04/tessdll.h +143 -0
  460. data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
  461. data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
  462. data/vendor/tesseract-2.04/tesseract.dsw +116 -0
  463. data/vendor/tesseract-2.04/tesseract.sln +59 -0
  464. data/vendor/tesseract-2.04/tesseract.spec +188 -0
  465. data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
  466. data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
  467. data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
  468. data/vendor/tesseract-2.04/testing/README +43 -0
  469. data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
  470. data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
  471. data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
  472. data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
  473. data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
  474. data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
  475. data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
  476. data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
  477. data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
  478. data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
  479. data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
  480. data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
  481. data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
  482. data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
  483. data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
  484. data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
  485. data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
  486. data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
  487. data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
  488. data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
  489. data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
  490. data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
  491. data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
  492. data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
  493. data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
  494. data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
  495. data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
  496. data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
  497. data/vendor/tesseract-2.04/textord/makerow.h +295 -0
  498. data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
  499. data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
  500. data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
  501. data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
  502. data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
  503. data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
  504. data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
  505. data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
  506. data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
  507. data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
  508. data/vendor/tesseract-2.04/textord/tessout.h +76 -0
  509. data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
  510. data/vendor/tesseract-2.04/textord/topitch.h +195 -0
  511. data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
  512. data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
  513. data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
  514. data/vendor/tesseract-2.04/textord/tospace.h +193 -0
  515. data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
  516. data/vendor/tesseract-2.04/textord/tovars.h +94 -0
  517. data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
  518. data/vendor/tesseract-2.04/textord/underlin.h +53 -0
  519. data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
  520. data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
  521. data/vendor/tesseract-2.04/training/Makefile.am +54 -0
  522. data/vendor/tesseract-2.04/training/Makefile.in +720 -0
  523. data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
  524. data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
  525. data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
  526. data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
  527. data/vendor/tesseract-2.04/training/mergenf.h +106 -0
  528. data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
  529. data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
  530. data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
  531. data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
  532. data/vendor/tesseract-2.04/training/name2char.h +38 -0
  533. data/vendor/tesseract-2.04/training/training.cpp +190 -0
  534. data/vendor/tesseract-2.04/training/training.h +130 -0
  535. data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
  536. data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
  537. data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
  538. data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
  539. data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
  540. data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
  541. data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
  542. data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
  543. data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
  544. data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
  545. data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
  546. data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
  547. data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
  548. data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
  549. data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
  550. data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
  551. data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
  552. data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
  553. data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
  554. data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
  555. data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
  556. data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
  557. data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
  558. data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
  559. data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
  560. data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
  561. data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
  562. data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
  563. data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
  564. data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
  565. data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
  566. data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
  567. data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
  568. data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
  569. data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
  570. data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
  571. data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
  572. data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
  573. data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
  574. data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
  575. data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
  576. data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
  577. data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
  578. data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
  579. data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
  580. data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
  581. data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
  582. data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
  583. data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
  584. data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
  585. data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
  586. data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
  587. data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
  588. data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
  589. data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
  590. data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
  591. data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
  592. data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
  593. data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
  594. data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
  595. data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
  596. data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
  597. data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
  598. data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
  599. data/vendor/tesseract-2.04/wordrec/render.h +58 -0
  600. data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
  601. data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
  602. data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
  603. data/vendor/tesseract-2.04/wordrec/split.h +115 -0
  604. data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
  605. data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
  606. data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
  607. data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
  608. data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
  609. data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
  610. data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
  611. data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
  612. metadata +708 -0
@@ -0,0 +1,270 @@
1
+ /* -*-C-*-
2
+ ********************************************************************************
3
+ *
4
+ * File: context.c (Formerly context.c)
5
+ * Description: Context checking functions
6
+ * Author: Mark Seaman, OCR Technology
7
+ * Created: Thu Feb 15 11:18:24 1990
8
+ * Modified: Tue Jul 9 17:38:16 1991 (Mark Seaman) marks@hpgrlt
9
+ * Language: C
10
+ * Package: N/A
11
+ * Status: Experimental (Do Not Distribute)
12
+ *
13
+ * (c) Copyright 1990, Hewlett-Packard Company.
14
+ ** Licensed under the Apache License, Version 2.0 (the "License");
15
+ ** you may not use this file except in compliance with the License.
16
+ ** You may obtain a copy of the License at
17
+ ** http://www.apache.org/licenses/LICENSE-2.0
18
+ ** Unless required by applicable law or agreed to in writing, software
19
+ ** distributed under the License is distributed on an "AS IS" BASIS,
20
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ ** See the License for the specific language governing permissions and
22
+ ** limitations under the License.
23
+ *
24
+ *********************************************************************************/
25
+ #include "context.h"
26
+ #include "tordvars.h"
27
+ #include "callcpp.h"
28
+ #include "globals.h"
29
+
30
+ #include <stdio.h>
31
+ #include <ctype.h>
32
+ #include <string.h>
33
+ #include <math.h>
34
+
35
+ // Initialize probability_in_context to point to a default implementation (a
36
+ // main program can override this).
37
+ PROBABILITY_IN_CONTEXT_FUNCTION probability_in_context = &def_probability_in_context;
38
+
39
/* Default context model: consults none of its arguments and always
 * reports a probability of 0.0. */
double def_probability_in_context(const char* context,
                                  int context_bytes,
                                  const char* character,
                                  int character_bytes) {
  /* The casts exist only to mark the parameters as deliberately unused. */
  (void) character_bytes;
  (void) character;
  (void) context_bytes;
  (void) context;

  const double no_information = 0.0;
  return no_information;
}
49
+
50
/*----------------------------------------------------------------------
              V a r i a b l e s
----------------------------------------------------------------------*/
static FILE *choice_file = NULL; /* File to save choices */

/*----------------------------------------------------------------------
              F u n c t i o n s
----------------------------------------------------------------------*/
/**********************************************************************
 * close_choices
 *
 * Close the choices file if one is open.  The handle is cleared
 * afterwards so that a repeated call, or a later write_choice_line(),
 * sees "no file" instead of a stale FILE pointer.
 **********************************************************************/
void close_choices() {
  if (choice_file == NULL)
    return;

  fclose(choice_file);
  choice_file = NULL;  /* never leave a dangling handle behind */
}
67
+
68
+
69
/**********************************************************************
 * fix_quotes
 *
 * Collapse every adjacent pair of single-quote / back-quote characters
 * into one double-quote character.  The string is edited in place and
 * shrinks by one byte for each pair that is replaced.
 **********************************************************************/
void fix_quotes(char *str) {
  size_t len = strlen(str);  /* tracked by hand: the string shrinks */

  for (size_t i = 0; i < len; i++) {
    const int first_is_quote = (str[i] == '\'') || (str[i] == '`');
    const int second_is_quote = (str[i + 1] == '\'') || (str[i + 1] == '`');

    if (first_is_quote && second_is_quote) {
      str[i] = '\"';
      /* Source and destination overlap, so strcpy() would be undefined
       * behaviour.  The count covers the tail plus its terminating NUL. */
      memmove(str + i + 1, str + i + 2, len - i - 1);
      len--;
    }
  }
}
85
+
86
+
87
+ /**********************************************************************
88
+ * punctuation_ok
89
+ *
90
+ * Check a string to see if it matches a set of punctuation rules.
91
+ **********************************************************************/
92
+ int punctuation_ok(const char *word, const char *lengths) {
93
+ int punctuation_types[5];
94
+ int trailing = 0;
95
+ int num_puncts = 0;
96
+ register int x;
97
+ int offset;
98
+ UNICHAR_ID ch_id;
99
+
100
+ for (x = 0; x < 5; x++)
101
+ punctuation_types[x] = 0;
102
+
103
+ // check for un-supported symbols
104
+ for (x = 0, offset = 0; x < strlen (lengths); offset += lengths[x++]) {
105
+ // a un-supported symbol
106
+ if (!unicharset.contains_unichar (word + offset, lengths[x])) {
107
+ return -1;
108
+ }
109
+ }
110
+
111
+ for (x = 0, offset = 0; x < strlen (lengths); offset += lengths[x++]) {
112
+ if (unicharset.get_isalpha (word + offset, lengths[x])) {
113
+ if (trailing &&
114
+ !(unicharset.get_isalpha (word + offset - lengths[x - 1], lengths[x - 1])
115
+ #if 0
116
+ ||
117
+ (word[x - 1] == '\'' &&
118
+ (word[x] == 's' || word[x] == 'd' || word[x] == 'l')) ||
119
+ (word[x - 1] == '-')
120
+ #endif
121
+ ))
122
+ return (-1);
123
+ trailing = 1;
124
+ }
125
+ else {
126
+ ch_id = unicharset.unichar_to_id(word + offset, lengths[x]);
127
+
128
+ if (unicharset.eq(ch_id, ".") && trailing) {
129
+ if (punctuation_types[0])
130
+ return (-1);
131
+ (punctuation_types[0])++;
132
+ }
133
+
134
+ else if (((unicharset.eq(ch_id, "{")) ||
135
+ (unicharset.eq(ch_id, "[")) ||
136
+ (unicharset.eq(ch_id, "("))) && !trailing) {
137
+ if (punctuation_types[1])
138
+ return (-1);
139
+ (punctuation_types[1])++;
140
+ }
141
+
142
+ else if (((unicharset.eq(ch_id, "}")) ||
143
+ (unicharset.eq(ch_id, "]")) ||
144
+ (unicharset.eq(ch_id, ")"))) && trailing) {
145
+ if (punctuation_types[2])
146
+ return (-1);
147
+ (punctuation_types[2])++;
148
+ }
149
+
150
+ else if (((unicharset.eq(ch_id, ":")) ||
151
+ (unicharset.eq(ch_id, ";")) ||
152
+ (unicharset.eq(ch_id, "!")) ||
153
+ (unicharset.eq(ch_id, "-")) ||
154
+ (unicharset.eq(ch_id, ",")) ||
155
+ (unicharset.eq(ch_id, "?"))) && trailing) {
156
+ if (punctuation_types[3])
157
+ return (-1);
158
+ (punctuation_types[3])++;
159
+ if (unicharset.eq(ch_id, "-"))
160
+ punctuation_types[3] = 0;
161
+ }
162
+
163
+ else if (x < strlen(lengths) - 1 &&
164
+ ((unicharset.eq(ch_id, "`")) ||
165
+ (unicharset.eq(ch_id, "\"")) ||
166
+ (unicharset.eq(ch_id, "\'")))) {
167
+ UNICHAR_ID ch_id2 = unicharset.unichar_to_id(word + offset + lengths[x],
168
+ lengths[x + 1]);
169
+ if ((unicharset.eq(ch_id2, "`")) ||
170
+ (unicharset.eq(ch_id2, "\'"))) {
171
+ offset += lengths[x++];
172
+ }
173
+ (punctuation_types[4])++;
174
+ if (punctuation_types[4] > 2)
175
+ return (-1);
176
+ }
177
+
178
+ else if (!unicharset.get_isdigit (ch_id))
179
+ return (-1);
180
+ }
181
+ }
182
+
183
+ for (x = 0; x < 5; x++) {
184
+ if (punctuation_types[x])
185
+ num_puncts++;
186
+ }
187
+
188
+ return (num_puncts);
189
+ }
190
+
191
+
192
+ /**********************************************************************
193
+ * case_ok
194
+ *
195
+ * Check a string to see if it matches a set of lexical rules.
196
+ **********************************************************************/
197
+ int case_ok(const char *word, const char *lengths) {
198
+ static int case_state_table[6][4] = { {
199
+ /* 0. Begining of word */
200
+ /* P U L D */
201
+ /* -1. Error on case */
202
+ 0, 1, 5, 4
203
+ },
204
+ { /* 1. After initial capital */
205
+ 0, 3, 2, 4
206
+ },
207
+ { /* 2. After lower case */
208
+ 0, -1, 2, -1
209
+ },
210
+ { /* 3. After upper case */
211
+ 0, 3, -1, 4
212
+ },
213
+ { /* 4. After a digit */
214
+ 0, -1, -1, 4
215
+ },
216
+ { /* 5. After initial lower case */
217
+ 5, -1, 2, -1
218
+ },
219
+ };
220
+
221
+ register int last_state = 0;
222
+ register int state = 0;
223
+ register int x;
224
+ int offset;
225
+ UNICHAR_ID ch_id;
226
+
227
+ for (x = 0, offset = 0; x < strlen (lengths); offset += lengths[x++]) {
228
+
229
+ ch_id = unicharset.unichar_to_id(word + offset, lengths[x]);
230
+ if (unicharset.get_isupper(ch_id))
231
+ state = case_state_table[state][1];
232
+ else if (unicharset.get_isalpha(ch_id))
233
+ state = case_state_table[state][2];
234
+ else if (unicharset.get_isdigit(ch_id))
235
+ state = case_state_table[state][3];
236
+ else
237
+ state = case_state_table[state][0];
238
+
239
+ if (debug_3)
240
+ cprintf ("Case state = %d, char = %s\n", state,
241
+ unicharset.id_to_unichar(ch_id));
242
+ if (state == -1) {
243
+ /* Handle ACCRONYMs */
244
+ #if 0
245
+ if (word[x] == 's' &&
246
+ !isalpha (word[x + 1]) && !isdigit (word[x + 1]))
247
+ state = last_state;
248
+ else
249
+ #endif
250
+ return (FALSE);
251
+ }
252
+
253
+ last_state = state;
254
+ }
255
+ return state != 5; /*single lower is bad */
256
+ }
257
+
258
+
259
+ /**********************************************************************
260
+ * write_choice_line
261
+ *
262
+ * Write a blank line to the choices file. This will indicate that
263
+ * there is a new word that is following.
264
+ **********************************************************************/
265
+ void write_choice_line() {
266
+ if (choice_file) {
267
+ fprintf (choice_file, "\n");
268
+ fflush(choice_file);
269
+ }
270
+ }
@@ -0,0 +1,82 @@
1
+ /* -*-C-*-
2
+ ********************************************************************************
3
+ *
4
+ * File: context.h (Formerly context.h)
5
+ * Description: Context checking functions
6
+ * Author: Mark Seaman, OCR Technology
7
+ * Created: Thu Feb 15 11:18:24 1990
8
+ * Modified: Tue Jul 9 17:00:38 1991 (Mark Seaman) marks@hpgrlt
9
+ * Language: C
10
+ * Package: N/A
11
+ * Status: Experimental (Do Not Distribute)
12
+ *
13
+ * (c) Copyright 1990, Hewlett-Packard Company.
14
+ ** Licensed under the Apache License, Version 2.0 (the "License");
15
+ ** you may not use this file except in compliance with the License.
16
+ ** You may obtain a copy of the License at
17
+ ** http://www.apache.org/licenses/LICENSE-2.0
18
+ ** Unless required by applicable law or agreed to in writing, software
19
+ ** distributed under the License is distributed on an "AS IS" BASIS,
20
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ ** See the License for the specific language governing permissions and
22
+ ** limitations under the License.
23
+ *
24
+ ********************************************************************************
25
+ */
26
+
27
+ #ifndef CONTEXT_H
28
+ #define CONTEXT_H
29
+
30
+ #include "choices.h"
31
+
32
+ /*----------------------------------------------------------------------
33
+ F u n c t i o n s
34
+ ----------------------------------------------------------------------*/
35
+ void close_choices();
36
+
37
+ void fix_quotes(char *str);
38
+
39
+ int punctuation_ok(const char *word, const char *lengths);
40
+
41
+ int case_ok(const char *word, const char *lengths);
42
+
43
+ void write_choice_line();
44
+
45
+ typedef double (*PROBABILITY_IN_CONTEXT_FUNCTION)(const char* context,
46
+ int context_bytes,
47
+ const char* character,
48
+ int character_bytes);
49
+
50
+ extern PROBABILITY_IN_CONTEXT_FUNCTION probability_in_context;
51
+
52
+ extern double def_probability_in_context(const char* context,
53
+ int context_bytes,
54
+ const char* character,
55
+ int character_bytes);
56
+
57
+ /*
58
+ #if defined(__STDC__) || defined(__cplusplus)
59
+ # define _ARGS(s) s
60
+ #else
61
+ # define _ARGS(s) ()
62
+ #endif*/
63
+
64
+ /* context.c
65
+ void close_choices
66
+ _ARGS((void));
67
+
68
+ void fix_quotes
69
+ _ARGS((char *str));
70
+
71
+ int punctuation_ok
72
+ _ARGS((char *word));
73
+
74
+ int case_ok
75
+ _ARGS((char *word));
76
+
77
+ void write_choice_line
78
+ _ARGS((void));
79
+
80
+ #undef _ARGS
81
+ */
82
+ #endif
@@ -0,0 +1,363 @@
1
+ /* -*-C-*-
2
+ ********************************************************************************
3
+ *
4
+ * File: dawg.c (Formerly dawg.c)
5
+ * Description: Use a Directed Acyclic Word Graph
6
+ * Author: Mark Seaman, OCR Technology
7
+ * Created: Fri Oct 16 14:37:00 1987
8
+ * Modified: Wed Jul 24 16:59:16 1991 (Mark Seaman) marks@hpgrlt
9
+ * Language: C
10
+ * Package: N/A
11
+ * Status: Reusable Software Component
12
+ *
13
+ * (c) Copyright 1987, Hewlett-Packard Company.
14
+ ** Licensed under the Apache License, Version 2.0 (the "License");
15
+ ** you may not use this file except in compliance with the License.
16
+ ** You may obtain a copy of the License at
17
+ ** http://www.apache.org/licenses/LICENSE-2.0
18
+ ** Unless required by applicable law or agreed to in writing, software
19
+ ** distributed under the License is distributed on an "AS IS" BASIS,
20
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ ** See the License for the specific language governing permissions and
22
+ ** limitations under the License.
23
+ *
24
+ *********************************************************************************/
25
+ /*----------------------------------------------------------------------
26
+ I n c l u d e s
27
+ ----------------------------------------------------------------------*/
28
+ #ifdef __MSW32__
29
+ #include <windows.h>
30
+ #else
31
+ #include <arpa/inet.h>
32
+ #endif
33
+ #include "dawg.h"
34
+ #include "cutil.h"
35
+ #include "tprintf.h"
36
+ #include "freelist.h"
37
+ #include "context.h"
38
+ #include "strngs.h"
39
+ #include "emalloc.h"
40
+
41
+ /*----------------------------------------------------------------------
42
+ V a r i a b l e s
43
+ ----------------------------------------------------------------------*/
44
+ inT32 debug = 0;
45
+ inT32 case_sensative = 1;
46
+
47
+ /*----------------------------------------------------------------------
48
+ F u n c t i o n s
49
+ ----------------------------------------------------------------------*/
50
+ /**********************************************************************
51
+ * edge_char_of
52
+ *
53
+ * Return the edge that corresponds to the letter out of this node.
54
+ **********************************************************************/
55
+ EDGE_REF edge_char_of(EDGE_ARRAY dawg,
56
+ NODE_REF node,
57
+ int character,
58
+ int word_end) {
59
+ EDGE_REF edge = node;
60
+
61
+ if (! case_sensative) character = tolower (character);
62
+
63
+ if (edge_occupied (dawg, edge)) {
64
+ do {
65
+ if ((edge_letter (dawg, edge) == character) &&
66
+ (! word_end || end_of_word(dawg,edge)))
67
+ return (edge);
68
+
69
+ } edge_loop (dawg, edge);
70
+ }
71
+
72
+ return (NO_EDGE);
73
+ }
74
+
75
+
76
+ /**********************************************************************
77
+ * edges_in_node
78
+ *
79
+ * Count the number of edges in this node in the DAWG. This includes
80
+ * both forward and back links.
81
+ **********************************************************************/
82
+ inT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node) {
83
+ EDGE_REF edge = node;
84
+
85
+ if (edge_occupied (dawg, edge)) {
86
+ edge_loop(dawg, edge);
87
+ if (edge_occupied (dawg, edge) && backward_edge (dawg, edge)) {
88
+ edge_loop(dawg, edge);
89
+ return (edge - node);
90
+ }
91
+ else {
92
+ return (edge - node);
93
+ }
94
+ }
95
+ else {
96
+ return (edge - node);
97
+ }
98
+ }
99
+
100
+
101
+ /*
102
+ * Initialize letter_is_okay to point to default implmentation (a main
103
+ * program can override this).
104
+ */
105
+ LETTER_OK_FUNC letter_is_okay = &def_letter_is_okay;
106
+
107
+ /**********************************************************************
108
+ * def_letter_is_okay
109
+ *
110
+ * Default way to check this letter in light of the current state. If
111
+ * everything is still OK then return TRUE.
112
+ **********************************************************************/
113
+ // TODO(tkielbus) Change the prevchar argument to make it unicode safe.
114
+ // We might want to get rid of def_letter_is_okay at some point though.
115
+ inT32 def_letter_is_okay(EDGE_ARRAY dawg,
116
+ NODE_REF *node,
117
+ inT32 char_index,
118
+ char prevchar,
119
+ const char *word,
120
+ inT32 word_end) {
121
+ EDGE_REF edge;
122
+ STRING dummy_word(word); // Auto-deleting string fixes memory leak.
123
+ STRING word_single_lengths; //Lengths of single UTF-8 characters of the word.
124
+ const char *ptr;
125
+
126
+ for (ptr = word; *ptr != '\0';) {
127
+ int step = UNICHAR::utf8_step(ptr);
128
+ if (step == 0)
129
+ return FALSE;
130
+ word_single_lengths += step;
131
+ ptr += step;
132
+ }
133
+
134
+ if (*node == NO_EDGE) { /* Trailing punctuation */
135
+ if (trailing_punc(dummy_word[char_index]) &&
136
+ punctuation_ok(dummy_word.string(), word_single_lengths.string()) >= 0)
137
+ return (TRUE);
138
+ else
139
+ return (FALSE);
140
+ }
141
+ // rays: removed incorrect code that attempted to enforce leading
142
+ // punctutation (or nothing) before an alpha character.
143
+ /* Check the DAWG */
144
+ edge = edge_char_of(dawg, *node,
145
+ static_cast<unsigned char>(dummy_word[char_index]),
146
+ word_end);
147
+
148
+ if (edge != NO_EDGE) { /* Normal edge in DAWG */
149
+ if (case_sensative || case_is_okay (dummy_word, char_index)) {
150
+ //next_node (dawg, edge);
151
+ *node = next_node(dawg, edge);
152
+ if (*node == 0)
153
+ *node = NO_EDGE;
154
+ return (TRUE);
155
+ } else {
156
+ return (FALSE);
157
+ }
158
+ }
159
+ else {
160
+ /* Leading punctuation */
161
+ if (leading_punc (word [char_index]) &&
162
+ (char_index == 0 || leading_punc (dummy_word [char_index-1]))) {
163
+ *node = 0;
164
+ if (punctuation_ok(word, word_single_lengths.string()) >= 0)
165
+ return (TRUE);
166
+ else
167
+ return FALSE;
168
+ }
169
+ /* Trailing punctuation */
170
+ if (verify_trailing_punct (dawg, &dummy_word[0], char_index)) {
171
+ *node = NO_EDGE;
172
+ return (TRUE);
173
+ }
174
+
175
+ return (FALSE);
176
+ }
177
+ }
178
+
179
+
180
+ /**********************************************************************
181
+ * num_forward_edges
182
+ *
183
+ * Count and return the number of forward edges for this node.
184
+ **********************************************************************/
185
+ inT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node) {
186
+ EDGE_REF edge = node;
187
+ inT32 num = 0;
188
+
189
+ if (forward_edge (dawg, edge)) {
190
+ do {
191
+ num++;
192
+ } edge_loop (dawg, edge);
193
+ }
194
+
195
+ return (num);
196
+ }
197
+
198
+
199
+ /**********************************************************************
200
+ * print_dawg_node
201
+ *
202
+ * Print the contents of one of the nodes in the DAWG.
203
+ **********************************************************************/
204
+ void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node) {
205
+ EDGE_REF edge = node;
206
+ const char *forward_string = "FORWARD";
207
+ const char *backward_string = " ";
208
+
209
+ const char *last_string = "LAST";
210
+ const char *not_last_string = " ";
211
+
212
+ const char *eow_string = "EOW";
213
+ const char *not_eow_string = " ";
214
+
215
+ const char *direction;
216
+ const char *is_last;
217
+ const char *eow;
218
+
219
+ int ch;
220
+
221
+ if (edge_occupied (dawg, edge)) {
222
+ do {
223
+ if (forward_edge (dawg, edge)) direction = forward_string;
224
+ else direction = backward_string;
225
+
226
+ if (last_edge (dawg, edge)) is_last = last_string;
227
+ else is_last = not_last_string;
228
+
229
+ if (end_of_word (dawg, edge)) eow = eow_string;
230
+ else eow = not_eow_string;
231
+
232
+ ch = edge_letter (dawg, edge);
233
+ cprintf (REFFORMAT " : next = " REFFORMAT ", char = '%c', %s %s %s\n",
234
+ edge, next_node (dawg, edge), ch,
235
+ direction, is_last, eow);
236
+
237
+ if (edge - node > MAX_NODE_EDGES_DISPLAY) return;
238
+ } edge_loop (dawg, edge);
239
+
240
+ if (edge_occupied (dawg, edge) && backward_edge (dawg, edge)) {
241
+ do {
242
+ if (forward_edge (dawg, edge)) direction = forward_string;
243
+ else direction = backward_string;
244
+
245
+ if (last_edge (dawg, edge)) is_last = last_string;
246
+ else is_last = not_last_string;
247
+
248
+ if (end_of_word (dawg, edge)) eow = eow_string;
249
+ else eow = not_eow_string;
250
+
251
+ ch = edge_letter (dawg, edge);
252
+ cprintf (REFFORMAT " : next = " REFFORMAT ", char = '%c', %s %s %s\n",
253
+ edge, next_node (dawg, edge), ch,
254
+ direction, is_last, eow);
255
+
256
+ if (edge - node > MAX_NODE_EDGES_DISPLAY) return;
257
+ } edge_loop (dawg, edge);
258
+ }
259
+ }
260
+ else {
261
+ cprintf (REFFORMAT " : no edges in this node\n", node);
262
+ }
263
+ cprintf("\n");
264
+ }
265
+
266
+
267
+ /**********************************************************************
268
+ * read_squished_dawg
269
+ *
270
+ * Read the DAWG from a file and return it. Must be freed with memfree.
271
+ **********************************************************************/
272
+ EDGE_ARRAY read_squished_dawg(const char *filename) {
273
+ FILE *file;
274
+ EDGE_REF edge;
275
+ inT32 num_edges = 0;
276
+ inT32 node_count = 0;
277
+
278
+ if (debug) print_string ("read_debug");
279
+
280
+ #ifdef __UNIX__
281
+ file = open_file (filename, "r");
282
+ #else
283
+ file = open_file (filename, "rb");
284
+ #endif
285
+ fread (&num_edges, sizeof (inT32), 1, file);
286
+ num_edges = ntohl(num_edges);
287
+ if (num_edges > MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE || num_edges < 0) {
288
+ tprintf("(ENDIAN)Error: trying to read a DAWG '%s' that contains "
289
+ "%d edges while the maximum is %d.\n",
290
+ filename, num_edges, MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE);
291
+ exit(1);
292
+ }
293
+
294
+ uinT32 *dawg_32 = (uinT32*) Emalloc(num_edges * sizeof (uinT32));
295
+ fread(&dawg_32[0], sizeof (uinT32), num_edges, file);
296
+ fclose(file);
297
+ EDGE_ARRAY dawg = (EDGE_ARRAY) memalloc(sizeof(EDGE_RECORD) * num_edges);
298
+
299
+ for (edge = 0; edge < num_edges; ++edge)
300
+ dawg[edge] = ntohl(dawg_32[edge]);
301
+
302
+ Efree(dawg_32);
303
+
304
+ for (edge = 0; edge < num_edges; ++edge)
305
+ if (last_edge (dawg, edge)) node_count++;
306
+
307
+ return dawg;
308
+ }
309
+
310
+
311
+ /**********************************************************************
312
+ * verify_trailing_punct
313
+ *
314
+ * Make sure that there is a valid transition from the word core to a
315
+ * string of trailing puntuation. TRUE is returned if everything is
316
+ * OK.
317
+ **********************************************************************/
318
+ inT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, inT32 char_index) {
319
+ char last_char;
320
+ char *first_char;
321
+
322
+ if (trailing_punc (word [char_index])) {
323
+
324
+ last_char = word [char_index];
325
+ word [char_index] = (char) 0;
326
+
327
+ for (first_char = word; leading_punc (first_char[0]); first_char++);
328
+
329
+ if (word_in_dawg (dawg, first_char)) {
330
+ word [char_index] = last_char;
331
+ return (TRUE);
332
+ }
333
+ word [char_index] = last_char;
334
+ }
335
+ return (FALSE);
336
+ }
337
+
338
+
339
+ /**********************************************************************
340
+ * word_in_dawg
341
+ *
342
+ * Test to see if the word can be found in the DAWG.
343
+ **********************************************************************/
344
+ inT32 word_in_dawg(EDGE_ARRAY dawg, const char *string) {
345
+ NODE_REF node = 0;
346
+ inT32 i;
347
+ inT32 length;
348
+
349
+ length=strlen(string);
350
+ if (length==0)
351
+ return FALSE;
352
+ for (i=0; i<length; i++) {
353
+ if (debug > 1) {
354
+ print_dawg_node(dawg, node);
355
+ new_line();
356
+ }
357
+ if (! letter_is_okay (dawg, &node, i, '\0', string, (string[i+1]==0))) {
358
+ return (FALSE);
359
+ }
360
+ }
361
+
362
+ return (TRUE);
363
+ }