entangledstate-isbn 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (290) hide show
  1. data/README +1 -1
  2. data/Rakefile +0 -18
  3. data/VERSION +1 -0
  4. data/isbn.gemspec +290 -7
  5. data/lib/isbn.rb +6 -6
  6. data/src/gocr-0.48/.cvsignore +6 -0
  7. data/src/gocr-0.48/AUTHORS +7 -0
  8. data/src/gocr-0.48/BUGS +55 -0
  9. data/src/gocr-0.48/CREDITS +17 -0
  10. data/src/gocr-0.48/HISTORY +243 -0
  11. data/src/gocr-0.48/INSTALL +83 -0
  12. data/src/gocr-0.48/Makefile +193 -0
  13. data/src/gocr-0.48/Makefile.in +193 -0
  14. data/src/gocr-0.48/README +165 -0
  15. data/src/gocr-0.48/READMEde.txt +80 -0
  16. data/src/gocr-0.48/REMARK.txt +18 -0
  17. data/src/gocr-0.48/REVIEW +538 -0
  18. data/src/gocr-0.48/TODO +65 -0
  19. data/src/gocr-0.48/bin/.cvsignore +2 -0
  20. data/src/gocr-0.48/bin/create_db +38 -0
  21. data/src/gocr-0.48/bin/gocr.tcl +527 -0
  22. data/src/gocr-0.48/bin/gocr_chk.sh +44 -0
  23. data/src/gocr-0.48/configure +4689 -0
  24. data/src/gocr-0.48/configure.in +71 -0
  25. data/src/gocr-0.48/doc/.#Makefile.1.6 +39 -0
  26. data/src/gocr-0.48/doc/.cvsignore +2 -0
  27. data/src/gocr-0.48/doc/Makefile +39 -0
  28. data/src/gocr-0.48/doc/Makefile.in +39 -0
  29. data/src/gocr-0.48/doc/example.dtd +53 -0
  30. data/src/gocr-0.48/doc/example.xml +21 -0
  31. data/src/gocr-0.48/doc/examples.txt +67 -0
  32. data/src/gocr-0.48/doc/gocr.html +578 -0
  33. data/src/gocr-0.48/doc/unicode.txt +57 -0
  34. data/src/gocr-0.48/examples/.#Makefile.1.22 +166 -0
  35. data/src/gocr-0.48/examples/4x6.png +0 -0
  36. data/src/gocr-0.48/examples/4x6.txt +2 -0
  37. data/src/gocr-0.48/examples/5x7.png +0 -0
  38. data/src/gocr-0.48/examples/5x7.png.txt +2 -0
  39. data/src/gocr-0.48/examples/5x8.png +0 -0
  40. data/src/gocr-0.48/examples/5x8.png.txt +2 -0
  41. data/src/gocr-0.48/examples/Makefile +166 -0
  42. data/src/gocr-0.48/examples/color.fig +20 -0
  43. data/src/gocr-0.48/examples/ex.fig +16 -0
  44. data/src/gocr-0.48/examples/font.tex +22 -0
  45. data/src/gocr-0.48/examples/font1.tex +46 -0
  46. data/src/gocr-0.48/examples/font2.fig +27 -0
  47. data/src/gocr-0.48/examples/font_nw.tex +24 -0
  48. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  49. data/src/gocr-0.48/examples/handwrt1.txt +10 -0
  50. data/src/gocr-0.48/examples/inverse.fig +20 -0
  51. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  52. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  53. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +4 -0
  54. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a.txt +6 -0
  56. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-b.png.txt +4 -0
  58. data/src/gocr-0.48/examples/polish.tex +28 -0
  59. data/src/gocr-0.48/examples/rotate45.fig +14 -0
  60. data/src/gocr-0.48/examples/score +36 -0
  61. data/src/gocr-0.48/examples/text.tex +28 -0
  62. data/src/gocr-0.48/gocr.spec +143 -0
  63. data/src/gocr-0.48/gpl.html +537 -0
  64. data/src/gocr-0.48/include/.cvsignore +2 -0
  65. data/src/gocr-0.48/include/config.h +36 -0
  66. data/src/gocr-0.48/include/config.h.in +36 -0
  67. data/src/gocr-0.48/include/version.h +2 -0
  68. data/src/gocr-0.48/install-sh +3 -0
  69. data/src/gocr-0.48/make.bat +57 -0
  70. data/src/gocr-0.48/man/.cvsignore +2 -0
  71. data/src/gocr-0.48/man/Makefile +29 -0
  72. data/src/gocr-0.48/man/Makefile.in +29 -0
  73. data/src/gocr-0.48/man/man1/gocr.1 +166 -0
  74. data/src/gocr-0.48/src/.cvsignore +4 -0
  75. data/src/gocr-0.48/src/Makefile +132 -0
  76. data/src/gocr-0.48/src/Makefile.in +132 -0
  77. data/src/gocr-0.48/src/amiga.h +31 -0
  78. data/src/gocr-0.48/src/barcode.c +846 -0
  79. data/src/gocr-0.48/src/barcode.c.orig +593 -0
  80. data/src/gocr-0.48/src/barcode.h +11 -0
  81. data/src/gocr-0.48/src/box.c +372 -0
  82. data/src/gocr-0.48/src/database.c +462 -0
  83. data/src/gocr-0.48/src/detect.c +943 -0
  84. data/src/gocr-0.48/src/gocr.c +373 -0
  85. data/src/gocr-0.48/src/gocr.h +288 -0
  86. data/src/gocr-0.48/src/jconv.c +168 -0
  87. data/src/gocr-0.48/src/job.c +84 -0
  88. data/src/gocr-0.48/src/lines.c +350 -0
  89. data/src/gocr-0.48/src/list.c +334 -0
  90. data/src/gocr-0.48/src/list.h +90 -0
  91. data/src/gocr-0.48/src/ocr0.c +6756 -0
  92. data/src/gocr-0.48/src/ocr0.h +63 -0
  93. data/src/gocr-0.48/src/ocr0n.c +1475 -0
  94. data/src/gocr-0.48/src/ocr1.c +85 -0
  95. data/src/gocr-0.48/src/ocr1.h +3 -0
  96. data/src/gocr-0.48/src/otsu.c +289 -0
  97. data/src/gocr-0.48/src/otsu.h +23 -0
  98. data/src/gocr-0.48/src/output.c +289 -0
  99. data/src/gocr-0.48/src/output.h +37 -0
  100. data/src/gocr-0.48/src/pcx.c +153 -0
  101. data/src/gocr-0.48/src/pcx.h +9 -0
  102. data/src/gocr-0.48/src/pgm2asc.c +2893 -0
  103. data/src/gocr-0.48/src/pgm2asc.h +105 -0
  104. data/src/gocr-0.48/src/pixel.c +537 -0
  105. data/src/gocr-0.48/src/pnm.c +533 -0
  106. data/src/gocr-0.48/src/pnm.h +35 -0
  107. data/src/gocr-0.48/src/progress.c +87 -0
  108. data/src/gocr-0.48/src/progress.h +42 -0
  109. data/src/gocr-0.48/src/remove.c +703 -0
  110. data/src/gocr-0.48/src/tga.c +87 -0
  111. data/src/gocr-0.48/src/tga.h +6 -0
  112. data/src/gocr-0.48/src/unicode.c +1314 -0
  113. data/src/gocr-0.48/src/unicode.h +1257 -0
  114. data/src/jpeg-7/Makefile.am +133 -0
  115. data/src/jpeg-7/Makefile.in +1089 -0
  116. data/src/jpeg-7/README +322 -0
  117. data/src/jpeg-7/aclocal.m4 +8990 -0
  118. data/src/jpeg-7/ansi2knr.1 +36 -0
  119. data/src/jpeg-7/ansi2knr.c +739 -0
  120. data/src/jpeg-7/cderror.h +132 -0
  121. data/src/jpeg-7/cdjpeg.c +181 -0
  122. data/src/jpeg-7/cdjpeg.h +187 -0
  123. data/src/jpeg-7/change.log +270 -0
  124. data/src/jpeg-7/cjpeg.1 +325 -0
  125. data/src/jpeg-7/cjpeg.c +616 -0
  126. data/src/jpeg-7/ckconfig.c +402 -0
  127. data/src/jpeg-7/coderules.txt +118 -0
  128. data/src/jpeg-7/config.guess +1561 -0
  129. data/src/jpeg-7/config.sub +1686 -0
  130. data/src/jpeg-7/configure +17139 -0
  131. data/src/jpeg-7/configure.ac +317 -0
  132. data/src/jpeg-7/depcomp +630 -0
  133. data/src/jpeg-7/djpeg.1 +251 -0
  134. data/src/jpeg-7/djpeg.c +617 -0
  135. data/src/jpeg-7/example.c +433 -0
  136. data/src/jpeg-7/filelist.txt +215 -0
  137. data/src/jpeg-7/install-sh +520 -0
  138. data/src/jpeg-7/install.txt +1097 -0
  139. data/src/jpeg-7/jaricom.c +148 -0
  140. data/src/jpeg-7/jcapimin.c +282 -0
  141. data/src/jpeg-7/jcapistd.c +161 -0
  142. data/src/jpeg-7/jcarith.c +921 -0
  143. data/src/jpeg-7/jccoefct.c +453 -0
  144. data/src/jpeg-7/jccolor.c +459 -0
  145. data/src/jpeg-7/jcdctmgr.c +482 -0
  146. data/src/jpeg-7/jchuff.c +1612 -0
  147. data/src/jpeg-7/jcinit.c +65 -0
  148. data/src/jpeg-7/jcmainct.c +293 -0
  149. data/src/jpeg-7/jcmarker.c +667 -0
  150. data/src/jpeg-7/jcmaster.c +770 -0
  151. data/src/jpeg-7/jcomapi.c +106 -0
  152. data/src/jpeg-7/jconfig.bcc +48 -0
  153. data/src/jpeg-7/jconfig.cfg +45 -0
  154. data/src/jpeg-7/jconfig.dj +38 -0
  155. data/src/jpeg-7/jconfig.mac +43 -0
  156. data/src/jpeg-7/jconfig.manx +43 -0
  157. data/src/jpeg-7/jconfig.mc6 +52 -0
  158. data/src/jpeg-7/jconfig.sas +43 -0
  159. data/src/jpeg-7/jconfig.st +42 -0
  160. data/src/jpeg-7/jconfig.txt +155 -0
  161. data/src/jpeg-7/jconfig.vc +45 -0
  162. data/src/jpeg-7/jconfig.vms +37 -0
  163. data/src/jpeg-7/jconfig.wat +38 -0
  164. data/src/jpeg-7/jcparam.c +632 -0
  165. data/src/jpeg-7/jcprepct.c +358 -0
  166. data/src/jpeg-7/jcsample.c +545 -0
  167. data/src/jpeg-7/jctrans.c +381 -0
  168. data/src/jpeg-7/jdapimin.c +396 -0
  169. data/src/jpeg-7/jdapistd.c +275 -0
  170. data/src/jpeg-7/jdarith.c +762 -0
  171. data/src/jpeg-7/jdatadst.c +151 -0
  172. data/src/jpeg-7/jdatasrc.c +212 -0
  173. data/src/jpeg-7/jdcoefct.c +736 -0
  174. data/src/jpeg-7/jdcolor.c +396 -0
  175. data/src/jpeg-7/jdct.h +393 -0
  176. data/src/jpeg-7/jddctmgr.c +382 -0
  177. data/src/jpeg-7/jdhuff.c +1309 -0
  178. data/src/jpeg-7/jdinput.c +384 -0
  179. data/src/jpeg-7/jdmainct.c +512 -0
  180. data/src/jpeg-7/jdmarker.c +1360 -0
  181. data/src/jpeg-7/jdmaster.c +663 -0
  182. data/src/jpeg-7/jdmerge.c +400 -0
  183. data/src/jpeg-7/jdpostct.c +290 -0
  184. data/src/jpeg-7/jdsample.c +361 -0
  185. data/src/jpeg-7/jdtrans.c +136 -0
  186. data/src/jpeg-7/jerror.c +252 -0
  187. data/src/jpeg-7/jerror.h +304 -0
  188. data/src/jpeg-7/jfdctflt.c +174 -0
  189. data/src/jpeg-7/jfdctfst.c +230 -0
  190. data/src/jpeg-7/jfdctint.c +4348 -0
  191. data/src/jpeg-7/jidctflt.c +242 -0
  192. data/src/jpeg-7/jidctfst.c +368 -0
  193. data/src/jpeg-7/jidctint.c +5137 -0
  194. data/src/jpeg-7/jinclude.h +91 -0
  195. data/src/jpeg-7/jmemansi.c +167 -0
  196. data/src/jpeg-7/jmemdos.c +638 -0
  197. data/src/jpeg-7/jmemdosa.asm +379 -0
  198. data/src/jpeg-7/jmemmac.c +289 -0
  199. data/src/jpeg-7/jmemmgr.c +1118 -0
  200. data/src/jpeg-7/jmemname.c +276 -0
  201. data/src/jpeg-7/jmemnobs.c +109 -0
  202. data/src/jpeg-7/jmemsys.h +198 -0
  203. data/src/jpeg-7/jmorecfg.h +369 -0
  204. data/src/jpeg-7/jpegint.h +395 -0
  205. data/src/jpeg-7/jpeglib.h +1135 -0
  206. data/src/jpeg-7/jpegtran.1 +272 -0
  207. data/src/jpeg-7/jpegtran.c +546 -0
  208. data/src/jpeg-7/jquant1.c +856 -0
  209. data/src/jpeg-7/jquant2.c +1310 -0
  210. data/src/jpeg-7/jutils.c +179 -0
  211. data/src/jpeg-7/jversion.h +14 -0
  212. data/src/jpeg-7/libjpeg.map +4 -0
  213. data/src/jpeg-7/libjpeg.txt +3067 -0
  214. data/src/jpeg-7/ltmain.sh +8406 -0
  215. data/src/jpeg-7/makcjpeg.st +36 -0
  216. data/src/jpeg-7/makdjpeg.st +36 -0
  217. data/src/jpeg-7/makeadsw.vc6 +77 -0
  218. data/src/jpeg-7/makeasln.vc9 +33 -0
  219. data/src/jpeg-7/makecdep.vc6 +82 -0
  220. data/src/jpeg-7/makecdsp.vc6 +130 -0
  221. data/src/jpeg-7/makecmak.vc6 +159 -0
  222. data/src/jpeg-7/makecvcp.vc9 +186 -0
  223. data/src/jpeg-7/makeddep.vc6 +82 -0
  224. data/src/jpeg-7/makeddsp.vc6 +130 -0
  225. data/src/jpeg-7/makedmak.vc6 +159 -0
  226. data/src/jpeg-7/makedvcp.vc9 +186 -0
  227. data/src/jpeg-7/makefile.ansi +220 -0
  228. data/src/jpeg-7/makefile.bcc +291 -0
  229. data/src/jpeg-7/makefile.dj +226 -0
  230. data/src/jpeg-7/makefile.manx +220 -0
  231. data/src/jpeg-7/makefile.mc6 +255 -0
  232. data/src/jpeg-7/makefile.mms +224 -0
  233. data/src/jpeg-7/makefile.sas +258 -0
  234. data/src/jpeg-7/makefile.unix +234 -0
  235. data/src/jpeg-7/makefile.vc +217 -0
  236. data/src/jpeg-7/makefile.vms +142 -0
  237. data/src/jpeg-7/makefile.wat +239 -0
  238. data/src/jpeg-7/makejdep.vc6 +423 -0
  239. data/src/jpeg-7/makejdsp.vc6 +285 -0
  240. data/src/jpeg-7/makejdsw.vc6 +29 -0
  241. data/src/jpeg-7/makejmak.vc6 +425 -0
  242. data/src/jpeg-7/makejsln.vc9 +17 -0
  243. data/src/jpeg-7/makejvcp.vc9 +328 -0
  244. data/src/jpeg-7/makeproj.mac +213 -0
  245. data/src/jpeg-7/makerdep.vc6 +6 -0
  246. data/src/jpeg-7/makerdsp.vc6 +78 -0
  247. data/src/jpeg-7/makermak.vc6 +110 -0
  248. data/src/jpeg-7/makervcp.vc9 +133 -0
  249. data/src/jpeg-7/maketdep.vc6 +43 -0
  250. data/src/jpeg-7/maketdsp.vc6 +122 -0
  251. data/src/jpeg-7/maketmak.vc6 +131 -0
  252. data/src/jpeg-7/maketvcp.vc9 +178 -0
  253. data/src/jpeg-7/makewdep.vc6 +6 -0
  254. data/src/jpeg-7/makewdsp.vc6 +78 -0
  255. data/src/jpeg-7/makewmak.vc6 +110 -0
  256. data/src/jpeg-7/makewvcp.vc9 +133 -0
  257. data/src/jpeg-7/makljpeg.st +68 -0
  258. data/src/jpeg-7/maktjpeg.st +30 -0
  259. data/src/jpeg-7/makvms.opt +4 -0
  260. data/src/jpeg-7/missing +376 -0
  261. data/src/jpeg-7/rdbmp.c +439 -0
  262. data/src/jpeg-7/rdcolmap.c +253 -0
  263. data/src/jpeg-7/rdgif.c +38 -0
  264. data/src/jpeg-7/rdjpgcom.1 +63 -0
  265. data/src/jpeg-7/rdjpgcom.c +515 -0
  266. data/src/jpeg-7/rdppm.c +459 -0
  267. data/src/jpeg-7/rdrle.c +387 -0
  268. data/src/jpeg-7/rdswitch.c +365 -0
  269. data/src/jpeg-7/rdtarga.c +500 -0
  270. data/src/jpeg-7/structure.txt +945 -0
  271. data/src/jpeg-7/testimg.bmp +0 -0
  272. data/src/jpeg-7/testimg.jpg +0 -0
  273. data/src/jpeg-7/testimg.ppm +4 -0
  274. data/src/jpeg-7/testimgp.jpg +0 -0
  275. data/src/jpeg-7/testorig.jpg +0 -0
  276. data/src/jpeg-7/testprog.jpg +0 -0
  277. data/src/jpeg-7/transupp.c +1533 -0
  278. data/src/jpeg-7/transupp.h +205 -0
  279. data/src/jpeg-7/usage.txt +605 -0
  280. data/src/jpeg-7/wizard.txt +211 -0
  281. data/src/jpeg-7/wrbmp.c +442 -0
  282. data/src/jpeg-7/wrgif.c +399 -0
  283. data/src/jpeg-7/wrjpgcom.1 +103 -0
  284. data/src/jpeg-7/wrjpgcom.c +583 -0
  285. data/src/jpeg-7/wrppm.c +269 -0
  286. data/src/jpeg-7/wrrle.c +305 -0
  287. data/src/jpeg-7/wrtarga.c +253 -0
  288. metadata +287 -6
  289. data/LICENSE +0 -20
  290. data/VERSION.yml +0 -4
@@ -0,0 +1,373 @@
1
+ /*
2
+ This is a Optical-Character-Recognition program
3
+ Copyright (C) 2000-2009 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+
21
+ sometimes I have written comments in german language, sorry for that
22
+
23
+ This file was retrieved from pgm2asc.cc of Joerg, in order to have
24
+ a library of the ocr-engine from Klaas Freitag
25
+
26
+ */
27
+ #include "config.h"
28
+ #include <stdlib.h>
29
+ #include <stdio.h>
30
+ #include <assert.h>
31
+ #include <string.h>
32
+ #ifdef HAVE_GETTIMEOFDAY
33
+ #include <sys/time.h>
34
+ #endif
35
+ #ifdef HAVE_UNISTD_H
36
+ #include <unistd.h>
37
+ #endif
38
+
39
+ #include "pnm.h"
40
+ #include "pgm2asc.h"
41
+ #include "pcx.h"
42
+ #include "ocr0.h" /* only_numbers */
43
+ #include "progress.h"
44
+ #include "version.h"
45
+
46
+ static void out_version(int v) {
47
+ fprintf(stderr, " Optical Character Recognition --- gocr "
48
+ version_string " " release_string "\n"
49
+ " Copyright (C) 2001-2009 Joerg Schulenburg GPG=1024D/53BDFBE3\n"
50
+ " released under the GNU General Public License\n");
51
+ /* as recommended, (c) and license should be part of the binary */
52
+ /* no email because of SPAM, see README for contacting the author */
53
+ if (v)
54
+ fprintf(stderr, " use option -h for help\n");
55
+ if (v & 2)
56
+ exit(1);
57
+ return;
58
+ }
59
+
60
/*
 * Print the banner and a condensed usage summary to stderr, then terminate
 * the process with exit status 0.  This function never returns.
 *
 * The text is split over several fprintf() calls so that each string
 * literal stays below the 509 byte limit of ISO C89.
 */
static void help(void) {
  out_version(0);
  /* output is shortened to essentials, see manual page for details */
  fprintf(stderr,
          " using: gocr [options] pnm_file_name # use - for stdin\n"
          " options (see gocr manual pages for more details):\n"
          " -h, --help\n"
          " -i name - input image file (pnm,pgm,pbm,ppm,pcx,...)\n"
          " -o name - output file (redirection of stdout)\n"
          " -e name - logging file (redirection of stderr)\n"
          " -x name - progress output to fifo (see manual)\n"
          " -p name - database path including final slash (default is ./db/)\n");
  fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
          " -f fmt - output format (ISO8859_1 TeX HTML XML UTF8 ASCII)\n"
          " -l num - threshold grey level 0<160<=255 (0 = autodetect)\n"
          " -d num - dust_size (remove small clusters, -1 = autodetect)\n"
          " -s num - spacewidth/dots (0 = autodetect)\n"
          " -v num - verbose (see manual page)\n"
          " -c string - list of chars (debugging, see manual)\n"
          /* the quotes must be escaped: a doubled "" merely ends one string
             literal and starts the next, so no quote would be printed */
          " -C string - char filter (ex. hexdigits: \"0-9A-Fx\", only ASCII)\n"
          " -m num - operation modes (bitpattern, see manual)\n");
  fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
          " -a num - value of certainty (in percent, 0..100, default=95)\n"
          " -u string - output this string for every unrecognized character\n");
  fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
          " examples:\n"
          "\tgocr -m 4 text1.pbm # do layout analyzis\n"
          "\tgocr -m 130 -p ./database/ text1.pbm # extend database\n"
          "\tdjpeg -pnm -gray text.jpg | gocr - # use jpeg-file via pipe\n"
          "\n");
  fprintf(stderr, " webpage: http://jocr.sourceforge.net/\n");
  exit(0);
}
93
+
94
+ #ifdef HAVE_GETTIMEOFDAY
95
+ /* from the glibc documentation */
96
/*
 * Compute *result = *x - *y (adapted from the glibc documentation).
 *
 * result : receives the difference; tv_usec ends up in 0..999999
 * x      : minuend, only read
 * y      : subtrahend, only read -- the carry is performed on a private
 *          copy, so the caller's value is left untouched
 *
 * Returns 1 if the difference is negative, otherwise 0.
 */
static int timeval_subtract (struct timeval *result, struct timeval *x,
                             struct timeval *y) {
  struct timeval sub = *y;  /* normalised copy of the subtrahend */
  int negative;

  /* Perform the carry for the later subtraction by updating the copy. */
  if (x->tv_usec < sub.tv_usec) {
    int nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;
    sub.tv_usec -= 1000000 * nsec;
    sub.tv_sec += nsec;
  }
  /* ">=" so that a difference of exactly one second is carried as well */
  if (x->tv_usec - sub.tv_usec >= 1000000) {
    int nsec = (x->tv_usec - sub.tv_usec) / 1000000;
    sub.tv_usec += 1000000 * nsec;
    sub.tv_sec -= nsec;
  }

  /* Decide the sign before result is written, in case result aliases x. */
  negative = x->tv_sec < sub.tv_sec;

  /* Compute the time difference.
     `tv_usec' is certainly non-negative. */
  result->tv_sec = x->tv_sec - sub.tv_sec;
  result->tv_usec = x->tv_usec - sub.tv_usec;

  return negative;
}
119
+ #endif
120
+
121
+ static void process_arguments(job_t *job, int argn, char *argv[])
122
+ {
123
+ int i;
124
+ char *s1;
125
+
126
+ assert(job);
127
+
128
+ if (argn <= 1) {
129
+ out_version(1);
130
+ exit(0);
131
+ }
132
+ #ifdef HAVE_PGM_H
133
+ pnm_init(&argn, &argv);
134
+ #endif
135
+
136
+ /* process arguments */
137
+ for (i = 1; i < argn; i++) {
138
+ if (strcmp(argv[i], "--help") == 0)
139
+ help(); /* and quits */
140
+ if (argv[i][0] == '-' && argv[i][1] != 0) {
141
+ s1 = "";
142
+ if (i + 1 < argn)
143
+ s1 = argv[i + 1];
144
+ switch (argv[i][1]) {
145
+ case 'h': /* help */
146
+ help();
147
+ break;
148
+ case 'i': /* input image file */
149
+ job->src.fname = s1;
150
+ i++;
151
+ break;
152
+ case 'e': /* logging file */
153
+ if (s1[0] == '-' && s1[1] == '\0') {
154
+ #ifdef HAVE_UNISTD_H
155
+ dup2(STDOUT_FILENO, STDERR_FILENO); /* -e /dev/stdout works */
156
+ #else
157
+ fprintf(stderr, "stderr redirection not possible without unistd.h\n");
158
+ #endif
159
+ }
160
+ else if (!freopen(s1, "w", stderr)) {
161
+ fprintf(stderr, "stderr redirection to %s failed\n", s1);
162
+ }
163
+ i++;
164
+ break;
165
+ case 'p': /* database path */
166
+ job->cfg.db_path=s1;
167
+ i++;
168
+ break;
169
+ case 'o': /* output file */
170
+ if (s1[0] == '-' && s1[1] == '\0') { /* default */
171
+ }
172
+ else if (!freopen(s1, "w", stdout)) {
173
+ fprintf(stderr, "stdout redirection to %s failed\n", s1);
174
+ };
175
+ i++;
176
+ break;
177
+ case 'f': /* output format */
178
+ if (strcmp(s1, "ISO8859_1") == 0) job->cfg.out_format=ISO8859_1; else
179
+ if (strcmp(s1, "TeX") == 0) job->cfg.out_format=TeX; else
180
+ if (strcmp(s1, "HTML") == 0) job->cfg.out_format=HTML; else
181
+ if (strcmp(s1, "XML") == 0) job->cfg.out_format=XML; else
182
+ if (strcmp(s1, "SGML") == 0) job->cfg.out_format=SGML; else
183
+ if (strcmp(s1, "UTF8") == 0) job->cfg.out_format=UTF8; else
184
+ if (strcmp(s1, "ASCII") == 0) job->cfg.out_format=ASCII; else
185
+ fprintf(stderr,"Warning: unknown format (-f %s)\n",s1);
186
+ i++;
187
+ break;
188
+ case 'c': /* list of chars (_ = not recognized chars) */
189
+ job->cfg.lc = s1;
190
+ i++;
191
+ break;
192
+ case 'C': /* char filter, default: NULL (all chars) */
193
+ /* ToDo: UTF8 input, wchar */
194
+ job->cfg.cfilter = s1;
195
+ i++;
196
+ break;
197
+ case 'd': /* dust size */
198
+ job->cfg.dust_size = atoi(s1);
199
+ i++;
200
+ break;
201
+ case 'l': /* grey level 0<160<=255, 0 for autodetect */
202
+ job->cfg.cs = atoi(s1);
203
+ i++;
204
+ break;
205
+ case 's': /* spacewidth/dots (0 = autodetect) */
206
+ job->cfg.spc = atoi(s1);
207
+ i++;
208
+ break;
209
+ case 'v': /* verbose mode */
210
+ job->cfg.verbose |= atoi(s1);
211
+ i++;
212
+ break;
213
+ case 'm': /* operation modes */
214
+ job->cfg.mode |= atoi(s1);
215
+ i++;
216
+ break;
217
+ case 'n': /* numbers only */
218
+ job->cfg.only_numbers = atoi(s1);
219
+ i++;
220
+ break;
221
+ case 'x': /* initialize progress output s1=fname */
222
+ ini_progress(s1);
223
+ i++;
224
+ break;
225
+ case 'a': /* set certainty */
226
+ job->cfg.certainty = atoi(s1);;
227
+ i++;
228
+ break;
229
+ case 'u': /* output marker for unrecognized chars */
230
+ job->cfg.unrec_marker = s1;
231
+ i++;
232
+ break;
233
+ default:
234
+ fprintf(stderr, "# unknown option use -h for help\n");
235
+ }
236
+ continue;
237
+ }
238
+ else /* argument can be filename v0.2.5 */ if (argv[i][0] != '-'
239
+ || argv[i][1] == '\0' ) {
240
+ job->src.fname = argv[i];
241
+ }
242
+ }
243
+ }
244
+
245
+ static void mark_start(job_t *job) {
246
+ assert(job);
247
+
248
+ if (job->cfg.verbose) {
249
+ out_version(0);
250
+ /* insert some helpful info for support */
251
+ fprintf(stderr, "# compiled: " __DATE__ );
252
+ #if defined(__GNUC__)
253
+ fprintf(stderr, " GNUC-%d", __GNUC__ );
254
+ #endif
255
+ #ifdef __GNUC_MINOR__
256
+ fprintf(stderr, ".%d", __GNUC_MINOR__ );
257
+ #endif
258
+ #if defined(__linux)
259
+ fprintf(stderr, " linux");
260
+ #elif defined(__unix)
261
+ fprintf(stderr, " unix");
262
+ #endif
263
+ #if defined(__WIN32) || defined(__WIN32__)
264
+ fprintf(stderr, " WIN32");
265
+ #endif
266
+ #if defined(__WIN64) || defined(__WIN64__)
267
+ fprintf(stderr, " WIN64");
268
+ #endif
269
+ #if defined(__VERSION__)
270
+ fprintf(stderr, " version " __VERSION__ );
271
+ #endif
272
+ fprintf(stderr, "\n");
273
+ fprintf(stderr,
274
+ "# options are: -l %d -s %d -v %d -c %s -m %d -d %d -n %d -a %d -C \"%s\"\n",
275
+ job->cfg.cs, job->cfg.spc, job->cfg.verbose, job->cfg.lc, job->cfg.mode,
276
+ job->cfg.dust_size, job->cfg.only_numbers, job->cfg.certainty,
277
+ job->cfg.cfilter);
278
+ fprintf(stderr, "# file: %s\n", job->src.fname);
279
+ #ifdef USE_UNICODE
280
+ fprintf(stderr,"# using unicode\n");
281
+ #endif
282
+ #ifdef HAVE_GETTIMEOFDAY
283
+ gettimeofday(&job->tmp.init_time, NULL);
284
+ #endif
285
+ }
286
+ }
287
+
288
+ static void mark_end(job_t *job) {
289
+ assert(job);
290
+
291
+ #ifdef HAVE_GETTIMEOFDAY
292
+ /* show elapsed time */
293
+ if (job->cfg.verbose) {
294
+ struct timeval end, result;
295
+ gettimeofday(&end, NULL);
296
+ timeval_subtract(&result, &end, &job->tmp.init_time);
297
+ fprintf(stderr,"Elapsed time: %d:%02d:%3.3f.\n", (int)result.tv_sec/60,
298
+ (int)result.tv_sec%60, (float)result.tv_usec/1000);
299
+ }
300
+ #endif
301
+ }
302
+
303
+ static int read_picture(job_t *job) {
304
+ int rc=0;
305
+ assert(job);
306
+
307
+ if (strstr(job->src.fname, ".pcx"))
308
+ readpcx(job->src.fname, &job->src.p, job->cfg.verbose);
309
+ else
310
+ rc=readpgm(job->src.fname, &job->src.p, job->cfg.verbose);
311
+ return rc; /* 1 for multiple images, 0 else */
312
+ }
313
+
314
+ /* subject of change, we need more output for XML (ToDo) */
315
+ void print_output(job_t *job) {
316
+ int linecounter = 0;
317
+ const char *line;
318
+
319
+ assert(job);
320
+
321
+ linecounter = 0;
322
+ line = getTextLine(linecounter++);
323
+ while (line) {
324
+ /* notice: decode() is shiftet to getTextLine since 0.38 */
325
+ fputs(line, stdout);
326
+ if (job->cfg.out_format==HTML) fputs("<br />",stdout);
327
+ if (job->cfg.out_format!=XML) fputc('\n', stdout);
328
+ line = getTextLine(linecounter++);
329
+ }
330
+ free_textlines();
331
+ }
332
+
333
+ /* FIXME jb: remove JOB; */
334
+ job_t *JOB; /* global handle to the current job; main() points it at its local job_t */
335
+
336
+
337
+ /* -------------------------------------------------------------
338
+ // ------ MAIN - replace this by your own application!
339
+ // ------------------------------------------------------------- */
340
+ int main(int argn, char *argv[]) {
341
+ int multipnm=1;
342
+ job_t job;
343
+
344
+ JOB = &job;
345
+ setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
346
+
347
+ while (multipnm==1) {
348
+
349
+ job_init(&job);
350
+
351
+ process_arguments(&job, argn, argv);
352
+
353
+ mark_start(&job);
354
+
355
+ multipnm = read_picture(&job);
356
+ /* separation of main and rest for using as lib
357
+ this will be changed later => introduction of set_option()
358
+ for better communication to the engine */
359
+ if (multipnm<0) break; /* read error */
360
+
361
+ /* call main loop */
362
+ pgm2asc(&job);
363
+
364
+ mark_end(&job);
365
+
366
+ print_output(&job);
367
+
368
+ job_free(&job);
369
+
370
+ }
371
+
372
+ return 0;
373
+ }
@@ -0,0 +1,288 @@
1
+ /*
2
+ This is a Optical-Character-Recognition program
3
+ Copyright (C) 2000-2006 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+
21
+ sometimes I have written comments in german language, sorry for that
22
+
23
+ - look for ??? for preliminary code
24
+ */
25
+
26
+ /* General headerfile with gocr-definitions */
27
+
28
+ #ifndef __GOCR_H__
29
+ #define __GOCR_H__
30
+
31
+ #include "pnm.h"
32
+ #include "unicode.h"
33
+ #include "list.h"
34
+ #include <stddef.h>
35
+ #ifdef HAVE_GETTIMEOFDAY
36
+ #include <sys/time.h>
37
+ #endif
38
+
39
+ /*
40
+ * wchar_t should always exist (ANSI), but WCHAR.H is sometimes missing
41
+ * USE_UNICODE should be removed or replaced by HAVE_WCHAR_H in future
42
+ */
43
+ #ifdef HAVE_WCHAR_H
44
+ #define USE_UNICODE 1
45
+ #endif
46
+
47
+ /* extern "C"{ */
48
+ /* ------------------------ feature extraction ----------------- */
49
+ #define AT 7 /* mark */
50
+ #define M1 1 /* mark */
51
/* the four walking directions, numbered from 1 */
enum direction {
  UP = 1,
  DO = 2,
  RI = 3,
  LE = 4
};
typedef enum direction DIRECTION;
55
+ #define ST 7 /* stop */
56
+ /* ------------------------------------------------------------- */
57
+ /* detect maxima of line overlaps (returned in %) and line coordinates */
58
+ #define HOR 1 /* horizontal */
59
+ #define VER 2 /* vertikal */
60
+ #define RIS 3 /* rising=steigend */
61
+ #define FAL 4 /* falling=fallend */
62
+
63
#define MAXlines 1024  /* capacity of every per-line table in struct tlines */

/* ToDo: if we have a tree instead of a list, a line could be a node object */
/* Per-line layout data of the detected text lines; one table entry per line. */
struct tlines {
  int num;
  int dx;               /* direction of text lines (straight/skew), x part */
  int dy;               /* direction of text lines (straight/skew), y part */
  int m1[MAXlines];     /* start of line = upper bound of 'A' */
  int m2[MAXlines];     /* upper bound of 'e' */
  int m3[MAXlines];     /* lower bound of 'e' = baseline */
  int m4[MAXlines];     /* stop of line = lower bound of 'q' */
  /* ToDo: add sureness per m1,m2 etc? */
  int x0[MAXlines];     /* left border */
  int x1[MAXlines];     /* right border */
  int wt[MAXlines];     /* weight, how sure that is correct in percent, v0.41 */
  int pitch[MAXlines];  /* word pitch (later per box?), v0.41 */
  int mono[MAXlines];   /* spacing type, 0=proportional, 1=monospaced */
};
80
+
81
+ #define NumAlt 10 /* maximal number of alternative chars (table length) */
82
+ #define MaxNumFrames 8 /* maximum number of frames per char/box */
83
+ #define MaxFrameVectors 128 /* maximum vectors per frame (*8=1KB/box) */
84
+ /* ToDo: use only malloc_box(),free_box(),copybox() for creation, destroy etc.
85
+ * adding reference_counter to avoid pointer pointing to freed box
86
+ */
87
+ struct box { /* this structure should contain all pixel infos of a letter */
88
+ int x0,x1,y0,y1,x,y,dots; /* xmin,xmax,ymin,ymax,reference-pixel,i-dots */
89
+ int num_boxes, /* 1 "abc", 2 "!i?", 3 "&auml;" (composed objects) 0.41 */
90
+ num_subboxes; /* 1 for "abdegopqADOPQR", 2 for "B" (holes) 0.41 */
91
+ wchar_t c; /* detected char (same as tac[0], obsolete?) */
92
+ wchar_t modifier; /* default=0, see compose() in unicode.c */
93
+ int num; /* same number = same char */
94
+ int line; /* line number (points to struct tlines lines) */
95
+ int m1,m2,m3,m4; /* m2 = upper boundary, m3 = baseline */
96
+ /* planed: sizeof hole_1, hole_2, certainty (run1=100%,run2=90%,etc.) */
97
+ pix *p; /* pointer to pixmap (v0.2.5) */
98
+ /* tac, wac is used together with setac() to manage very similar chars */
99
+ int num_ac; /* length of table (alternative chars), default=0 */
100
+ wchar_t tac[NumAlt]; /* alternative chars, only used by setac(),getac() */
101
+ int wac[NumAlt]; /* weight of alternative chars */
102
+ char *tas[NumAlt]; /* alternative UTF8-strings or XML codes if tac[]=0 */
103
+ /* replacing old obj */
104
+ /* ToDo: (*obj)[NumAlt] + olen[NumAlt] ??? */
105
+ /* ToDo: bitmap for possible Picture|Object|Char ??? */
106
+ /* char *obj; */ /* pointer to text-object ... -> replaced by tas[] */
107
+ /* ... (melted chars, barcode, picture coords, ...) */
108
+ /* must be freed before box is freed! */
109
+ /* do _not_ copy only the pointer to object */
110
+ /* --------------------------------------------------------
111
+ * extension since v0.41 js05, Store frame vectors,
112
+ * which is a table of vectors sourrounding the char and its
113
+ * inner white holes. The advantage is the independence from
114
+ * resolution, handling of holes, overlap and rotation.
115
+ * --------------------------------------------------------- */
116
+ int num_frames; /* number of frames: 1 for cfhklmnrstuvwxyz */
117
+ /* 2 for abdegijopq */
118
+ int frame_vol[MaxNumFrames]; /* volume inside frame +/- (black/white) */
119
+ int frame_per[MaxNumFrames]; /* periphery, summed length of vectors */
120
+ int num_frame_vectors[MaxNumFrames]; /* index to next frame */
121
+ /* biggest frame should be stored first (outer frame) */
122
+ /* biggest has the maximum pair distance */
123
+ /* num vector loops */
124
+ int frame_vector[MaxFrameVectors][2]; /* may be 16*int=fixpoint_number */
125
+
126
+ };
127
+ typedef struct box Box;
128
+
129
/* True if the coordinate pair (a,b) lies outside the image p, i.e. a or b is
 * negative, a >= p->x or b >= p->y.
 * Every argument is parenthesised so that arbitrary expressions can be
 * passed; note that a and b are still evaluated more than once. */
#define outbounds(p, a, b) \
  ((a) < 0 || (b) < 0 || (a) >= (p)->x || (b) >= (p)->y)
131
+
132
/* ToDo: this structure seems to be obsolete, remove it */
/* A path described by its colour transitions. */
struct path {
  int start;  /* color at the beginning of the path, (0=white, 1=black) */
  int *x;     /* x coordinates of transitions */
  int *y;     /* y coordinates of transitions */
  int num;    /* current number of entries in x or y */
  int max;    /* maximum number of entries in x or y */
              /* (if more values need to be stored, the arrays are enlarged) */
};
typedef struct path path_t;
141
+
142
+ /* job_t contains all information needed for an OCR task: source (src), temporary data (tmp), results (res), configuration (cfg) */
143
+ typedef struct job_s {
144
+ struct { /* source data */
145
+ char *fname; /* input filename; default value: "-" */
146
+ pix p; /* source pixel data, pixelmap 8bit gray */
147
+ } src;
148
+ struct { /* temporary stuff, e.g. buffers */
149
+ #ifdef HAVE_GETTIMEOFDAY
150
+ struct timeval init_time; /* starting time of this job */
151
+ #endif
152
+ pix ppo; /* pixmap for visual debugging output, obsolete */
153
+
154
+ /* sometimes the recognition function is called again and again if the result was 0;
155
+ n_run tells the pixel function to return alternative results */
156
+ int n_run; /* number of the run; in run 2 critical patterns get other results */
157
+ /* used for the 2nd try, pixel uses a slower filter function etc. */
158
+ List dblist; /* list of boxes loaded from the character database */
159
+ } tmp;
160
+ struct { /* results */
161
+ List boxlist; /* store every object in a box, which contains */
162
+ /* the characteristics of the object (see struct box) */
163
+ List linelist; /* recognized text lines after recognition */
164
+
165
+ struct tlines lines; /* used to access line data (statistics) */
166
+ /* here the positions (frames) of lines are */
167
+ /* stored for further use */
168
+ int avX,avY; /* average X,Y (avX=sumX/numC) */
169
+ int sumX,sumY,numC; /* sum of all X,Y; number of chars */
170
+ } res;
171
+ struct { /* configuration */
172
+ int cs; /* critical grey value (pixel<cs => black pixel) */
173
+ /* range: 0..255, 0 means autodetection */
174
+ int spc; /* space width in dots (0 = autodetect); default value: 0 */
175
+ int mode; /* operation modes; default value: 0 */
176
+ /* operation mode (see --help) */
177
+ int dust_size; /* dust size; default value: 10 */
178
+ int only_numbers; /* numbers only; default value: 0 */
179
+ int verbose; /* verbose mode; default value: 0 */
180
+ /* verbose option (see --help) */
181
+ FORMAT out_format; /* output format; default value: ISO8859_1 */
182
+ char *lc; /* debug list of chars (_ = unrecognized chars) */
183
+ /* default value: "_" */
184
+ char *db_path; /* pathname for database; default value: NULL */
185
+ char *cfilter; /* char filter; default value: NULL, ex: "A-Za-z" */
186
+ /* limit of certainty above which chars are accepted as identified */
187
+ int certainty; /* in percent; 0..100; default 95 */
188
+ char *unrec_marker; /* output this string for every unrecognized char */
189
+ } cfg;
190
+ } job_t;
191
+
192
+ /* initialize the job structure pointed to by job */
193
+ void job_init(job_t *job);
194
+
195
+ /* free the job structure; NOTE(review): presumably the counterpart of job_init() -- confirm what exactly is released */
196
+ void job_free(job_t *job);
197
+
198
+ /* FIXME jb: remove JOB; global job pointer, only declared here (defined in another source file) */
199
+ extern job_t *JOB;
200
+
201
+ /* calculate the overlap of the line (point 0 to point 1) with black points
202
+ * by recursive bisection
203
+ * (possibly search pixels in the neighbourhood dx,dy for error tolerance) (switchable) ???
204
+ * MidPoint Line Algorithm (Bresenham) Foley: ComputerGraphics better?
205
+ * will be replaced by vector functions
206
+ */
207
+
208
+ /* straight line y=dy/dx*x+b, implicit form d=F(x,y)=dy*x-dx*y+b*dx=0
209
+ * incremental: y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y)) */
210
+ int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
211
+ int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
212
+
213
+ /* look for white (0x02) or black (0x01) dots (0x03 = white+black); note the argument order x0,x1,y0,y1 */
214
+ char get_bw(int x0, int x1, int y0, int y1,
215
+ pix *p, int cs,int mask);
216
+
217
+ /* look for black crossing the line from (x0,y0) to (x1,y1)
218
+ * follow the line and count crossings ([white]-black transitions); note the argument order x0,x1,y0,y1
219
+ */
220
+ int num_cross(int x0, int x1, int y0, int y1,
221
+ pix *p, int cs);
222
+
223
+ /* memory (re)allocation with error checking; NOTE(review): presumably same arguments as realloc(), failure handling is in the implementation -- confirm */
224
+ void *xrealloc(void *ptr, size_t size);
225
+
226
+ /* follow the line from (x0,y0) to (x1,y1), recording the locations of transitions in path,
227
+ * return the count of transitions; note the argument order x0,x1,y0,y1
228
+ */
229
+ int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path);
230
+
231
+ /* -------------------------------------------------------------
232
+ * mark edge-points
233
+ * - first move forward until b/w-edge
234
+ * - more than 2 pixels?
235
+ * - loop around
236
+ * - if forward pixel : go up, rotate right
237
+ * - if forward no pixel : rotate left
238
+ * - stop if the first 2 pixels are found again in the same order
239
+ * using the walk-along-the-right-hand-wall strategy
240
+ * --------------------------------------------------------------
241
+ * turmite game: inp: start-x,y, rule r_black=UP,r_white=RIght until border
242
+ * out: last-position
243
+ * while doing so count steps, dead ends, xmax, ymax, upper-right, lower-right, upper-left, lower-left corners
244
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
245
+ *
246
+ * is this the right place for declaration?
247
+ */
248
+ void turmite(pix *p, int *x, int *y,
249
+ int x0, int x1, int y0, int y1, int cs, int rw, int rb);
250
+
251
+ /* test if the points (x0,y0) and (x1,y1) are connected via t-pixel (recursive!) */
252
+ int joined(pix *p, int x0, int y0, int x1, int y1, int cs);
253
+
254
+ /* move from x,y in direction r until a pixel is found or l steps are done
255
+ * return the number of steps; NOTE(review): presumably stops at a pixel of colour col -- confirm
256
+ */
257
+ int loop(pix *p, int x, int y, int l, int cs, int col, DIRECTION r);
258
+
259
+ #define MAX_HOLES 3 /* size of the hole[] array in holes_t */
260
+ typedef struct list_holes {
261
+ int num; /* number of holes, initialize with 0 */
262
+ struct hole_s {
263
+ int size,x,y,x0,y0,x1,y1; /* size, start point (x,y), outer rectangle (x0,y0,x1,y1) */
264
+ } hole[MAX_HOLES];
265
+ } holes_t;
266
+
267
+ /* look for white holes surrounded by black points
268
+ * at the moment: a white point with black in all four directions; note the argument order x0,x1,y0,y1
269
+ */
270
+ int num_hole(int x0, int x1, int y0, int y1, pix *p, int cs, holes_t *holes);
271
+
272
+ /* count black non-connected objects --- used for i, a-umlaut, o-umlaut, etc. */
273
+ int num_obj(int x0, int x1, int y0, int y1, pix *p, int cs);
274
+
275
+ int distance( pix *p1, struct box *box1, /* box-frame; NOTE(review): the distance measure is defined in the implementation, not visible here */
276
+ pix *p2, struct box *box2, int cs);
277
+
278
+ /* call the OCR engine ;) */
279
+ /* char whatletter(struct box *box1,int cs); */
280
+
281
+ /* defined in pixel.c */
282
+ /* getpixel() was pixel() but that name may collide with the netpbm pixel declaration */
283
+ int getpixel(pix *p, int x, int y);
284
+ int marked(pix *p, int x, int y);
285
+ void put(pix * p, int x, int y, int ia, int io);
286
+
287
+ /* } */ /* extern C */
288
+ #endif /* __GOCR_H__ */