isbn 2.0.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,373 +0,0 @@
1
- /*
2
- This is an Optical-Character-Recognition program
3
- Copyright (C) 2000-2009 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
-
21
- sometimes I have written comments in german language, sorry for that
22
-
23
- This file was retrieved from pgm2asc.cc of Joerg, in order to have
24
- a library of the ocr-engine from Klaas Freitag
25
-
26
- */
27
- #include "config.h"
28
- #include <stdlib.h>
29
- #include <stdio.h>
30
- #include <assert.h>
31
- #include <string.h>
32
- #ifdef HAVE_GETTIMEOFDAY
33
- #include <sys/time.h>
34
- #endif
35
- #ifdef HAVE_UNISTD_H
36
- #include <unistd.h>
37
- #endif
38
-
39
- #include "pnm.h"
40
- #include "pgm2asc.h"
41
- #include "pcx.h"
42
- #include "ocr0.h" /* only_numbers */
43
- #include "progress.h"
44
- #include "version.h"
45
-
46
- static void out_version(int v) {
47
- fprintf(stderr, " Optical Character Recognition --- gocr "
48
- version_string " " release_string "\n"
49
- " Copyright (C) 2001-2009 Joerg Schulenburg GPG=1024D/53BDFBE3\n"
50
- " released under the GNU General Public License\n");
51
- /* as recommended, (c) and license should be part of the binary */
52
- /* no email because of SPAM, see README for contacting the author */
53
- if (v)
54
- fprintf(stderr, " use option -h for help\n");
55
- if (v & 2)
56
- exit(1);
57
- return;
58
- }
59
-
/*
 * Print the banner followed by the short usage text to stderr, then
 * terminate the process with exit status 0.  This function never returns.
 *
 * The text is shortened to essentials; see the manual page for details.
 */
static void help(void) {
  /* Each chunk stays below 509 bytes, the minimum string-literal length
   * an ISO C89 compiler has to support. */
  static const char *const usage[] = {
    " using: gocr [options] pnm_file_name # use - for stdin\n"
    " options (see gocr manual pages for more details):\n"
    " -h, --help\n"
    " -i name - input image file (pnm,pgm,pbm,ppm,pcx,...)\n"
    " -o name - output file (redirection of stdout)\n"
    " -e name - logging file (redirection of stderr)\n"
    " -x name - progress output to fifo (see manual)\n"
    " -p name - database path including final slash (default is ./db/)\n",

    /* NOTE(review): the doubled quotes around 0-9A-Fx are adjacent string
     * literals, so no quote character is actually printed -- confirm
     * whether \" was intended. */
    " -f fmt - output format (ISO8859_1 TeX HTML XML UTF8 ASCII)\n"
    " -l num - threshold grey level 0<160<=255 (0 = autodetect)\n"
    " -d num - dust_size (remove small clusters, -1 = autodetect)\n"
    " -s num - spacewidth/dots (0 = autodetect)\n"
    " -v num - verbose (see manual page)\n"
    " -c string - list of chars (debugging, see manual)\n"
    " -C string - char filter (ex. hexdigits: ""0-9A-Fx"", only ASCII)\n"
    " -m num - operation modes (bitpattern, see manual)\n",

    " -a num - value of certainty (in percent, 0..100, default=95)\n"
    " -u string - output this string for every unrecognized character\n",

    " examples:\n"
    "\tgocr -m 4 text1.pbm # do layout analyzis\n"
    "\tgocr -m 130 -p ./database/ text1.pbm # extend database\n"
    "\tdjpeg -pnm -gray text.jpg | gocr - # use jpeg-file via pipe\n"
    "\n",

    " webpage: http://jocr.sourceforge.net/\n"
  };
  size_t k;

  out_version(0);
  for (k = 0; k < sizeof usage / sizeof usage[0]; k++) {
    fputs(usage[k], stderr);
  }
  exit(0);
}
93
-
94
- #ifdef HAVE_GETTIMEOFDAY
/*
 * Compute *result = *x - *y (adapted from the glibc documentation).
 *
 * result: receives the difference.
 * x:      minuend, read only.
 * y:      subtrahend.  NOTE: *y is modified in place -- whole seconds are
 *         moved between y->tv_sec and y->tv_usec so that the microsecond
 *         difference does not go negative.
 *
 * Returns 1 if the difference is negative (x earlier than y), otherwise 0.
 */
static int timeval_subtract (struct timeval *result, struct timeval *x,
                             struct timeval *y) {
  enum { USEC_PER_SEC = 1000000 };

  if (x->tv_usec < y->tv_usec) {
    /* x has fewer microseconds: borrow seconds by pushing them into y */
    int borrow = (y->tv_usec - x->tv_usec) / USEC_PER_SEC + 1;
    y->tv_sec  += borrow;
    y->tv_usec -= USEC_PER_SEC * borrow;
  }
  if (x->tv_usec - y->tv_usec > USEC_PER_SEC) {
    /* more than a second of microseconds left over: hand it back */
    int surplus = (x->tv_usec - y->tv_usec) / USEC_PER_SEC;
    y->tv_sec  -= surplus;
    y->tv_usec += USEC_PER_SEC * surplus;
  }

  /* After the adjustments above the microsecond part is not negative. */
  result->tv_usec = x->tv_usec - y->tv_usec;
  result->tv_sec  = x->tv_sec - y->tv_sec;

  return (x->tv_sec < y->tv_sec) ? 1 : 0;
}
119
- #endif
120
-
121
- static void process_arguments(job_t *job, int argn, char *argv[])
122
- {
123
- int i;
124
- char *s1;
125
-
126
- assert(job);
127
-
128
- if (argn <= 1) {
129
- out_version(1);
130
- exit(0);
131
- }
132
- #ifdef HAVE_PGM_H
133
- pnm_init(&argn, &argv);
134
- #endif
135
-
136
- /* process arguments */
137
- for (i = 1; i < argn; i++) {
138
- if (strcmp(argv[i], "--help") == 0)
139
- help(); /* and quits */
140
- if (argv[i][0] == '-' && argv[i][1] != 0) {
141
- s1 = "";
142
- if (i + 1 < argn)
143
- s1 = argv[i + 1];
144
- switch (argv[i][1]) {
145
- case 'h': /* help */
146
- help();
147
- break;
148
- case 'i': /* input image file */
149
- job->src.fname = s1;
150
- i++;
151
- break;
152
- case 'e': /* logging file */
153
- if (s1[0] == '-' && s1[1] == '\0') {
154
- #ifdef HAVE_UNISTD_H
155
- dup2(STDOUT_FILENO, STDERR_FILENO); /* -e /dev/stdout works */
156
- #else
157
- fprintf(stderr, "stderr redirection not possible without unistd.h\n");
158
- #endif
159
- }
160
- else if (!freopen(s1, "w", stderr)) {
161
- fprintf(stderr, "stderr redirection to %s failed\n", s1);
162
- }
163
- i++;
164
- break;
165
- case 'p': /* database path */
166
- job->cfg.db_path=s1;
167
- i++;
168
- break;
169
- case 'o': /* output file */
170
- if (s1[0] == '-' && s1[1] == '\0') { /* default */
171
- }
172
- else if (!freopen(s1, "w", stdout)) {
173
- fprintf(stderr, "stdout redirection to %s failed\n", s1);
174
- };
175
- i++;
176
- break;
177
- case 'f': /* output format */
178
- if (strcmp(s1, "ISO8859_1") == 0) job->cfg.out_format=ISO8859_1; else
179
- if (strcmp(s1, "TeX") == 0) job->cfg.out_format=TeX; else
180
- if (strcmp(s1, "HTML") == 0) job->cfg.out_format=HTML; else
181
- if (strcmp(s1, "XML") == 0) job->cfg.out_format=XML; else
182
- if (strcmp(s1, "SGML") == 0) job->cfg.out_format=SGML; else
183
- if (strcmp(s1, "UTF8") == 0) job->cfg.out_format=UTF8; else
184
- if (strcmp(s1, "ASCII") == 0) job->cfg.out_format=ASCII; else
185
- fprintf(stderr,"Warning: unknown format (-f %s)\n",s1);
186
- i++;
187
- break;
188
- case 'c': /* list of chars (_ = not recognized chars) */
189
- job->cfg.lc = s1;
190
- i++;
191
- break;
192
- case 'C': /* char filter, default: NULL (all chars) */
193
- /* ToDo: UTF8 input, wchar */
194
- job->cfg.cfilter = s1;
195
- i++;
196
- break;
197
- case 'd': /* dust size */
198
- job->cfg.dust_size = atoi(s1);
199
- i++;
200
- break;
201
- case 'l': /* grey level 0<160<=255, 0 for autodetect */
202
- job->cfg.cs = atoi(s1);
203
- i++;
204
- break;
205
- case 's': /* spacewidth/dots (0 = autodetect) */
206
- job->cfg.spc = atoi(s1);
207
- i++;
208
- break;
209
- case 'v': /* verbose mode */
210
- job->cfg.verbose |= atoi(s1);
211
- i++;
212
- break;
213
- case 'm': /* operation modes */
214
- job->cfg.mode |= atoi(s1);
215
- i++;
216
- break;
217
- case 'n': /* numbers only */
218
- job->cfg.only_numbers = atoi(s1);
219
- i++;
220
- break;
221
- case 'x': /* initialize progress output s1=fname */
222
- ini_progress(s1);
223
- i++;
224
- break;
225
- case 'a': /* set certainty */
226
- job->cfg.certainty = atoi(s1);;
227
- i++;
228
- break;
229
- case 'u': /* output marker for unrecognized chars */
230
- job->cfg.unrec_marker = s1;
231
- i++;
232
- break;
233
- default:
234
- fprintf(stderr, "# unknown option use -h for help\n");
235
- }
236
- continue;
237
- }
238
- else /* argument can be filename v0.2.5 */ if (argv[i][0] != '-'
239
- || argv[i][1] == '\0' ) {
240
- job->src.fname = argv[i];
241
- }
242
- }
243
- }
244
-
245
- static void mark_start(job_t *job) {
246
- assert(job);
247
-
248
- if (job->cfg.verbose) {
249
- out_version(0);
250
- /* insert some helpful info for support */
251
- fprintf(stderr, "# compiled: " __DATE__ );
252
- #if defined(__GNUC__)
253
- fprintf(stderr, " GNUC-%d", __GNUC__ );
254
- #endif
255
- #ifdef __GNUC_MINOR__
256
- fprintf(stderr, ".%d", __GNUC_MINOR__ );
257
- #endif
258
- #if defined(__linux)
259
- fprintf(stderr, " linux");
260
- #elif defined(__unix)
261
- fprintf(stderr, " unix");
262
- #endif
263
- #if defined(__WIN32) || defined(__WIN32__)
264
- fprintf(stderr, " WIN32");
265
- #endif
266
- #if defined(__WIN64) || defined(__WIN64__)
267
- fprintf(stderr, " WIN64");
268
- #endif
269
- #if defined(__VERSION__)
270
- fprintf(stderr, " version " __VERSION__ );
271
- #endif
272
- fprintf(stderr, "\n");
273
- fprintf(stderr,
274
- "# options are: -l %d -s %d -v %d -c %s -m %d -d %d -n %d -a %d -C \"%s\"\n",
275
- job->cfg.cs, job->cfg.spc, job->cfg.verbose, job->cfg.lc, job->cfg.mode,
276
- job->cfg.dust_size, job->cfg.only_numbers, job->cfg.certainty,
277
- job->cfg.cfilter);
278
- fprintf(stderr, "# file: %s\n", job->src.fname);
279
- #ifdef USE_UNICODE
280
- fprintf(stderr,"# using unicode\n");
281
- #endif
282
- #ifdef HAVE_GETTIMEOFDAY
283
- gettimeofday(&job->tmp.init_time, NULL);
284
- #endif
285
- }
286
- }
287
-
288
- static void mark_end(job_t *job) {
289
- assert(job);
290
-
291
- #ifdef HAVE_GETTIMEOFDAY
292
- /* show elapsed time */
293
- if (job->cfg.verbose) {
294
- struct timeval end, result;
295
- gettimeofday(&end, NULL);
296
- timeval_subtract(&result, &end, &job->tmp.init_time);
297
- fprintf(stderr,"Elapsed time: %d:%02d:%3.3f.\n", (int)result.tv_sec/60,
298
- (int)result.tv_sec%60, (float)result.tv_usec/1000);
299
- }
300
- #endif
301
- }
302
-
303
- static int read_picture(job_t *job) {
304
- int rc=0;
305
- assert(job);
306
-
307
- if (strstr(job->src.fname, ".pcx"))
308
- readpcx(job->src.fname, &job->src.p, job->cfg.verbose);
309
- else
310
- rc=readpgm(job->src.fname, &job->src.p, job->cfg.verbose);
311
- return rc; /* 1 for multiple images, 0 else */
312
- }
313
-
314
- /* subject of change, we need more output for XML (ToDo) */
315
- void print_output(job_t *job) {
316
- int linecounter = 0;
317
- const char *line;
318
-
319
- assert(job);
320
-
321
- linecounter = 0;
322
- line = getTextLine(linecounter++);
323
- while (line) {
324
- /* notice: decode() is shiftet to getTextLine since 0.38 */
325
- fputs(line, stdout);
326
- if (job->cfg.out_format==HTML) fputs("<br />",stdout);
327
- if (job->cfg.out_format!=XML) fputc('\n', stdout);
328
- line = getTextLine(linecounter++);
329
- }
330
- free_textlines();
331
- }
332
-
333
- /* FIXME jb: remove JOB; */
334
- job_t *JOB;
335
-
336
-
337
- /* -------------------------------------------------------------
338
- // ------ MAIN - replace this by your own aplication!
339
- // ------------------------------------------------------------- */
340
- int main(int argn, char *argv[]) {
341
- int multipnm=1;
342
- job_t job;
343
-
344
- JOB = &job;
345
- setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
346
-
347
- while (multipnm==1) {
348
-
349
- job_init(&job);
350
-
351
- process_arguments(&job, argn, argv);
352
-
353
- mark_start(&job);
354
-
355
- multipnm = read_picture(&job);
356
- /* separation of main and rest for using as lib
357
- this will be changed later => introduction of set_option()
358
- for better communication to the engine */
359
- if (multipnm<0) break; /* read error */
360
-
361
- /* call main loop */
362
- pgm2asc(&job);
363
-
364
- mark_end(&job);
365
-
366
- print_output(&job);
367
-
368
- job_free(&job);
369
-
370
- }
371
-
372
- return 0;
373
- }
@@ -1,288 +0,0 @@
1
- /*
2
- This is an Optical-Character-Recognition program
3
- Copyright (C) 2000-2006 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
-
21
- sometimes I have written comments in german language, sorry for that
22
-
23
- - look for ??? for preliminary code
24
- */
25
-
26
- /* General headerfile with gocr-definitions */
27
-
28
- #ifndef __GOCR_H__
29
- #define __GOCR_H__
30
-
31
- #include "pnm.h"
32
- #include "unicode.h"
33
- #include "list.h"
34
- #include <stddef.h>
35
- #ifdef HAVE_GETTIMEOFDAY
36
- #include <sys/time.h>
37
- #endif
38
-
39
- /*
40
- * wchar_t should always exist (ANSI), but WCHAR.H is sometimes missing
41
- * USE_UNICODE should be removed or replaced by HAVE_WCHAR_H in future
42
- */
43
- #ifdef HAVE_WCHAR_H
44
- #define USE_UNICODE 1
45
- #endif
46
-
47
- /* extern "C"{ */
/* ------------------------ feature extraction ----------------- */
#define AT 7 /* mark */
#define M1 1 /* mark */

/* Direction codes; presumably up, down, right, left -- TODO confirm. */
typedef enum direction {
  UP = 1,
  DO = 2,
  RI = 3,
  LE = 4
} DIRECTION;

#define ST 7 /* stop */
/* ------------------------------------------------------------- */
/* detect maxima of line overlaps (returned in %) and line coordinates */
#define HOR 1 /* horizontal */
#define VER 2 /* vertical */
#define RIS 3 /* rising */
#define FAL 4 /* falling */
62
-
/* Capacity of every per-line array in struct tlines. */
#define MAXlines 1024

/*
 * Geometry of the detected text lines; each array is indexed by line
 * number.
 * ToDo: if we have a tree instead of a list, a line could be a node object
 */
struct tlines {
  int num;             /* presumably the number of lines in use -- TODO confirm */
  int dx;              /* direction of text lines (straight/skew), x part */
  int dy;              /* direction of text lines (straight/skew), y part */
  int m1[MAXlines];    /* start of line = upper bound of 'A' */
  int m2[MAXlines];    /* upper bound of 'e' */
  int m3[MAXlines];    /* lower bound of 'e' = baseline */
  int m4[MAXlines];    /* stop of line = lower bound of 'q' */
  /* ToDo: add sureness per m1,m2 etc? */
  int x0[MAXlines];    /* left border */
  int x1[MAXlines];    /* right border */
  int wt[MAXlines];    /* weight, how sure that's correct in percent, v0.41 */
  int pitch[MAXlines]; /* word pitch (later per box?), v0.41 */
  int mono[MAXlines];  /* spacing type, 0=proportional, 1=monospaced */
};
80
-
81
- #define NumAlt 10 /* maximal number of alternative chars (table length) */
82
- #define MaxNumFrames 8 /* maximum number of frames per char/box */
83
- #define MaxFrameVectors 128 /* maximum vectors per frame (*8=1KB/box) */
84
- /* ToDo: use only malloc_box(),free_box(),copybox() for creation, destroy etc.
85
- * adding reference_counter to avoid pointer pointing to freed box
86
- */
87
- struct box { /* this structure should contain all pixel infos of a letter */
88
- int x0,x1,y0,y1,x,y,dots; /* xmin,xmax,ymin,ymax,reference-pixel,i-dots */
89
- int num_boxes, /* 1 "abc", 2 "!i?", 3 "&auml;" (composed objects) 0.41 */
90
- num_subboxes; /* 1 for "abdegopqADOPQR", 2 for "B" (holes) 0.41 */
91
- wchar_t c; /* detected char (same as tac[0], obsolete?) */
92
- wchar_t modifier; /* default=0, see compose() in unicode.c */
93
- int num; /* same number = same char */
94
- int line; /* line number (points to struct tlines lines) */
95
- int m1,m2,m3,m4; /* m2 = upper boundary, m3 = baseline */
96
- /* planed: sizeof hole_1, hole_2, certainty (run1=100%,run2=90%,etc.) */
97
- pix *p; /* pointer to pixmap (v0.2.5) */
98
- /* tac, wac is used together with setac() to manage very similar chars */
99
- int num_ac; /* length of table (alternative chars), default=0 */
100
- wchar_t tac[NumAlt]; /* alternative chars, only used by setac(),getac() */
101
- int wac[NumAlt]; /* weight of alternative chars */
102
- char *tas[NumAlt]; /* alternative UTF8-strings or XML codes if tac[]=0 */
103
- /* replacing old obj */
104
- /* ToDo: (*obj)[NumAlt] + olen[NumAlt] ??? */
105
- /* ToDo: bitmap for possible Picture|Object|Char ??? */
106
- /* char *obj; */ /* pointer to text-object ... -> replaced by tas[] */
107
- /* ... (melted chars, barcode, picture coords, ...) */
108
- /* must be freed before box is freed! */
109
- /* do _not_ copy only the pointer to object */
110
- /* --------------------------------------------------------
111
- * extension since v0.41 js05, Store frame vectors,
112
- * which is a table of vectors sourrounding the char and its
113
- * inner white holes. The advantage is the independence from
114
- * resolution, handling of holes, overlap and rotation.
115
- * --------------------------------------------------------- */
116
- int num_frames; /* number of frames: 1 for cfhklmnrstuvwxyz */
117
- /* 2 for abdegijopq */
118
- int frame_vol[MaxNumFrames]; /* volume inside frame +/- (black/white) */
119
- int frame_per[MaxNumFrames]; /* periphery, summed length of vectors */
120
- int num_frame_vectors[MaxNumFrames]; /* index to next frame */
121
- /* biggest frame should be stored first (outer frame) */
122
- /* biggest has the maximum pair distance */
123
- /* num vector loops */
124
- int frame_vector[MaxFrameVectors][2]; /* may be 16*int=fixpoint_number */
125
-
126
- };
127
- typedef struct box Box;
128
-
/*
 * True if the coordinate pair (a,b) is outside the image p.
 *
 * p: pointer to an object with integer members x (width) and y (height).
 * a: column to test; b: row to test.
 *
 * Every argument is parenthesized so that expressions such as `k & 7`
 * or `c ? u : v` are compared as a whole.  Note that a and b are each
 * evaluated twice, so do not pass expressions with side effects.
 */
#define outbounds(p, a, b) \
  ((a) < 0 || (b) < 0 || (a) >= (p)->x || (b) >= (p)->y)
131
-
/*
 * List of colour transitions along a path.
 * ToDo: this structure seems to be obsolete, remove it
 */
typedef struct path {
  int start;  /* color at the beginning of the path, (0=white, 1=black) */
  int *x;     /* x coordinates of transitions */
  int *y;     /* y coordinates of transitions */
  int num;    /* current number of entries in x or y */
  int max;    /* maximum number of entries in x or y; */
              /* (if more values need to be stored, the arrays are enlarged) */
} path_t;
141
-
142
- /* job_t contains all information needed for an OCR task: source data (src), temporary buffers (tmp), results (res) and configuration (cfg) */
143
- typedef struct job_s {
144
- struct { /* source data */
145
- char *fname; /* input filename; default value: "-" */
146
- pix p; /* source pixel data, 8-bit gray pixel map */
147
- } src;
148
- struct { /* temporary stuff, e.g. buffers */
149
- #ifdef HAVE_GETTIMEOFDAY
150
- struct timeval init_time; /* starting time of this job */
151
- #endif
152
- pix ppo; /* pixmap for visual debugging output, obsolete */
153
-
154
- /* the recognition function is sometimes called again and again if the result was 0;
155
- n_run tells the pixel function to return alternative results */
156
- int n_run; /* number of the run; in run 2 critical patterns get other results */
157
- /* used for the 2nd try, pixel uses a slower filter function etc. */
158
- List dblist; /* list of boxes loaded from the character database */
159
- } tmp;
160
- struct { /* results */
161
- List boxlist; /* every object is stored in a box, which contains */
162
- /* the characteristics of the object (see struct box) */
163
- List linelist; /* recognized text lines after recognition */
164
-
165
- struct tlines lines; /* used to access line data (statistics); */
166
- /* the positions (frames) of lines are */
167
- /* stored here for further use */
168
- int avX,avY; /* average X,Y (avX=sumX/numC) */
169
- int sumX,sumY,numC; /* sum of all X,Y; number of chars */
170
- } res;
171
- struct { /* configuration */
172
- int cs; /* critical grey value (pixel<cs => black pixel) */
173
- /* range: 0..255, 0 means autodetection */
174
- int spc; /* space width in dots (0 = autodetect); default value: 0 */
175
- int mode; /* operation mode; default value: 0 */
176
- /* operation mode (see --help) */
177
- int dust_size; /* dust size; default value: 10 */
178
- int only_numbers; /* numbers only; default value: 0 */
179
- int verbose; /* verbose mode; default value: 0 */
180
- /* verbose option (see --help) */
181
- FORMAT out_format; /* output format; default value: ISO8859_1 */
182
- char *lc; /* debug list of chars (_ = unrecognized chars) */
183
- /* default value: "_" */
184
- char *db_path; /* pathname for database; default value: NULL */
185
- char *cfilter; /* char filter; default value: NULL, ex: "A-Za-z" */
186
- /* certainty limit above which chars are accepted as identified */
187
- int certainty; /* in percent; 0..100; default value: 95 */
188
- char *unrec_marker; /* this string is output for every unrecognized char */
189
- } cfg;
190
- } job_t;
191
-
192
- /* initialize job structure */
193
- void job_init(job_t *job);
194
-
195
- /* free job structure */
196
- void job_free(job_t *job);
197
-
198
- /* FIXME jb: remove JOB; global pointer to a job_t, only declared (extern) here */
199
- extern job_t *JOB;
200
-
201
- /* calculate the overlap of the line (0-1) with black points
202
- * by recursive bisection
203
- * (possibly look for pixels in the neighborhood dx,dy as error tolerance) (switchable) ???
204
- * MidPoint Line Algorithm (Bresenham) Foley: ComputerGraphics better?
205
- * will be replaced by vector functions
206
- */
207
-
208
- /* straight line y=dy/dx*x+b, implicit form d=F(x,y)=dy*x-dx*y+b*dx=0
209
- * incremental: y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y)) */
210
- int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
211
- int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
212
-
213
- /* look for white 0x02 or black 0x01 dots (0x03 = white+black); NOTE(review): the coordinates are ordered x0,x1,y0,y1 here, not x0,y0,x1,y1 as in get_line() */
214
- char get_bw(int x0, int x1, int y0, int y1,
215
- pix *p, int cs,int mask);
216
-
217
- /* look for black crossing a line x0,y0,x1,y1
218
- * follow the line and count crossings ([white]-black transitions); NOTE(review): the parameters are ordered x0,x1,y0,y1
219
- */
220
- int num_cross(int x0, int x1, int y0, int y1,
221
- pix *p, int cs);
222
-
223
- /* memory allocation with error checking; same parameter list as realloc(); NOTE(review): what happens on failure is not visible from this declaration */
224
- void *xrealloc(void *ptr, size_t size);
225
-
226
- /* follow a line x0,y0,x1,y1, recording the locations of transitions (presumably into path -- confirm),
227
- * return the count of transitions; NOTE(review): the parameters are ordered x0,x1,y0,y1
228
- */
229
- int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path);
230
-
231
- /* -------------------------------------------------------------
232
- * mark edge-points
233
- * - first move forward until b/w-edge
234
- * - more than 2 pixels?
235
- * - loop around
236
- * - if forward pixel : go up, rotate right
237
- * - if forward no pixel : rotate left
238
- * - stop if the first 2 pixels are found again in the same order
239
- * using a walk-along-the-right-hand-wall strategy
240
- * --------------------------------------------------------------
241
- * turmite game: inp: start-x,y, rule r_black=UP,r_white=RIght until border
242
- * out: last-position
243
- * while doing so, count steps, dead ends, xmax, ymax, upper-right, lower-right, upper-left and lower-left corners
244
- * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
245
- *
246
- * is this the right place for declaration?
247
- */
248
- void turmite(pix *p, int *x, int *y,
249
- int x0, int x1, int y0, int y1, int cs, int rw, int rb);
250
-
251
- /* test if points are connected via t-pixel (recursive!) */
252
- int joined(pix *p, int x0, int y0, int x1, int y1, int cs);
253
-
254
- /* move from x,y in direction r until a pixel is reached or l steps are made,
255
- * return the number of steps
256
- */
257
- int loop(pix *p, int x, int y, int l, int cs, int col, DIRECTION r);
258
-
259
- #define MAX_HOLES 3
260
- typedef struct list_holes {
261
- int num; /* number of holes, initialize with 0 */
262
- struct hole_s {
263
- int size,x,y,x0,y0,x1,y1; /* size, start point, outer rectangle */
264
- } hole[MAX_HOLES]; /* storage for MAX_HOLES hole entries */
265
- } holes_t;
266
-
267
- /* look for white holes surrounded by black points;
268
- * at the moment: a white point with black in all four directions
269
- */
270
- int num_hole(int x0, int x1, int y0, int y1, pix *p, int cs, holes_t *holes);
271
-
272
- /* count black unconnected objects --- used for i, a-umlaut, o-umlaut, etc. */
273
- int num_obj(int x0, int x1, int y0, int y1, pix *p, int cs);
274
-
275
- int distance( pix *p1, struct box *box1, /* box-frame; NOTE(review): what distance is measured between the two pix/box pairs is not documented here -- confirm in the implementation */
276
- pix *p2, struct box *box2, int cs);
277
-
278
- /* call the OCR engine ;) */
279
- /* char whatletter(struct box *box1,int cs); */
280
-
281
- /* declared in pixel.c */
282
- /* getpixel() used to be named pixel(), but that name may collide with the netpbm pixel declaration */
283
- int getpixel(pix *p, int x, int y);
284
- int marked(pix *p, int x, int y);
285
- void put(pix * p, int x, int y, int ia, int io);
286
-
287
- /* } */ /* extern C */
288
- #endif /* __GOCR_H__ */