isbn 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,373 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2009 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
-
21
- sometimes I have written comments in german language, sorry for that
22
-
23
- This file was retrieved from pgm2asc.cc of Joerg, in order to have
24
- a library of the ocr-engine from Klaas Freitag
25
-
26
- */
27
- #include "config.h"
28
- #include <stdlib.h>
29
- #include <stdio.h>
30
- #include <assert.h>
31
- #include <string.h>
32
- #ifdef HAVE_GETTIMEOFDAY
33
- #include <sys/time.h>
34
- #endif
35
- #ifdef HAVE_UNISTD_H
36
- #include <unistd.h>
37
- #endif
38
-
39
- #include "pnm.h"
40
- #include "pgm2asc.h"
41
- #include "pcx.h"
42
- #include "ocr0.h" /* only_numbers */
43
- #include "progress.h"
44
- #include "version.h"
45
-
46
- static void out_version(int v) {
47
- fprintf(stderr, " Optical Character Recognition --- gocr "
48
- version_string " " release_string "\n"
49
- " Copyright (C) 2001-2009 Joerg Schulenburg GPG=1024D/53BDFBE3\n"
50
- " released under the GNU General Public License\n");
51
- /* as recommended, (c) and license should be part of the binary */
52
- /* no email because of SPAM, see README for contacting the author */
53
- if (v)
54
- fprintf(stderr, " use option -h for help\n");
55
- if (v & 2)
56
- exit(1);
57
- return;
58
- }
59
-
60
- static void help(void) {
61
- out_version(0);
62
- /* output is shortened to essentials, see manual page for details */
63
- fprintf(stderr,
64
- " using: gocr [options] pnm_file_name # use - for stdin\n"
65
- " options (see gocr manual pages for more details):\n"
66
- " -h, --help\n"
67
- " -i name - input image file (pnm,pgm,pbm,ppm,pcx,...)\n"
68
- " -o name - output file (redirection of stdout)\n"
69
- " -e name - logging file (redirection of stderr)\n"
70
- " -x name - progress output to fifo (see manual)\n"
71
- " -p name - database path including final slash (default is ./db/)\n");
72
- fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
73
- " -f fmt - output format (ISO8859_1 TeX HTML XML UTF8 ASCII)\n"
74
- " -l num - threshold grey level 0<160<=255 (0 = autodetect)\n"
75
- " -d num - dust_size (remove small clusters, -1 = autodetect)\n"
76
- " -s num - spacewidth/dots (0 = autodetect)\n"
77
- " -v num - verbose (see manual page)\n"
78
- " -c string - list of chars (debugging, see manual)\n"
79
- " -C string - char filter (ex. hexdigits: ""0-9A-Fx"", only ASCII)\n"
80
- " -m num - operation modes (bitpattern, see manual)\n");
81
- fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
82
- " -a num - value of certainty (in percent, 0..100, default=95)\n"
83
- " -u string - output this string for every unrecognized character\n");
84
- fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
85
- " examples:\n"
86
- "\tgocr -m 4 text1.pbm # do layout analyzis\n"
87
- "\tgocr -m 130 -p ./database/ text1.pbm # extend database\n"
88
- "\tdjpeg -pnm -gray text.jpg | gocr - # use jpeg-file via pipe\n"
89
- "\n");
90
- fprintf(stderr, " webpage: http://jocr.sourceforge.net/\n");
91
- exit(0);
92
- }
93
-
94
- #ifdef HAVE_GETTIMEOFDAY
95
- /* from the glibc documentation */
96
- static int timeval_subtract (struct timeval *result, struct timeval *x,
97
- struct timeval *y) {
98
-
99
- /* Perform the carry for the later subtraction by updating Y. */
100
- if (x->tv_usec < y->tv_usec) {
101
- int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
102
- y->tv_usec -= 1000000 * nsec;
103
- y->tv_sec += nsec;
104
- }
105
- if (x->tv_usec - y->tv_usec > 1000000) {
106
- int nsec = (x->tv_usec - y->tv_usec) / 1000000;
107
- y->tv_usec += 1000000 * nsec;
108
- y->tv_sec -= nsec;
109
- }
110
-
111
- /* Compute the time remaining to wait.
112
- `tv_usec' is certainly positive. */
113
- result->tv_sec = x->tv_sec - y->tv_sec;
114
- result->tv_usec = x->tv_usec - y->tv_usec;
115
-
116
- /* Return 1 if result is negative. */
117
- return x->tv_sec < y->tv_sec;
118
- }
119
- #endif
120
-
121
- static void process_arguments(job_t *job, int argn, char *argv[])
122
- {
123
- int i;
124
- char *s1;
125
-
126
- assert(job);
127
-
128
- if (argn <= 1) {
129
- out_version(1);
130
- exit(0);
131
- }
132
- #ifdef HAVE_PGM_H
133
- pnm_init(&argn, &argv);
134
- #endif
135
-
136
- /* process arguments */
137
- for (i = 1; i < argn; i++) {
138
- if (strcmp(argv[i], "--help") == 0)
139
- help(); /* and quits */
140
- if (argv[i][0] == '-' && argv[i][1] != 0) {
141
- s1 = "";
142
- if (i + 1 < argn)
143
- s1 = argv[i + 1];
144
- switch (argv[i][1]) {
145
- case 'h': /* help */
146
- help();
147
- break;
148
- case 'i': /* input image file */
149
- job->src.fname = s1;
150
- i++;
151
- break;
152
- case 'e': /* logging file */
153
- if (s1[0] == '-' && s1[1] == '\0') {
154
- #ifdef HAVE_UNISTD_H
155
- dup2(STDOUT_FILENO, STDERR_FILENO); /* -e /dev/stdout works */
156
- #else
157
- fprintf(stderr, "stderr redirection not possible without unistd.h\n");
158
- #endif
159
- }
160
- else if (!freopen(s1, "w", stderr)) {
161
- fprintf(stderr, "stderr redirection to %s failed\n", s1);
162
- }
163
- i++;
164
- break;
165
- case 'p': /* database path */
166
- job->cfg.db_path=s1;
167
- i++;
168
- break;
169
- case 'o': /* output file */
170
- if (s1[0] == '-' && s1[1] == '\0') { /* default */
171
- }
172
- else if (!freopen(s1, "w", stdout)) {
173
- fprintf(stderr, "stdout redirection to %s failed\n", s1);
174
- };
175
- i++;
176
- break;
177
- case 'f': /* output format */
178
- if (strcmp(s1, "ISO8859_1") == 0) job->cfg.out_format=ISO8859_1; else
179
- if (strcmp(s1, "TeX") == 0) job->cfg.out_format=TeX; else
180
- if (strcmp(s1, "HTML") == 0) job->cfg.out_format=HTML; else
181
- if (strcmp(s1, "XML") == 0) job->cfg.out_format=XML; else
182
- if (strcmp(s1, "SGML") == 0) job->cfg.out_format=SGML; else
183
- if (strcmp(s1, "UTF8") == 0) job->cfg.out_format=UTF8; else
184
- if (strcmp(s1, "ASCII") == 0) job->cfg.out_format=ASCII; else
185
- fprintf(stderr,"Warning: unknown format (-f %s)\n",s1);
186
- i++;
187
- break;
188
- case 'c': /* list of chars (_ = not recognized chars) */
189
- job->cfg.lc = s1;
190
- i++;
191
- break;
192
- case 'C': /* char filter, default: NULL (all chars) */
193
- /* ToDo: UTF8 input, wchar */
194
- job->cfg.cfilter = s1;
195
- i++;
196
- break;
197
- case 'd': /* dust size */
198
- job->cfg.dust_size = atoi(s1);
199
- i++;
200
- break;
201
- case 'l': /* grey level 0<160<=255, 0 for autodetect */
202
- job->cfg.cs = atoi(s1);
203
- i++;
204
- break;
205
- case 's': /* spacewidth/dots (0 = autodetect) */
206
- job->cfg.spc = atoi(s1);
207
- i++;
208
- break;
209
- case 'v': /* verbose mode */
210
- job->cfg.verbose |= atoi(s1);
211
- i++;
212
- break;
213
- case 'm': /* operation modes */
214
- job->cfg.mode |= atoi(s1);
215
- i++;
216
- break;
217
- case 'n': /* numbers only */
218
- job->cfg.only_numbers = atoi(s1);
219
- i++;
220
- break;
221
- case 'x': /* initialize progress output s1=fname */
222
- ini_progress(s1);
223
- i++;
224
- break;
225
- case 'a': /* set certainty */
226
- job->cfg.certainty = atoi(s1);;
227
- i++;
228
- break;
229
- case 'u': /* output marker for unrecognized chars */
230
- job->cfg.unrec_marker = s1;
231
- i++;
232
- break;
233
- default:
234
- fprintf(stderr, "# unknown option use -h for help\n");
235
- }
236
- continue;
237
- }
238
- else /* argument can be filename v0.2.5 */ if (argv[i][0] != '-'
239
- || argv[i][1] == '\0' ) {
240
- job->src.fname = argv[i];
241
- }
242
- }
243
- }
244
-
245
- static void mark_start(job_t *job) {
246
- assert(job);
247
-
248
- if (job->cfg.verbose) {
249
- out_version(0);
250
- /* insert some helpful info for support */
251
- fprintf(stderr, "# compiled: " __DATE__ );
252
- #if defined(__GNUC__)
253
- fprintf(stderr, " GNUC-%d", __GNUC__ );
254
- #endif
255
- #ifdef __GNUC_MINOR__
256
- fprintf(stderr, ".%d", __GNUC_MINOR__ );
257
- #endif
258
- #if defined(__linux)
259
- fprintf(stderr, " linux");
260
- #elif defined(__unix)
261
- fprintf(stderr, " unix");
262
- #endif
263
- #if defined(__WIN32) || defined(__WIN32__)
264
- fprintf(stderr, " WIN32");
265
- #endif
266
- #if defined(__WIN64) || defined(__WIN64__)
267
- fprintf(stderr, " WIN64");
268
- #endif
269
- #if defined(__VERSION__)
270
- fprintf(stderr, " version " __VERSION__ );
271
- #endif
272
- fprintf(stderr, "\n");
273
- fprintf(stderr,
274
- "# options are: -l %d -s %d -v %d -c %s -m %d -d %d -n %d -a %d -C \"%s\"\n",
275
- job->cfg.cs, job->cfg.spc, job->cfg.verbose, job->cfg.lc, job->cfg.mode,
276
- job->cfg.dust_size, job->cfg.only_numbers, job->cfg.certainty,
277
- job->cfg.cfilter);
278
- fprintf(stderr, "# file: %s\n", job->src.fname);
279
- #ifdef USE_UNICODE
280
- fprintf(stderr,"# using unicode\n");
281
- #endif
282
- #ifdef HAVE_GETTIMEOFDAY
283
- gettimeofday(&job->tmp.init_time, NULL);
284
- #endif
285
- }
286
- }
287
-
288
- static void mark_end(job_t *job) {
289
- assert(job);
290
-
291
- #ifdef HAVE_GETTIMEOFDAY
292
- /* show elapsed time */
293
- if (job->cfg.verbose) {
294
- struct timeval end, result;
295
- gettimeofday(&end, NULL);
296
- timeval_subtract(&result, &end, &job->tmp.init_time);
297
- fprintf(stderr,"Elapsed time: %d:%02d:%3.3f.\n", (int)result.tv_sec/60,
298
- (int)result.tv_sec%60, (float)result.tv_usec/1000);
299
- }
300
- #endif
301
- }
302
-
303
- static int read_picture(job_t *job) {
304
- int rc=0;
305
- assert(job);
306
-
307
- if (strstr(job->src.fname, ".pcx"))
308
- readpcx(job->src.fname, &job->src.p, job->cfg.verbose);
309
- else
310
- rc=readpgm(job->src.fname, &job->src.p, job->cfg.verbose);
311
- return rc; /* 1 for multiple images, 0 else */
312
- }
313
-
314
- /* subject of change, we need more output for XML (ToDo) */
315
- void print_output(job_t *job) {
316
- int linecounter = 0;
317
- const char *line;
318
-
319
- assert(job);
320
-
321
- linecounter = 0;
322
- line = getTextLine(linecounter++);
323
- while (line) {
324
- /* notice: decode() is shiftet to getTextLine since 0.38 */
325
- fputs(line, stdout);
326
- if (job->cfg.out_format==HTML) fputs("<br />",stdout);
327
- if (job->cfg.out_format!=XML) fputc('\n', stdout);
328
- line = getTextLine(linecounter++);
329
- }
330
- free_textlines();
331
- }
332
-
333
- /* FIXME jb: remove JOB; */
334
- job_t *JOB;
335
-
336
-
337
- /* -------------------------------------------------------------
338
- // ------ MAIN - replace this by your own aplication!
339
- // ------------------------------------------------------------- */
340
- int main(int argn, char *argv[]) {
341
- int multipnm=1;
342
- job_t job;
343
-
344
- JOB = &job;
345
- setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
346
-
347
- while (multipnm==1) {
348
-
349
- job_init(&job);
350
-
351
- process_arguments(&job, argn, argv);
352
-
353
- mark_start(&job);
354
-
355
- multipnm = read_picture(&job);
356
- /* separation of main and rest for using as lib
357
- this will be changed later => introduction of set_option()
358
- for better communication to the engine */
359
- if (multipnm<0) break; /* read error */
360
-
361
- /* call main loop */
362
- pgm2asc(&job);
363
-
364
- mark_end(&job);
365
-
366
- print_output(&job);
367
-
368
- job_free(&job);
369
-
370
- }
371
-
372
- return 0;
373
- }
@@ -1,288 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2006 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
-
21
- sometimes I have written comments in german language, sorry for that
22
-
23
- - look for ??? for preliminary code
24
- */
25
-
26
- /* General headerfile with gocr-definitions */
27
-
28
- #ifndef __GOCR_H__
29
- #define __GOCR_H__
30
-
31
- #include "pnm.h"
32
- #include "unicode.h"
33
- #include "list.h"
34
- #include <stddef.h>
35
- #ifdef HAVE_GETTIMEOFDAY
36
- #include <sys/time.h>
37
- #endif
38
-
39
- /*
40
- * wchar_t should always exist (ANSI), but WCHAR.H is sometimes missing
41
- * USE_UNICODE should be removed or replaced by HAVE_WCHAR_H in future
42
- */
43
- #ifdef HAVE_WCHAR_H
44
- #define USE_UNICODE 1
45
- #endif
46
-
47
- /* extern "C"{ */
48
- /* ------------------------ feature extraction ----------------- */
49
- #define AT 7 /* mark */
50
- #define M1 1 /* mark */
51
- enum direction {
52
- UP=1, DO, RI, LE
53
- };
54
- typedef enum direction DIRECTION;
55
- #define ST 7 /* stop */
56
- /* ------------------------------------------------------------- */
57
- /* detect maximas in of line overlapps (return in %) and line koord */
58
- #define HOR 1 /* horizontal */
59
- #define VER 2 /* vertikal */
60
- #define RIS 3 /* rising=steigend */
61
- #define FAL 4 /* falling=fallend */
62
-
63
- #define MAXlines 1024
64
-
65
- /* ToDo: if we have a tree instead of a list, a line could be a node object */
66
- struct tlines {
67
- int num;
68
- int dx, dy; /* direction of text lines (straight/skew) */
69
- int m1[MAXlines], /* start of line = upper bound of 'A' */
70
- m2[MAXlines], /* upper bound of 'e' */
71
- m3[MAXlines], /* lower bound of 'e' = baseline */
72
- m4[MAXlines]; /* stop of line = lower bound of 'q' */
73
- /* ToDo: add sureness per m1,m2 etc? */
74
- int x0[MAXlines],
75
- x1[MAXlines]; /* left and right border */
76
- int wt[MAXlines]; /* weight, how sure thats correct in percent, v0.41 */
77
- int pitch[MAXlines]; /* word pitch (later per box?), v0.41 */
78
- int mono[MAXlines]; /* spacing type, 0=proportional, 1=monospaced */
79
- };
80
-
81
- #define NumAlt 10 /* maximal number of alternative chars (table length) */
82
- #define MaxNumFrames 8 /* maximum number of frames per char/box */
83
- #define MaxFrameVectors 128 /* maximum vectors per frame (*8=1KB/box) */
84
- /* ToDo: use only malloc_box(),free_box(),copybox() for creation, destroy etc.
85
- * adding reference_counter to avoid pointer pointing to freed box
86
- */
87
- struct box { /* this structure should contain all pixel infos of a letter */
88
- int x0,x1,y0,y1,x,y,dots; /* xmin,xmax,ymin,ymax,reference-pixel,i-dots */
89
- int num_boxes, /* 1 "abc", 2 "!i?", 3 "&auml;" (composed objects) 0.41 */
90
- num_subboxes; /* 1 for "abdegopqADOPQR", 2 for "B" (holes) 0.41 */
91
- wchar_t c; /* detected char (same as tac[0], obsolete?) */
92
- wchar_t modifier; /* default=0, see compose() in unicode.c */
93
- int num; /* same number = same char */
94
- int line; /* line number (points to struct tlines lines) */
95
- int m1,m2,m3,m4; /* m2 = upper boundary, m3 = baseline */
96
- /* planed: sizeof hole_1, hole_2, certainty (run1=100%,run2=90%,etc.) */
97
- pix *p; /* pointer to pixmap (v0.2.5) */
98
- /* tac, wac is used together with setac() to manage very similar chars */
99
- int num_ac; /* length of table (alternative chars), default=0 */
100
- wchar_t tac[NumAlt]; /* alternative chars, only used by setac(),getac() */
101
- int wac[NumAlt]; /* weight of alternative chars */
102
- char *tas[NumAlt]; /* alternative UTF8-strings or XML codes if tac[]=0 */
103
- /* replacing old obj */
104
- /* ToDo: (*obj)[NumAlt] + olen[NumAlt] ??? */
105
- /* ToDo: bitmap for possible Picture|Object|Char ??? */
106
- /* char *obj; */ /* pointer to text-object ... -> replaced by tas[] */
107
- /* ... (melted chars, barcode, picture coords, ...) */
108
- /* must be freed before box is freed! */
109
- /* do _not_ copy only the pointer to object */
110
- /* --------------------------------------------------------
111
- * extension since v0.41 js05, Store frame vectors,
112
- * which is a table of vectors sourrounding the char and its
113
- * inner white holes. The advantage is the independence from
114
- * resolution, handling of holes, overlap and rotation.
115
- * --------------------------------------------------------- */
116
- int num_frames; /* number of frames: 1 for cfhklmnrstuvwxyz */
117
- /* 2 for abdegijopq */
118
- int frame_vol[MaxNumFrames]; /* volume inside frame +/- (black/white) */
119
- int frame_per[MaxNumFrames]; /* periphery, summed length of vectors */
120
- int num_frame_vectors[MaxNumFrames]; /* index to next frame */
121
- /* biggest frame should be stored first (outer frame) */
122
- /* biggest has the maximum pair distance */
123
- /* num vector loops */
124
- int frame_vector[MaxFrameVectors][2]; /* may be 16*int=fixpoint_number */
125
-
126
- };
127
- typedef struct box Box;
128
-
129
- /* true if the coordination pair (a,b) is outside the image p */
130
- #define outbounds(p, a, b) (a < 0 || b < 0 || a >= (p)->x || b >= (p)->y)
131
-
132
- /* ToDo: this structure seems to be obsolete, remove it */
133
- typedef struct path {
134
- int start; /* color at the beginning of the path, (0=white, 1=black) */
135
- int *x; /* x coordinates of transitions */
136
- int *y; /* y coordinates of transitions */
137
- int num; /* current number of entries in x or y */
138
- int max; /* maximum number of entries in x or y */
139
- /* (if more values need to be stored, the arrays are enlarged) */
140
- } path_t;
141
-
142
- /* job_t contains all information needed for an OCR task */
143
- typedef struct job_s {
144
- struct { /* source data */
145
- char *fname; /* input filename; default value: "-" */
146
- pix p; /* source pixel data, pixelmap 8bit gray */
147
- } src;
148
- struct { /* temporary stuff, e.g. buffers */
149
- #ifdef HAVE_GETTIMEOFDAY
150
- struct timeval init_time; /* starting time of this job */
151
- #endif
152
- pix ppo; /* pixmap for visual debugging output, obsolete */
153
-
154
- /* sometimes recognition function is called again and again, if result was 0
155
- n_run tells the pixel function to return alternative results */
156
- int n_run; /* num of run, if run_2 critical pattern get other results */
157
- /* used for 2nd try, pixel uses slower filter function etc. */
158
- List dblist; /* list of boxes loaded from the character database */
159
- } tmp;
160
- struct { /* results */
161
- List boxlist; /* store every object in a box, which contains */
162
- /* the characteristics of the object (see struct box) */
163
- List linelist; /* recognized text lines after recognition */
164
-
165
- struct tlines lines; /* used to access to line-data (statistics) */
166
- /* here the positions (frames) of lines are */
167
- /* stored for further use */
168
- int avX,avY; /* average X,Y (avX=sumX/numC) */
169
- int sumX,sumY,numC; /* sum of all X,Y; num chars */
170
- } res;
171
- struct { /* configuration */
172
- int cs; /* critical grey value (pixel<cs => black pixel) */
173
- /* range: 0..255, 0 means autodetection */
174
- int spc; /* spacewidth/dots (0 = autodetect); default value: 0 */
175
- int mode; /* operation modes; default value: 0 */
176
- /* operation mode (see --help) */
177
- int dust_size; /* dust size; default value: 10 */
178
- int only_numbers; /* numbers only; default value: 0 */
179
- int verbose; /* verbose mode; default value: 0 */
180
- /* verbose option (see --help) */
181
- FORMAT out_format; /* output format; default value: ISO8859_1*/
182
- char *lc; /* debuglist of chars (_ = not recognized chars) */
183
- /* default value: "_" */
184
- char *db_path; /* pathname for database; default value: NULL */
185
- char *cfilter; /* char filter; default value: NULL, ex: "A-Za-z" */
186
- /* limit of certainty where chars are accepted as identified */
187
- int certainty; /* in units of 100 (percent); 0..100; default 95 */
188
- char *unrec_marker; /* output this string for every unrecognized char */
189
- } cfg;
190
- } job_t;
191
-
192
- /* initialize job structure */
193
- void job_init(job_t *job);
194
-
195
- /* free job structure */
196
- void job_free(job_t *job);
197
-
198
- /*FIXME jb: remove JOB; */
199
- extern job_t *JOB;
200
-
201
- /* calculate the overlap of the line (0-1) with black points
202
- * by recursive bisection
203
- * (possibly add error tolerance by searching for pixels in the dx,dy neighbourhood) (switchable) ???
204
- * MidPoint Line Algorithm (Bresenham) Foley: ComputerGraphics better?
205
- * will be replaced by vector functions
206
- */
207
-
208
- /* straight line y=dy/dx*x+b, implicit form d=F(x,y)=dy*x-dx*y+b*dx=0
209
- * incremental: y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y)) */
210
- int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
211
- int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
212
-
213
- /* look for white 0x02 or black 0x01 dots (0x03 = white+black) */
214
- char get_bw(int x0, int x1, int y0, int y1,
215
- pix *p, int cs,int mask);
216
-
217
- /* look for black crossing a line x0,y0,x1,y1
218
- * follow line and count crossings ([white]-black-transitions)
219
- */
220
- int num_cross(int x0, int x1, int y0, int y1,
221
- pix *p, int cs);
222
-
223
- /* memory allocation with error checking */
224
- void *xrealloc(void *ptr, size_t size);
225
-
226
- /* follow a line x0,y0,x1,y1 recording locations of transitions,
227
- * return count of transitions
228
- */
229
- int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path);
230
-
231
- /* -------------------------------------------------------------
232
- * mark edge-points
233
- * - first move forward until b/w-edge
234
- * - more than 2 pixel?
235
- * - loop around
236
- * - if forward pixel : go up, rotate right
237
- * - if forward no pixel : rotate left
238
- * - stop if found first 2 pixel in same order
239
- * using a follow-the-right-hand-wall strategy
240
- * --------------------------------------------------------------
241
- * turmite game: inp: start-x,y, rule r_black=UP,r_white=RIGHT until border
242
- * out: last-position
243
- * while doing so, count steps, dead ends, xmax, ymax, and top-right, bottom-right, top-left, bottom-left corners
244
- * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
245
- *
246
- * is this the right place for declaration?
247
- */
248
- void turmite(pix *p, int *x, int *y,
249
- int x0, int x1, int y0, int y1, int cs, int rw, int rb);
250
-
251
- /* test if points are connected via t-pixel (recursive!) */
252
- int joined(pix *p, int x0, int y0, int x1, int y1, int cs);
253
-
254
- /* move from x,y to direction r until pixel or l steps
255
- * return number of steps
256
- */
257
- int loop(pix *p, int x, int y, int l, int cs, int col, DIRECTION r);
258
-
259
- #define MAX_HOLES 3
260
- typedef struct list_holes {
261
- int num; /* numbers of holes, initialize with 0 */
262
- struct hole_s {
263
- int size,x,y,x0,y0,x1,y1; /* size, start point, outer rectangle */
264
- } hole[MAX_HOLES];
265
- } holes_t;
266
-
267
- /* look for white holes surrounded by black points
268
- * at moment white point with black in all four directions
269
- */
270
- int num_hole(int x0, int x1, int y0, int y1, pix *p, int cs, holes_t *holes);
271
-
272
- /* count for black nonconnected objects --- used for i,auml,ouml,etc. */
273
- int num_obj(int x0, int x1, int y0, int y1, pix *p, int cs);
274
-
275
- int distance( pix *p1, struct box *box1, /* box-frame */
276
- pix *p2, struct box *box2, int cs);
277
-
278
- /* call the OCR engine ;) */
279
- /* char whatletter(struct box *box1,int cs); */
280
-
281
- /* declared in pixel.c */
282
- /* getpixel() was pixel() but it may collide with netpnm pixel declaration */
283
- int getpixel(pix *p, int x, int y);
284
- int marked(pix *p, int x, int y);
285
- void put(pix * p, int x, int y, int ia, int io);
286
-
287
- /* } */ /* extern C */
288
- #endif /* __GOCR_H__ */