isbn 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291) hide show
  1. data/.gitignore +4 -0
  2. data/README +9 -0
  3. data/Rakefile +13 -0
  4. data/VERSION +1 -0
  5. data/isbn.gemspec +329 -0
  6. data/lib/isbn.rb +90 -0
  7. data/src/gocr-0.48/.cvsignore +6 -0
  8. data/src/gocr-0.48/AUTHORS +7 -0
  9. data/src/gocr-0.48/BUGS +55 -0
  10. data/src/gocr-0.48/CREDITS +17 -0
  11. data/src/gocr-0.48/HISTORY +243 -0
  12. data/src/gocr-0.48/INSTALL +83 -0
  13. data/src/gocr-0.48/Makefile +193 -0
  14. data/src/gocr-0.48/Makefile.in +193 -0
  15. data/src/gocr-0.48/README +165 -0
  16. data/src/gocr-0.48/READMEde.txt +80 -0
  17. data/src/gocr-0.48/REMARK.txt +18 -0
  18. data/src/gocr-0.48/REVIEW +538 -0
  19. data/src/gocr-0.48/TODO +65 -0
  20. data/src/gocr-0.48/bin/.cvsignore +2 -0
  21. data/src/gocr-0.48/bin/create_db +38 -0
  22. data/src/gocr-0.48/bin/gocr.tcl +527 -0
  23. data/src/gocr-0.48/bin/gocr_chk.sh +44 -0
  24. data/src/gocr-0.48/configure +4689 -0
  25. data/src/gocr-0.48/configure.in +71 -0
  26. data/src/gocr-0.48/doc/.#Makefile.1.6 +39 -0
  27. data/src/gocr-0.48/doc/.cvsignore +2 -0
  28. data/src/gocr-0.48/doc/Makefile +39 -0
  29. data/src/gocr-0.48/doc/Makefile.in +39 -0
  30. data/src/gocr-0.48/doc/example.dtd +53 -0
  31. data/src/gocr-0.48/doc/example.xml +21 -0
  32. data/src/gocr-0.48/doc/examples.txt +67 -0
  33. data/src/gocr-0.48/doc/gocr.html +578 -0
  34. data/src/gocr-0.48/doc/unicode.txt +57 -0
  35. data/src/gocr-0.48/examples/.#Makefile.1.22 +166 -0
  36. data/src/gocr-0.48/examples/4x6.png +0 -0
  37. data/src/gocr-0.48/examples/4x6.txt +2 -0
  38. data/src/gocr-0.48/examples/5x7.png +0 -0
  39. data/src/gocr-0.48/examples/5x7.png.txt +2 -0
  40. data/src/gocr-0.48/examples/5x8.png +0 -0
  41. data/src/gocr-0.48/examples/5x8.png.txt +2 -0
  42. data/src/gocr-0.48/examples/Makefile +166 -0
  43. data/src/gocr-0.48/examples/color.fig +20 -0
  44. data/src/gocr-0.48/examples/ex.fig +16 -0
  45. data/src/gocr-0.48/examples/font.tex +22 -0
  46. data/src/gocr-0.48/examples/font1.tex +46 -0
  47. data/src/gocr-0.48/examples/font2.fig +27 -0
  48. data/src/gocr-0.48/examples/font_nw.tex +24 -0
  49. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  50. data/src/gocr-0.48/examples/handwrt1.txt +10 -0
  51. data/src/gocr-0.48/examples/inverse.fig +20 -0
  52. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  53. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +4 -0
  55. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  56. data/src/gocr-0.48/examples/ocr-a.txt +6 -0
  57. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  58. data/src/gocr-0.48/examples/ocr-b.png.txt +4 -0
  59. data/src/gocr-0.48/examples/polish.tex +28 -0
  60. data/src/gocr-0.48/examples/rotate45.fig +14 -0
  61. data/src/gocr-0.48/examples/score +36 -0
  62. data/src/gocr-0.48/examples/text.tex +28 -0
  63. data/src/gocr-0.48/gocr.spec +143 -0
  64. data/src/gocr-0.48/gpl.html +537 -0
  65. data/src/gocr-0.48/include/.cvsignore +2 -0
  66. data/src/gocr-0.48/include/config.h +36 -0
  67. data/src/gocr-0.48/include/config.h.in +36 -0
  68. data/src/gocr-0.48/include/version.h +2 -0
  69. data/src/gocr-0.48/install-sh +3 -0
  70. data/src/gocr-0.48/make.bat +57 -0
  71. data/src/gocr-0.48/man/.cvsignore +2 -0
  72. data/src/gocr-0.48/man/Makefile +29 -0
  73. data/src/gocr-0.48/man/Makefile.in +29 -0
  74. data/src/gocr-0.48/man/man1/gocr.1 +166 -0
  75. data/src/gocr-0.48/src/.cvsignore +4 -0
  76. data/src/gocr-0.48/src/Makefile +132 -0
  77. data/src/gocr-0.48/src/Makefile.in +132 -0
  78. data/src/gocr-0.48/src/amiga.h +31 -0
  79. data/src/gocr-0.48/src/barcode.c +846 -0
  80. data/src/gocr-0.48/src/barcode.c.orig +593 -0
  81. data/src/gocr-0.48/src/barcode.h +11 -0
  82. data/src/gocr-0.48/src/box.c +372 -0
  83. data/src/gocr-0.48/src/database.c +462 -0
  84. data/src/gocr-0.48/src/detect.c +943 -0
  85. data/src/gocr-0.48/src/gocr.c +373 -0
  86. data/src/gocr-0.48/src/gocr.h +288 -0
  87. data/src/gocr-0.48/src/jconv.c +168 -0
  88. data/src/gocr-0.48/src/job.c +84 -0
  89. data/src/gocr-0.48/src/lines.c +350 -0
  90. data/src/gocr-0.48/src/list.c +334 -0
  91. data/src/gocr-0.48/src/list.h +90 -0
  92. data/src/gocr-0.48/src/ocr0.c +6756 -0
  93. data/src/gocr-0.48/src/ocr0.h +63 -0
  94. data/src/gocr-0.48/src/ocr0n.c +1475 -0
  95. data/src/gocr-0.48/src/ocr1.c +85 -0
  96. data/src/gocr-0.48/src/ocr1.h +3 -0
  97. data/src/gocr-0.48/src/otsu.c +289 -0
  98. data/src/gocr-0.48/src/otsu.h +23 -0
  99. data/src/gocr-0.48/src/output.c +289 -0
  100. data/src/gocr-0.48/src/output.h +37 -0
  101. data/src/gocr-0.48/src/pcx.c +153 -0
  102. data/src/gocr-0.48/src/pcx.h +9 -0
  103. data/src/gocr-0.48/src/pgm2asc.c +2893 -0
  104. data/src/gocr-0.48/src/pgm2asc.h +105 -0
  105. data/src/gocr-0.48/src/pixel.c +537 -0
  106. data/src/gocr-0.48/src/pnm.c +533 -0
  107. data/src/gocr-0.48/src/pnm.h +35 -0
  108. data/src/gocr-0.48/src/progress.c +87 -0
  109. data/src/gocr-0.48/src/progress.h +42 -0
  110. data/src/gocr-0.48/src/remove.c +703 -0
  111. data/src/gocr-0.48/src/tga.c +87 -0
  112. data/src/gocr-0.48/src/tga.h +6 -0
  113. data/src/gocr-0.48/src/unicode.c +1314 -0
  114. data/src/gocr-0.48/src/unicode.h +1257 -0
  115. data/src/jpeg-7/Makefile.am +133 -0
  116. data/src/jpeg-7/Makefile.in +1089 -0
  117. data/src/jpeg-7/README +322 -0
  118. data/src/jpeg-7/aclocal.m4 +8990 -0
  119. data/src/jpeg-7/ansi2knr.1 +36 -0
  120. data/src/jpeg-7/ansi2knr.c +739 -0
  121. data/src/jpeg-7/cderror.h +132 -0
  122. data/src/jpeg-7/cdjpeg.c +181 -0
  123. data/src/jpeg-7/cdjpeg.h +187 -0
  124. data/src/jpeg-7/change.log +270 -0
  125. data/src/jpeg-7/cjpeg.1 +325 -0
  126. data/src/jpeg-7/cjpeg.c +616 -0
  127. data/src/jpeg-7/ckconfig.c +402 -0
  128. data/src/jpeg-7/coderules.txt +118 -0
  129. data/src/jpeg-7/config.guess +1561 -0
  130. data/src/jpeg-7/config.sub +1686 -0
  131. data/src/jpeg-7/configure +17139 -0
  132. data/src/jpeg-7/configure.ac +317 -0
  133. data/src/jpeg-7/depcomp +630 -0
  134. data/src/jpeg-7/djpeg.1 +251 -0
  135. data/src/jpeg-7/djpeg.c +617 -0
  136. data/src/jpeg-7/example.c +433 -0
  137. data/src/jpeg-7/filelist.txt +215 -0
  138. data/src/jpeg-7/install-sh +520 -0
  139. data/src/jpeg-7/install.txt +1097 -0
  140. data/src/jpeg-7/jaricom.c +148 -0
  141. data/src/jpeg-7/jcapimin.c +282 -0
  142. data/src/jpeg-7/jcapistd.c +161 -0
  143. data/src/jpeg-7/jcarith.c +921 -0
  144. data/src/jpeg-7/jccoefct.c +453 -0
  145. data/src/jpeg-7/jccolor.c +459 -0
  146. data/src/jpeg-7/jcdctmgr.c +482 -0
  147. data/src/jpeg-7/jchuff.c +1612 -0
  148. data/src/jpeg-7/jcinit.c +65 -0
  149. data/src/jpeg-7/jcmainct.c +293 -0
  150. data/src/jpeg-7/jcmarker.c +667 -0
  151. data/src/jpeg-7/jcmaster.c +770 -0
  152. data/src/jpeg-7/jcomapi.c +106 -0
  153. data/src/jpeg-7/jconfig.bcc +48 -0
  154. data/src/jpeg-7/jconfig.cfg +45 -0
  155. data/src/jpeg-7/jconfig.dj +38 -0
  156. data/src/jpeg-7/jconfig.mac +43 -0
  157. data/src/jpeg-7/jconfig.manx +43 -0
  158. data/src/jpeg-7/jconfig.mc6 +52 -0
  159. data/src/jpeg-7/jconfig.sas +43 -0
  160. data/src/jpeg-7/jconfig.st +42 -0
  161. data/src/jpeg-7/jconfig.txt +155 -0
  162. data/src/jpeg-7/jconfig.vc +45 -0
  163. data/src/jpeg-7/jconfig.vms +37 -0
  164. data/src/jpeg-7/jconfig.wat +38 -0
  165. data/src/jpeg-7/jcparam.c +632 -0
  166. data/src/jpeg-7/jcprepct.c +358 -0
  167. data/src/jpeg-7/jcsample.c +545 -0
  168. data/src/jpeg-7/jctrans.c +381 -0
  169. data/src/jpeg-7/jdapimin.c +396 -0
  170. data/src/jpeg-7/jdapistd.c +275 -0
  171. data/src/jpeg-7/jdarith.c +762 -0
  172. data/src/jpeg-7/jdatadst.c +151 -0
  173. data/src/jpeg-7/jdatasrc.c +212 -0
  174. data/src/jpeg-7/jdcoefct.c +736 -0
  175. data/src/jpeg-7/jdcolor.c +396 -0
  176. data/src/jpeg-7/jdct.h +393 -0
  177. data/src/jpeg-7/jddctmgr.c +382 -0
  178. data/src/jpeg-7/jdhuff.c +1309 -0
  179. data/src/jpeg-7/jdinput.c +384 -0
  180. data/src/jpeg-7/jdmainct.c +512 -0
  181. data/src/jpeg-7/jdmarker.c +1360 -0
  182. data/src/jpeg-7/jdmaster.c +663 -0
  183. data/src/jpeg-7/jdmerge.c +400 -0
  184. data/src/jpeg-7/jdpostct.c +290 -0
  185. data/src/jpeg-7/jdsample.c +361 -0
  186. data/src/jpeg-7/jdtrans.c +136 -0
  187. data/src/jpeg-7/jerror.c +252 -0
  188. data/src/jpeg-7/jerror.h +304 -0
  189. data/src/jpeg-7/jfdctflt.c +174 -0
  190. data/src/jpeg-7/jfdctfst.c +230 -0
  191. data/src/jpeg-7/jfdctint.c +4348 -0
  192. data/src/jpeg-7/jidctflt.c +242 -0
  193. data/src/jpeg-7/jidctfst.c +368 -0
  194. data/src/jpeg-7/jidctint.c +5137 -0
  195. data/src/jpeg-7/jinclude.h +91 -0
  196. data/src/jpeg-7/jmemansi.c +167 -0
  197. data/src/jpeg-7/jmemdos.c +638 -0
  198. data/src/jpeg-7/jmemdosa.asm +379 -0
  199. data/src/jpeg-7/jmemmac.c +289 -0
  200. data/src/jpeg-7/jmemmgr.c +1118 -0
  201. data/src/jpeg-7/jmemname.c +276 -0
  202. data/src/jpeg-7/jmemnobs.c +109 -0
  203. data/src/jpeg-7/jmemsys.h +198 -0
  204. data/src/jpeg-7/jmorecfg.h +369 -0
  205. data/src/jpeg-7/jpegint.h +395 -0
  206. data/src/jpeg-7/jpeglib.h +1135 -0
  207. data/src/jpeg-7/jpegtran.1 +272 -0
  208. data/src/jpeg-7/jpegtran.c +546 -0
  209. data/src/jpeg-7/jquant1.c +856 -0
  210. data/src/jpeg-7/jquant2.c +1310 -0
  211. data/src/jpeg-7/jutils.c +179 -0
  212. data/src/jpeg-7/jversion.h +14 -0
  213. data/src/jpeg-7/libjpeg.map +4 -0
  214. data/src/jpeg-7/libjpeg.txt +3067 -0
  215. data/src/jpeg-7/ltmain.sh +8406 -0
  216. data/src/jpeg-7/makcjpeg.st +36 -0
  217. data/src/jpeg-7/makdjpeg.st +36 -0
  218. data/src/jpeg-7/makeadsw.vc6 +77 -0
  219. data/src/jpeg-7/makeasln.vc9 +33 -0
  220. data/src/jpeg-7/makecdep.vc6 +82 -0
  221. data/src/jpeg-7/makecdsp.vc6 +130 -0
  222. data/src/jpeg-7/makecmak.vc6 +159 -0
  223. data/src/jpeg-7/makecvcp.vc9 +186 -0
  224. data/src/jpeg-7/makeddep.vc6 +82 -0
  225. data/src/jpeg-7/makeddsp.vc6 +130 -0
  226. data/src/jpeg-7/makedmak.vc6 +159 -0
  227. data/src/jpeg-7/makedvcp.vc9 +186 -0
  228. data/src/jpeg-7/makefile.ansi +220 -0
  229. data/src/jpeg-7/makefile.bcc +291 -0
  230. data/src/jpeg-7/makefile.dj +226 -0
  231. data/src/jpeg-7/makefile.manx +220 -0
  232. data/src/jpeg-7/makefile.mc6 +255 -0
  233. data/src/jpeg-7/makefile.mms +224 -0
  234. data/src/jpeg-7/makefile.sas +258 -0
  235. data/src/jpeg-7/makefile.unix +234 -0
  236. data/src/jpeg-7/makefile.vc +217 -0
  237. data/src/jpeg-7/makefile.vms +142 -0
  238. data/src/jpeg-7/makefile.wat +239 -0
  239. data/src/jpeg-7/makejdep.vc6 +423 -0
  240. data/src/jpeg-7/makejdsp.vc6 +285 -0
  241. data/src/jpeg-7/makejdsw.vc6 +29 -0
  242. data/src/jpeg-7/makejmak.vc6 +425 -0
  243. data/src/jpeg-7/makejsln.vc9 +17 -0
  244. data/src/jpeg-7/makejvcp.vc9 +328 -0
  245. data/src/jpeg-7/makeproj.mac +213 -0
  246. data/src/jpeg-7/makerdep.vc6 +6 -0
  247. data/src/jpeg-7/makerdsp.vc6 +78 -0
  248. data/src/jpeg-7/makermak.vc6 +110 -0
  249. data/src/jpeg-7/makervcp.vc9 +133 -0
  250. data/src/jpeg-7/maketdep.vc6 +43 -0
  251. data/src/jpeg-7/maketdsp.vc6 +122 -0
  252. data/src/jpeg-7/maketmak.vc6 +131 -0
  253. data/src/jpeg-7/maketvcp.vc9 +178 -0
  254. data/src/jpeg-7/makewdep.vc6 +6 -0
  255. data/src/jpeg-7/makewdsp.vc6 +78 -0
  256. data/src/jpeg-7/makewmak.vc6 +110 -0
  257. data/src/jpeg-7/makewvcp.vc9 +133 -0
  258. data/src/jpeg-7/makljpeg.st +68 -0
  259. data/src/jpeg-7/maktjpeg.st +30 -0
  260. data/src/jpeg-7/makvms.opt +4 -0
  261. data/src/jpeg-7/missing +376 -0
  262. data/src/jpeg-7/rdbmp.c +439 -0
  263. data/src/jpeg-7/rdcolmap.c +253 -0
  264. data/src/jpeg-7/rdgif.c +38 -0
  265. data/src/jpeg-7/rdjpgcom.1 +63 -0
  266. data/src/jpeg-7/rdjpgcom.c +515 -0
  267. data/src/jpeg-7/rdppm.c +459 -0
  268. data/src/jpeg-7/rdrle.c +387 -0
  269. data/src/jpeg-7/rdswitch.c +365 -0
  270. data/src/jpeg-7/rdtarga.c +500 -0
  271. data/src/jpeg-7/structure.txt +945 -0
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +4 -0
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +1533 -0
  279. data/src/jpeg-7/transupp.h +205 -0
  280. data/src/jpeg-7/usage.txt +605 -0
  281. data/src/jpeg-7/wizard.txt +211 -0
  282. data/src/jpeg-7/wrbmp.c +442 -0
  283. data/src/jpeg-7/wrgif.c +399 -0
  284. data/src/jpeg-7/wrjpgcom.1 +103 -0
  285. data/src/jpeg-7/wrjpgcom.c +583 -0
  286. data/src/jpeg-7/wrppm.c +269 -0
  287. data/src/jpeg-7/wrrle.c +305 -0
  288. data/src/jpeg-7/wrtarga.c +253 -0
  289. data/test/isbn_test.rb +7 -0
  290. data/test/test_helper.rb +7 -0
  291. metadata +345 -0
@@ -0,0 +1,373 @@
1
+ /*
2
+ This is an Optical-Character-Recognition program
3
+ Copyright (C) 2000-2009 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+
21
+ sometimes I have written comments in german language, sorry for that
22
+
23
+ This file was retrieved from pgm2asc.cc of Joerg, in order to have
24
+ a library of the ocr-engine from Klaas Freitag
25
+
26
+ */
27
+ #include "config.h"
28
+ #include <stdlib.h>
29
+ #include <stdio.h>
30
+ #include <assert.h>
31
+ #include <string.h>
32
+ #ifdef HAVE_GETTIMEOFDAY
33
+ #include <sys/time.h>
34
+ #endif
35
+ #ifdef HAVE_UNISTD_H
36
+ #include <unistd.h>
37
+ #endif
38
+
39
+ #include "pnm.h"
40
+ #include "pgm2asc.h"
41
+ #include "pcx.h"
42
+ #include "ocr0.h" /* only_numbers */
43
+ #include "progress.h"
44
+ #include "version.h"
45
+
46
+ static void out_version(int v) {
47
+ fprintf(stderr, " Optical Character Recognition --- gocr "
48
+ version_string " " release_string "\n"
49
+ " Copyright (C) 2001-2009 Joerg Schulenburg GPG=1024D/53BDFBE3\n"
50
+ " released under the GNU General Public License\n");
51
+ /* as recommended, (c) and license should be part of the binary */
52
+ /* no email because of SPAM, see README for contacting the author */
53
+ if (v)
54
+ fprintf(stderr, " use option -h for help\n");
55
+ if (v & 2)
56
+ exit(1);
57
+ return;
58
+ }
59
+
60
/* Print the banner followed by a short usage summary to stderr, then
 * terminate the process with exit status 0. This function does not return.
 *
 * The text is emitted in several pieces because ISO C89 only guarantees
 * string literals of up to 509 bytes.
 */
static void help(void) {
  out_version(0);

  /* output is shortened to essentials, see manual page for details */
  fputs(" using: gocr [options] pnm_file_name # use - for stdin\n"
        " options (see gocr manual pages for more details):\n"
        " -h, --help\n"
        " -i name - input image file (pnm,pgm,pbm,ppm,pcx,...)\n"
        " -o name - output file (redirection of stdout)\n"
        " -e name - logging file (redirection of stderr)\n"
        " -x name - progress output to fifo (see manual)\n"
        " -p name - database path including final slash (default is ./db/)\n",
        stderr);

  fputs(" -f fmt - output format (ISO8859_1 TeX HTML XML UTF8 ASCII)\n"
        " -l num - threshold grey level 0<160<=255 (0 = autodetect)\n"
        " -d num - dust_size (remove small clusters, -1 = autodetect)\n"
        " -s num - spacewidth/dots (0 = autodetect)\n"
        " -v num - verbose (see manual page)\n"
        " -c string - list of chars (debugging, see manual)\n"
        " -C string - char filter (ex. hexdigits: ""0-9A-Fx"", only ASCII)\n"
        " -m num - operation modes (bitpattern, see manual)\n",
        stderr);

  fputs(" -a num - value of certainty (in percent, 0..100, default=95)\n"
        " -u string - output this string for every unrecognized character\n",
        stderr);

  fputs(" examples:\n"
        "\tgocr -m 4 text1.pbm # do layout analyzis\n"
        "\tgocr -m 130 -p ./database/ text1.pbm # extend database\n"
        "\tdjpeg -pnm -gray text.jpg | gocr - # use jpeg-file via pipe\n"
        "\n",
        stderr);

  fputs(" webpage: http://jocr.sourceforge.net/\n", stderr);
  exit(0);
}
93
+
94
+ #ifdef HAVE_GETTIMEOFDAY
95
+ /* from the glibc documentation */
96
/* Compute *result = *x - *y (adapted from the glibc documentation).
 *
 * result: receives the difference
 * x:      minuend, not modified
 * y:      subtrahend; NOTE: it is normalized in place (whole seconds are
 *         moved between tv_sec and tv_usec), so the caller's value changes
 *
 * Returns 1 if the difference is negative, 0 otherwise.
 */
static int timeval_subtract (struct timeval *result, struct timeval *x,
                             struct timeval *y) {
  int carry;

  /* Borrow whole seconds into y so that x->tv_usec - y->tv_usec
     cannot become negative. */
  if (y->tv_usec > x->tv_usec) {
    carry = (y->tv_usec - x->tv_usec) / 1000000 + 1;
    y->tv_usec -= 1000000 * carry;
    y->tv_sec += carry;
  }

  /* Give back whole seconds if the microsecond gap exceeds one second. */
  if (x->tv_usec - y->tv_usec > 1000000) {
    carry = (x->tv_usec - y->tv_usec) / 1000000;
    y->tv_usec += 1000000 * carry;
    y->tv_sec -= carry;
  }

  /* After the normalization above the microsecond difference is
     non-negative. */
  result->tv_sec = x->tv_sec - y->tv_sec;
  result->tv_usec = x->tv_usec - y->tv_usec;

  return (x->tv_sec < y->tv_sec) ? 1 : 0;
}
119
+ #endif
120
+
121
+ static void process_arguments(job_t *job, int argn, char *argv[])
122
+ {
123
+ int i;
124
+ char *s1;
125
+
126
+ assert(job);
127
+
128
+ if (argn <= 1) {
129
+ out_version(1);
130
+ exit(0);
131
+ }
132
+ #ifdef HAVE_PGM_H
133
+ pnm_init(&argn, &argv);
134
+ #endif
135
+
136
+ /* process arguments */
137
+ for (i = 1; i < argn; i++) {
138
+ if (strcmp(argv[i], "--help") == 0)
139
+ help(); /* and quits */
140
+ if (argv[i][0] == '-' && argv[i][1] != 0) {
141
+ s1 = "";
142
+ if (i + 1 < argn)
143
+ s1 = argv[i + 1];
144
+ switch (argv[i][1]) {
145
+ case 'h': /* help */
146
+ help();
147
+ break;
148
+ case 'i': /* input image file */
149
+ job->src.fname = s1;
150
+ i++;
151
+ break;
152
+ case 'e': /* logging file */
153
+ if (s1[0] == '-' && s1[1] == '\0') {
154
+ #ifdef HAVE_UNISTD_H
155
+ dup2(STDOUT_FILENO, STDERR_FILENO); /* -e /dev/stdout works */
156
+ #else
157
+ fprintf(stderr, "stderr redirection not possible without unistd.h\n");
158
+ #endif
159
+ }
160
+ else if (!freopen(s1, "w", stderr)) {
161
+ fprintf(stderr, "stderr redirection to %s failed\n", s1);
162
+ }
163
+ i++;
164
+ break;
165
+ case 'p': /* database path */
166
+ job->cfg.db_path=s1;
167
+ i++;
168
+ break;
169
+ case 'o': /* output file */
170
+ if (s1[0] == '-' && s1[1] == '\0') { /* default */
171
+ }
172
+ else if (!freopen(s1, "w", stdout)) {
173
+ fprintf(stderr, "stdout redirection to %s failed\n", s1);
174
+ };
175
+ i++;
176
+ break;
177
+ case 'f': /* output format */
178
+ if (strcmp(s1, "ISO8859_1") == 0) job->cfg.out_format=ISO8859_1; else
179
+ if (strcmp(s1, "TeX") == 0) job->cfg.out_format=TeX; else
180
+ if (strcmp(s1, "HTML") == 0) job->cfg.out_format=HTML; else
181
+ if (strcmp(s1, "XML") == 0) job->cfg.out_format=XML; else
182
+ if (strcmp(s1, "SGML") == 0) job->cfg.out_format=SGML; else
183
+ if (strcmp(s1, "UTF8") == 0) job->cfg.out_format=UTF8; else
184
+ if (strcmp(s1, "ASCII") == 0) job->cfg.out_format=ASCII; else
185
+ fprintf(stderr,"Warning: unknown format (-f %s)\n",s1);
186
+ i++;
187
+ break;
188
+ case 'c': /* list of chars (_ = not recognized chars) */
189
+ job->cfg.lc = s1;
190
+ i++;
191
+ break;
192
+ case 'C': /* char filter, default: NULL (all chars) */
193
+ /* ToDo: UTF8 input, wchar */
194
+ job->cfg.cfilter = s1;
195
+ i++;
196
+ break;
197
+ case 'd': /* dust size */
198
+ job->cfg.dust_size = atoi(s1);
199
+ i++;
200
+ break;
201
+ case 'l': /* grey level 0<160<=255, 0 for autodetect */
202
+ job->cfg.cs = atoi(s1);
203
+ i++;
204
+ break;
205
+ case 's': /* spacewidth/dots (0 = autodetect) */
206
+ job->cfg.spc = atoi(s1);
207
+ i++;
208
+ break;
209
+ case 'v': /* verbose mode */
210
+ job->cfg.verbose |= atoi(s1);
211
+ i++;
212
+ break;
213
+ case 'm': /* operation modes */
214
+ job->cfg.mode |= atoi(s1);
215
+ i++;
216
+ break;
217
+ case 'n': /* numbers only */
218
+ job->cfg.only_numbers = atoi(s1);
219
+ i++;
220
+ break;
221
+ case 'x': /* initialize progress output s1=fname */
222
+ ini_progress(s1);
223
+ i++;
224
+ break;
225
+ case 'a': /* set certainty */
226
+ job->cfg.certainty = atoi(s1);;
227
+ i++;
228
+ break;
229
+ case 'u': /* output marker for unrecognized chars */
230
+ job->cfg.unrec_marker = s1;
231
+ i++;
232
+ break;
233
+ default:
234
+ fprintf(stderr, "# unknown option use -h for help\n");
235
+ }
236
+ continue;
237
+ }
238
+ else /* argument can be filename v0.2.5 */ if (argv[i][0] != '-'
239
+ || argv[i][1] == '\0' ) {
240
+ job->src.fname = argv[i];
241
+ }
242
+ }
243
+ }
244
+
245
+ static void mark_start(job_t *job) {
246
+ assert(job);
247
+
248
+ if (job->cfg.verbose) {
249
+ out_version(0);
250
+ /* insert some helpful info for support */
251
+ fprintf(stderr, "# compiled: " __DATE__ );
252
+ #if defined(__GNUC__)
253
+ fprintf(stderr, " GNUC-%d", __GNUC__ );
254
+ #endif
255
+ #ifdef __GNUC_MINOR__
256
+ fprintf(stderr, ".%d", __GNUC_MINOR__ );
257
+ #endif
258
+ #if defined(__linux)
259
+ fprintf(stderr, " linux");
260
+ #elif defined(__unix)
261
+ fprintf(stderr, " unix");
262
+ #endif
263
+ #if defined(__WIN32) || defined(__WIN32__)
264
+ fprintf(stderr, " WIN32");
265
+ #endif
266
+ #if defined(__WIN64) || defined(__WIN64__)
267
+ fprintf(stderr, " WIN64");
268
+ #endif
269
+ #if defined(__VERSION__)
270
+ fprintf(stderr, " version " __VERSION__ );
271
+ #endif
272
+ fprintf(stderr, "\n");
273
+ fprintf(stderr,
274
+ "# options are: -l %d -s %d -v %d -c %s -m %d -d %d -n %d -a %d -C \"%s\"\n",
275
+ job->cfg.cs, job->cfg.spc, job->cfg.verbose, job->cfg.lc, job->cfg.mode,
276
+ job->cfg.dust_size, job->cfg.only_numbers, job->cfg.certainty,
277
+ job->cfg.cfilter);
278
+ fprintf(stderr, "# file: %s\n", job->src.fname);
279
+ #ifdef USE_UNICODE
280
+ fprintf(stderr,"# using unicode\n");
281
+ #endif
282
+ #ifdef HAVE_GETTIMEOFDAY
283
+ gettimeofday(&job->tmp.init_time, NULL);
284
+ #endif
285
+ }
286
+ }
287
+
288
+ static void mark_end(job_t *job) {
289
+ assert(job);
290
+
291
+ #ifdef HAVE_GETTIMEOFDAY
292
+ /* show elapsed time */
293
+ if (job->cfg.verbose) {
294
+ struct timeval end, result;
295
+ gettimeofday(&end, NULL);
296
+ timeval_subtract(&result, &end, &job->tmp.init_time);
297
+ fprintf(stderr,"Elapsed time: %d:%02d:%3.3f.\n", (int)result.tv_sec/60,
298
+ (int)result.tv_sec%60, (float)result.tv_usec/1000);
299
+ }
300
+ #endif
301
+ }
302
+
303
+ static int read_picture(job_t *job) {
304
+ int rc=0;
305
+ assert(job);
306
+
307
+ if (strstr(job->src.fname, ".pcx"))
308
+ readpcx(job->src.fname, &job->src.p, job->cfg.verbose);
309
+ else
310
+ rc=readpgm(job->src.fname, &job->src.p, job->cfg.verbose);
311
+ return rc; /* 1 for multiple images, 0 else */
312
+ }
313
+
314
+ /* subject to change, we need more output for XML (ToDo) */
315
+ void print_output(job_t *job) {
316
+ int linecounter = 0;
317
+ const char *line;
318
+
319
+ assert(job);
320
+
321
+ linecounter = 0;
322
+ line = getTextLine(linecounter++);
323
+ while (line) {
324
+ /* notice: decode() is shiftet to getTextLine since 0.38 */
325
+ fputs(line, stdout);
326
+ if (job->cfg.out_format==HTML) fputs("<br />",stdout);
327
+ if (job->cfg.out_format!=XML) fputc('\n', stdout);
328
+ line = getTextLine(linecounter++);
329
+ }
330
+ free_textlines();
331
+ }
332
+
333
+ /* FIXME jb: remove JOB; global pointer to the current job, main() points it at its local job_t */
334
+ job_t *JOB;
335
+
336
+
337
+ /* -------------------------------------------------------------
338
+ // ------ MAIN - replace this by your own application!
339
+ // ------------------------------------------------------------- */
340
+ int main(int argn, char *argv[]) {
341
+ int multipnm=1;
342
+ job_t job;
343
+
344
+ JOB = &job;
345
+ setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
346
+
347
+ while (multipnm==1) {
348
+
349
+ job_init(&job);
350
+
351
+ process_arguments(&job, argn, argv);
352
+
353
+ mark_start(&job);
354
+
355
+ multipnm = read_picture(&job);
356
+ /* separation of main and rest for using as lib
357
+ this will be changed later => introduction of set_option()
358
+ for better communication to the engine */
359
+ if (multipnm<0) break; /* read error */
360
+
361
+ /* call main loop */
362
+ pgm2asc(&job);
363
+
364
+ mark_end(&job);
365
+
366
+ print_output(&job);
367
+
368
+ job_free(&job);
369
+
370
+ }
371
+
372
+ return 0;
373
+ }
@@ -0,0 +1,288 @@
1
+ /*
2
+ This is an Optical-Character-Recognition program
3
+ Copyright (C) 2000-2006 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+
21
+ sometimes I have written comments in german language, sorry for that
22
+
23
+ - look for ??? for preliminary code
24
+ */
25
+
26
+ /* General headerfile with gocr-definitions */
27
+
28
+ #ifndef __GOCR_H__
29
+ #define __GOCR_H__
30
+
31
+ #include "pnm.h"
32
+ #include "unicode.h"
33
+ #include "list.h"
34
+ #include <stddef.h>
35
+ #ifdef HAVE_GETTIMEOFDAY
36
+ #include <sys/time.h>
37
+ #endif
38
+
39
+ /*
40
+ * wchar_t should always exist (ANSI), but WCHAR.H is sometimes missing
41
+ * USE_UNICODE should be removed or replaced by HAVE_WCHAR_H in future
42
+ */
43
+ #ifdef HAVE_WCHAR_H
44
+ #define USE_UNICODE 1
45
+ #endif
46
+
47
+ /* extern "C"{ */
48
+ /* ------------------------ feature extraction ----------------- */
49
+ #define AT 7 /* mark (NOTE(review): same value as ST below) */
50
+ #define M1 1 /* mark */
51
+ enum direction {
52
+ UP=1, DO, RI, LE /* presumably up, down, right, left -- TODO confirm */
53
+ };
54
+ typedef enum direction DIRECTION;
55
+ #define ST 7 /* stop */
56
+ /* ------------------------------------------------------------- */
57
+ /* detect maxima of line overlaps (returned in %) and line coordinates */
58
+ #define HOR 1 /* horizontal */
59
+ #define VER 2 /* vertical */
60
+ #define RIS 3 /* rising */
61
+ #define FAL 4 /* falling */
62
+
63
+ #define MAXlines 1024 /* length of every per-line array in struct tlines */
64
+
65
+ /* ToDo: if we have a tree instead of a list, a line could be a node object */
66
+ struct tlines { /* per-line geometry; every array has MAXlines entries */
67
+ int num; /* presumably the number of lines in use -- TODO confirm */
68
+ int dx, dy; /* direction of text lines (straight/skew) */
69
+ int m1[MAXlines], /* start of line = upper bound of 'A' */
70
+ m2[MAXlines], /* upper bound of 'e' */
71
+ m3[MAXlines], /* lower bound of 'e' = baseline */
72
+ m4[MAXlines]; /* stop of line = lower bound of 'q' */
73
+ /* ToDo: add sureness per m1,m2 etc? */
74
+ int x0[MAXlines],
75
+ x1[MAXlines]; /* left and right border */
76
+ int wt[MAXlines]; /* weight: how sure the line is correct, in percent, v0.41 */
77
+ int pitch[MAXlines]; /* word pitch (later per box?), v0.41 */
78
+ int mono[MAXlines]; /* spacing type, 0=proportional, 1=monospaced */
79
+ };
80
+
81
+ #define NumAlt 10 /* maximal number of alternative chars (table length) */
82
+ #define MaxNumFrames 8 /* maximum number of frames per char/box */
83
+ #define MaxFrameVectors 128 /* length of box.frame_vector, one array per box shared by all frames (*8=1KB/box) */
84
+ /* ToDo: use only malloc_box(),free_box(),copybox() for creation, destruction etc.
85
+ * add a reference counter to avoid pointers pointing to a freed box
86
+ */
87
+ struct box { /* this structure should contain all pixel infos of a letter */
88
+ int x0,x1,y0,y1,x,y,dots; /* xmin,xmax,ymin,ymax,reference-pixel (x,y),i-dots */
89
+ int num_boxes, /* 1 "abc", 2 "!i?", 3 "&auml;" (composed objects) 0.41 */
90
+ num_subboxes; /* 1 for "abdegopqADOPQR", 2 for "B" (holes) 0.41 */
91
+ wchar_t c; /* detected char (same as tac[0], obsolete?) */
92
+ wchar_t modifier; /* default=0, see compose() in unicode.c */
93
+ int num; /* same number = same char */
94
+ int line; /* line number (points to struct tlines lines) */
95
+ int m1,m2,m3,m4; /* m2 = upper boundary, m3 = baseline */
96
+ /* planned: sizeof hole_1, hole_2, certainty (run1=100%,run2=90%,etc.) */
97
+ pix *p; /* pointer to pixmap (v0.2.5) */
98
+ /* tac, wac are used together with setac() to manage very similar chars */
99
+ int num_ac; /* length of table (alternative chars), default=0 */
100
+ wchar_t tac[NumAlt]; /* alternative chars, only used by setac(),getac() */
101
+ int wac[NumAlt]; /* weight of alternative chars */
102
+ char *tas[NumAlt]; /* alternative UTF8-strings or XML codes if tac[]=0 */
103
+ /* replacing old obj */
104
+ /* ToDo: (*obj)[NumAlt] + olen[NumAlt] ??? */
105
+ /* ToDo: bitmap for possible Picture|Object|Char ??? */
106
+ /* char *obj; */ /* pointer to text-object ... -> replaced by tas[] */
107
+ /* ... (melted chars, barcode, picture coords, ...) */
108
+ /* must be freed before box is freed! */
109
+ /* do _not_ copy only the pointer to object */
110
+ /* --------------------------------------------------------
111
+ * extension since v0.41 js05: store frame vectors,
112
+ * which is a table of vectors surrounding the char and its
113
+ * inner white holes. The advantage is the independence from
114
+ * resolution, handling of holes, overlap and rotation.
115
+ * --------------------------------------------------------- */
116
+ int num_frames; /* number of frames: 1 for cfhklmnrstuvwxyz */
117
+ /* 2 for abdegijopq */
118
+ int frame_vol[MaxNumFrames]; /* volume inside frame +/- (black/white) */
119
+ int frame_per[MaxNumFrames]; /* periphery, summed length of vectors */
120
+ int num_frame_vectors[MaxNumFrames]; /* index to next frame */
121
+ /* biggest frame should be stored first (outer frame) */
122
+ /* biggest has the maximum pair distance */
123
+ /* num vector loops */
124
+ int frame_vector[MaxFrameVectors][2]; /* may be 16*int=fixpoint_number */
125
+
126
+ };
127
+ typedef struct box Box;
128
+
129
/* True if the coordinate pair (a,b) is outside the image p, i.e. if a or b
 * is negative, a >= p->x or b >= p->y.
 *
 * Every argument is parenthesized so that expressions such as `i | 1` or
 * `c ? u : v` can be passed safely. a and b are each evaluated up to twice,
 * so do not pass expressions with side effects.
 */
#define outbounds(p, a, b) \
  ((a) < 0 || (b) < 0 || (a) >= (p)->x || (b) >= (p)->y)
131
+
132
+ /* ToDo: this structure seems to be obsolete, remove it */
133
+ typedef struct path { /* growable list of colour transitions along a path */
134
+ int start; /* color at the beginning of the path, (0=white, 1=black) */
135
+ int *x; /* x coordinates of transitions */
136
+ int *y; /* y coordinates of transitions */
137
+ int num; /* current number of entries in x or y */
138
+ int max; /* maximum number of entries in x or y */
139
+ /* (if more values need to be stored, the arrays are enlarged) */
140
+ } path_t;
141
+
142
+ /* job_t contains all information needed for an OCR task */
143
+ typedef struct job_s {
144
+ struct { /* source data */
145
+ char *fname; /* input filename; default value: "-" */
146
+ pix p; /* source pixel data, pixelmap 8bit gray */
147
+ } src;
148
+ struct { /* temporary stuff, e.g. buffers */
149
+ #ifdef HAVE_GETTIMEOFDAY
150
+ struct timeval init_time; /* starting time of this job */
151
+ #endif
152
+ pix ppo; /* pixmap for visual debugging output, obsolete */
153
+
154
+ /* sometimes the recognition function is called again and again if the
155
+ result was 0; n_run tells the pixel function to return alternative results */
156
+ int n_run; /* number of the run; in run 2 critical patterns get other results */
157
+ /* used for 2nd try, pixel uses slower filter function etc. */
158
+ List dblist; /* list of boxes loaded from the character database */
159
+ } tmp;
160
+ struct { /* results */
161
+ List boxlist; /* store every object in a box, which contains */
162
+ /* the characteristics of the object (see struct box) */
163
+ List linelist; /* recognized text lines after recognition */
164
+
165
+ struct tlines lines; /* used to access line data (statistics) */
166
+ /* here the positions (frames) of lines are */
167
+ /* stored for further use */
168
+ int avX,avY; /* average X,Y (avX=sumX/numC) */
169
+ int sumX,sumY,numC; /* sum of all X,Y; num chars */
170
+ } res;
171
+ struct { /* configuration */
172
+ int cs; /* critical grey value (pixel<cs => black pixel) */
173
+ /* range: 0..255, 0 means autodetection */
174
+ int spc; /* spacewidth/dots (0 = autodetect); default value: 0 */
175
+ int mode; /* operation modes; default value: 0 */
176
+ /* operation mode (see --help) */
177
+ int dust_size; /* dust size; default value: 10 */
178
+ int only_numbers; /* numbers only; default value: 0 */
179
+ int verbose; /* verbose mode; default value: 0 */
180
+ /* verbose option (see --help) */
181
+ FORMAT out_format; /* output format; default value: ISO8859_1*/
182
+ char *lc; /* debug list of chars (_ = not recognized chars) */
183
+ /* default value: "_" */
184
+ char *db_path; /* pathname for database; default value: NULL */
185
+ char *cfilter; /* char filter; default value: NULL (check before printing), ex: "A-Za-z" */
186
+ /* limit of certainty where chars are accepted as identified */
187
+ int certainty; /* in percent; 0..100; default 95 */
188
+ char *unrec_marker; /* output this string for every unrecognized char */
189
+ } cfg;
190
+ } job_t;
191
+
192
+ /* initialize job structure */
193
+ void job_init(job_t *job);
194
+
195
+ /* free job structure */
196
+ void job_free(job_t *job);
197
+
198
+ /*FIXME jb: remove JOB; */
199
+ extern job_t *JOB;
200
+
201
+ /* calculate the overlap of the line (0-1) with black points
202
+ * by recursive bisection
203
+ * (possibly search for pixels in the neighbourhood dx,dy as error tolerance) (switchable) ???
204
+ * MidPoint Line Algorithm (Bresenham) Foley: ComputerGraphics better?
205
+ * will be replaced by vector functions
206
+ */
207
+
208
+ /* straight line y=dy/dx*x+b, implicit form d=F(x,y)=dy*x-dx*y+b*dx=0
209
+ * incremental y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y)) */
210
+ int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
211
+ int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
212
+
213
+ /* look for white 0x02 or black 0x01 dots (0x03 = white+black) */
214
+ char get_bw(int x0, int x1, int y0, int y1,
215
+ pix *p, int cs,int mask);
216
+
217
+ /* look for black crossing a line x0,y0,x1,y1
218
+ * follow line and count crossings ([white]-black-transitions)
219
+ */
220
+ int num_cross(int x0, int x1, int y0, int y1,
221
+ pix *p, int cs);
222
+
223
+ /* memory allocation with error checking */
224
+ void *xrealloc(void *ptr, size_t size);
225
+
226
+ /* follow a line x0,y0,x1,y1 recording locations of transitions,
227
+ * return count of transitions
228
+ */
229
+ int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path);
230
+
231
+ /* -------------------------------------------------------------
232
+ * mark edge-points
233
+ * - first move forward until b/w-edge
234
+ * - more than 2 pixel?
235
+ * - loop around
236
+ * - if forward pixel : go up, rotate right
237
+ * - if forward no pixel : rotate left
238
+ * - stop if found first 2 pixel in same order
239
+ * using the follow-the-right-hand-wall strategy
240
+ * --------------------------------------------------------------
241
+ * turmite game: inp: start-x,y, rule r_black=UP,r_white=RIGHT until border
242
+ * out: last-position
243
+ * while doing so, count steps, dead ends, xmax, ymax, and upper-right, lower-right, upper-left, lower-left corners
244
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
245
+ *
246
+ * is this the right place for declaration?
247
+ */
248
+ void turmite(pix *p, int *x, int *y,
249
+ int x0, int x1, int y0, int y1, int cs, int rw, int rb);
250
+
251
+ /* test if points are connected via t-pixel (recursive!) */
252
+ int joined(pix *p, int x0, int y0, int x1, int y1, int cs);
253
+
254
+ /* move from x,y to direction r until pixel or l steps
255
+ * return number of steps
256
+ */
257
+ int loop(pix *p, int x, int y, int l, int cs, int col, DIRECTION r);
258
+
259
+ #define MAX_HOLES 3 /* number of entries in the hole[] array of holes_t */
260
+ typedef struct list_holes {
261
+ int num; /* number of holes, initialize with 0 */
262
+ struct hole_s {
263
+ int size,x,y,x0,y0,x1,y1; /* size; start point (x,y); outer rectangle (x0,y0,x1,y1) */
264
+ } hole[MAX_HOLES];
265
+ } holes_t;
266
+
267
+ /* look for white holes surrounded by black points
268
+ * at the moment: a white point with black in all four directions
269
+ */
270
+ int num_hole(int x0, int x1, int y0, int y1, pix *p, int cs, holes_t *holes);
271
+
272
+ /* count black unconnected objects --- used for i,auml,ouml,etc. */
273
+ int num_obj(int x0, int x1, int y0, int y1, pix *p, int cs);
274
+
275
+ int distance( pix *p1, struct box *box1, /* box-frame */
276
+ pix *p2, struct box *box2, int cs);
277
+
278
+ /* call the OCR engine ;) */
279
+ /* char whatletter(struct box *box1,int cs); */
280
+
281
+ /* declared in pixel.c */
282
+ /* getpixel() was pixel() but it may collide with netpnm pixel declaration */
283
+ int getpixel(pix *p, int x, int y);
284
+ int marked(pix *p, int x, int y);
285
+ void put(pix * p, int x, int y, int ia, int io);
286
+
287
+ /* } */ /* extern C */
288
+ #endif /* __GOCR_H__ */