isbn 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,35 +0,0 @@
1
- /* Handle PNM-files Dez98 JS
2
- * 0,0 = left up
3
- * PAM-formats
4
- * PAM any P7
5
- * PNM-formats
6
- * PGM gray ASCII=P2 RAW=P5 dx dy col gray
7
- * PPM RGB ASCII=P3 RAW=P6 dx dy col RGB
8
- * PBM B/W ASCII=P1 RAW=P4 dx dy bitmap
9
- */
10
-
11
- #ifndef GOCR_PNM_H
12
- #define GOCR_PNM_H 1
13
-
14
- #include "config.h"
15
-
16
- struct pixmap { /* in-memory image: buffer pointer plus its dimensions */
17
- unsigned char *p; /* pointer to the image buffer (pixmap data) */
18
- int x; /* x size of the image */
19
- int y; /* y size of the image */
20
- int bpp; /* bytes per pixel: 1=gray 3=rgb */
21
- };
22
- typedef struct pixmap pix; /* short alias used by the prototypes in this header */
23
-
24
- /* returns 1 on multiple images (the file is then kept open), 0 otherwise */
25
- int readpgm(char *name, pix *p, int vvv);
26
-
27
- /* write the pixmap *p to the PNM file named nam (gray, bitmap and RGB variants) */
28
- int writepgm(char *nam, pix *p);
29
- int writepbm(char *nam, pix *p);
30
- int writeppm(char *nam, pix *p); /* uses the lowest 3 bits for color coding */
31
-
32
- /* ----- count colors ------ create histogram (col has 256 entries) ------- */
33
- void makehisto(pix p, unsigned col[256], int vvv);
34
-
35
- #endif
@@ -1,87 +0,0 @@
1
- /* ---------------------------- progress output ---------------------- */
2
- #include <stdlib.h>
3
- #include <stdio.h>
4
- #include "progress.h"
5
-
6
FILE *fp=NULL;              /* output stream for progress info, NULL = no progress output */
time_t printinterval = 10;  /* approx. seconds between printouts, 1.. */

#ifdef __USE_POSIX
/* Interpret s as the number of an already open file descriptor.
 * Only the canonical decimal spelling of 1..255 is accepted (no sign,
 * no leading blanks or zeros, no trailing characters).
 * Returns the descriptor, or -1 if s is not such a number.
 */
static int progress_fd_from_name(const char *s){
  char *end;
  long v;
  if (s[0] < '1' || s[0] > '9') return -1;
  v = strtol(s, &end, 10);   /* saturates on overflow, so v > 255 catches it */
  if (*end != '\0' || v > 255) return -1;
  return (int)v;
}
#endif

/* Initialization of progress output.
 *   fname = "-"          progress goes to stdout
 *   fname = "<fileID>"   number of an open file descriptor (POSIX builds only)
 *   fname = "<filename>" file is opened for writing (append as fallback)
 *   fname = NULL or ""   progress output stays switched off
 * A previously selected stream is released first; stdout is only flushed,
 * never closed, because it does not belong to this module.
 * Returns 0 on success and -1 if the stream could not be opened.
 */
int ini_progress(char *fname){
  if (fp) {
    if (fp == stdout) fflush(fp); else fclose(fp);
    fp = NULL;
  }
  if (!fname || !fname[0]) return 0;   /* nothing requested, no error */

  if (fname[0]=='-' && fname[1]=='\0') {
    fp = stdout;
  } else {
#ifdef __USE_POSIX
    int fd = progress_fd_from_name(fname);
    if (fd > 0) {
      fp = fdopen(fd, "w");  /* not sure that "w" is ok ???? */
    } else
#endif
    {
      fp = fopen(fname, "w");
      if (!fp) fp = fopen(fname, "a");
    }
  }
  if (!fp) {
    fprintf(stderr,"could not open %s for progress output\n",fname);
    return -1;  /* no success */
  }
  return 0;  /* no error */
}
29
-
30
- progress_counter_t *open_progress(int maxcount, const char *name){
31
- progress_counter_t *pc;
32
- pc = (progress_counter_t*) malloc( sizeof(progress_counter_t) );
33
- if (!pc) return 0; /* nonfatal */
34
- pc->starttime = time(NULL);
35
- pc->maxcount = maxcount;
36
- pc->numskip = 0;
37
- pc->lastprintcount = -1;
38
- pc->name = name;
39
- pc->lastprinttime = pc->starttime;
40
- return pc;
41
- }
42
- /* free counter */
43
- int close_progress(progress_counter_t *counter){
44
- if (counter) free(counter);
45
- return 0;
46
- }
47
- /* progress meter output
48
- * only 1output/10s, + estimated endtime (test on pixelfields)
49
- * ToDo: to stderr by default? remove subprogress, ini_progress? rm_progress?
50
- * test on tcl
51
- */
52
- int progress(int counter, progress_counter_t *pc){
53
- /* we try to save computing time, so we skip early */
54
- if ((!fp) || counter - pc->lastprintcount <= pc->numskip) return 0;
55
- {
56
- char cr='\n';
57
- time_t now = time(NULL);
58
- #if 0 /* debugging */
59
- if (counter)
60
- fprintf(fp," progress %s %3d / %d time %d skip %d\n",
61
- pc->name,counter,pc->maxcount,(int)(now - pc->starttime),
62
- pc->numskip); fflush(fp);
63
- #endif
64
- if (5*(now - pc->lastprinttime) < 2*printinterval
65
- && counter - pc->lastprintcount >= pc->numskip) { /* save for tests */
66
- if (pc->numskip < 1024) pc->numskip += pc->numskip+1;
67
- }
68
- if (3*(now - pc->lastprinttime) < 2*printinterval ) {
69
- return 0; /* to early for printing */
70
- }
71
- if (2*(now - pc->lastprinttime) > 3*printinterval ) {
72
- pc->numskip >>= 1; /* to late for printing */
73
- }
74
- if (fileno(fp)<3) cr='\r'; /* may be choosen in ini? */
75
- if (counter)
76
- fprintf(fp," progress %s %5d / %d time[s] %5d / %5d (skip=%d)%c",
77
- pc->name,counter,pc->maxcount,
78
- (int)(now - pc->starttime), /* time gone since start */
79
- (int)(now - pc->starttime)*pc->maxcount/(counter), /* estimated */
80
- pc->numskip, cr);
81
- fflush(fp);
82
- pc->lastprintcount=counter;
83
- pc->lastprinttime=now;
84
- }
85
- return 0; /* no error */
86
- }
87
- /* --------------------- end of progress output ---------------------- */
@@ -1,42 +0,0 @@
1
- /*
2
- ---------------------- progress output ----------------------
3
- output progress for GUIs to a pipe
4
- format: "counter_name" counter maxcounter time estimated_time \r|\n
5
- */
6
- #ifndef GOCR_PROGRESS_H
7
- #define GOCR_PROGRESS_H "Oct06"
8
- #include <time.h>
9
-
10
- /* initialization of progress output, fname="<fileID>","<filename>","-" */
11
- int ini_progress(char *fname);
12
-
13
- /* ToDo: add by open_* and close_* */
14
- /* State of one progress counter. progress() is called often, so numskip
15
- * limits how often the system time is queried.
16
- */
17
- typedef struct progress_counter {
18
- const char *name; /* name of counter */
19
- int lastprintcount; /* last counter printed for extrapolation */
20
- int maxcount; /* max counter */
21
- int numskip; /* number of counts to skip before the next time call, 0..maxcount */
22
- time_t starttime; /* start time of this counter */
23
- time_t lastprinttime; /* last time printed in seconds */
24
-
25
- } progress_counter_t;
26
-
27
- /* progress output p1=main_progress_0..100% p2=sub_progress_0..100% */
28
- /* ToDo: improved_progress: counter, maxcount(ini), counter_name(ini),
29
- * printinterval=10 # time before printing out progressmeter
30
- * *numskip=1 # if (counter-lastprintcounter<numskip) return; gettime() ...
31
- * *startutime, *lastprintutime, *lastprintcounter # numskip*=2 or /=2
32
- * only 1output/10s, + estimated endtime (test on pixelfields)
33
- * to stderr by default? remove subprogress, ini_progress? rm_progress?
34
- * test on tcl
35
- */
36
- progress_counter_t *open_progress(int maxcount, const char *name);
37
- /* free a counter */
38
- int close_progress(progress_counter_t *counter);
39
- /* output progress for counter state pc at the current count */
40
- int progress(int counter, progress_counter_t *pc);
41
- /* --------------------- end of progress output ---------------------- */
42
- #endif
@@ -1,703 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2009 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
- */
21
-
22
- #include <stdlib.h>
23
- #include <stdio.h>
24
- #include "pgm2asc.h"
25
- #include "gocr.h"
26
- #include "progress.h"
27
-
28
- /* measure mean thickness as an criteria for big chars */
29
- int mean_thickness( struct box *box2 ){
30
- int mt=0, i, y, dx=box2->x1-box2->x0+1, dy;
31
- for (y=box2->y0+1; y<box2->y1; y++) {
32
- i=loop(box2->p,box2->x0+0,y,dx,JOB->cfg.cs,0,RI);
33
- i=loop(box2->p,box2->x0+i,y,dx,JOB->cfg.cs,1,RI);
34
- mt+=i;
35
- }
36
- dy = box2->y1 - box2->y0 - 1;
37
- if (dy) mt=(mt+dy/2)/dy;
38
- return mt;
39
- }
40
-
41
- /* ---- remove dust ---------------------------------
42
- Dust is taken to be a very small pixel cluster without neighbours; not all
43
- dust clusters can be detected correctly. Every box of job->res.boxlist with
44
- abs(frame_vol[0]) <= job->cfg.dust_size is erased and deleted from the list;
45
- a negative cfg.dust_size requests auto detection from a volume histogram.
46
- Finally single white pixels inside large black boxes are cleared. Returns 0.
47
- Slow, but working well. This feature should be possible to switch off via option. */
48
- int remove_dust( job_t *job ){
49
- /* new dust removing */
50
- /* FIXME jb:remove pp */
51
- pix *pp = &job->src.p;
52
- int i1,i,j,x,y,x0,x1,y0,y1,nC,sX,sY,sP, cs,vvv=job->cfg.verbose;
53
- struct box *box2;
54
- #define HISTSIZE 220 /* histogram size */
55
- int histo[HISTSIZE]; /* histo[v] = number of boxes with volume v */
56
- cs=job->cfg.cs; sP=sX=sY=nC=0;
57
- /*
58
- * count number of black pixels within a box and store it in .dots
59
- * later .dots is re-used for number of objects belonging to the character
60
- * should be done in the flood-fill algorithm
61
- * volume of white pixels is estimated too big here (left/right rot)
62
- * ToDo: mean thickness of char lines?
63
- * or interval nesting (minP..maxP) to remove outliers
64
- */
65
- j=0;
66
- for (i1=0;i1<HISTSIZE;i1++) histo[i1]=0;
67
- /* mean value over every black object which is big enough */
68
- for_each_data(&(job->res.boxlist)) {
69
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
70
- if (!box2->num_frames) continue;
71
- if (box2->frame_vol[0]<0) continue; /* don't count inner holes */
72
- j = abs(box2->frame_vol[0]);
73
- if ((box2->y1-box2->y0+1)>3) {
74
- nC++; /* only count potential chars v0.42 */
75
- sX+=box2->x1 - box2->x0 + 1;
76
- sY+=box2->y1 - box2->y0 + 1;
77
- sP+=j;
78
- }
79
- if (j<HISTSIZE) histo[j]++;
80
- } end_for_each(&(job->res.boxlist));
81
-
82
- if (job->cfg.dust_size < 0 && nC > 0) { /* auto detection */
83
- /* this formula is empirical, high resolution scans have bigger dust */
84
- /* maximum allowed dustsize (min=4*7 ca. 32)
85
- * does not work for background pattern!
86
- */
87
- job->cfg.dust_size = ( ( sX/nC ) * ( sY/nC ) + 16) / 32;
88
- if (vvv) fprintf(stderr, "# remove.c remove_dust(): ");
89
- if (vvv) fprintf(stderr, "\n# dust size detection, vol num"
90
- " #obj=%d maxDust=%d mpixel= %3d mxy= %2d %2d",
91
- nC, job->cfg.dust_size, sP/nC, sX/nC, sY/nC);
92
- /* we assume that for random dust applies histo[i+1]<histo[i] */
93
- for (i=1;i+3<HISTSIZE;i++){
94
- if (vvv) fprintf(stderr,"\n# dust size histogram %3d %5d",i,histo[i]);
95
- if (histo[i]>=nC) continue; /* v0.42 lot of pixels -> bg pattern < 3 */
96
- if (i>=job->cfg.dust_size) break; /* maximum = mean size / 32 */
97
- if (histo[i/*+1*/]==0) break; /* bad statistic */
98
- if ((histo[i+2]+histo[i+3])
99
- >=(histo[i] +histo[i+1])) break; /* no noise, but too late? */
100
- if ( histo[i-1] > 1024*histo[i] &&
101
- 2*histo[i+1] >=histo[i]) break; /* bg pattern */
102
- }
103
- if (vvv) fprintf(stderr," break");
104
- if (vvv) for (i1=0,j=i+1;j<HISTSIZE;j++) {
105
- /* compressed, output only if something is changing */
106
- if (j==HISTSIZE-1 || histo[j]!=histo[j-1] || histo[j]!=histo[j+1]) {
107
- fprintf(stderr,"\n# dust size histogram %3d %5d",j,histo[j]);
108
- if (++i1>20) break; /* don't do excessive output */
109
- }
110
- }
111
- job->cfg.dust_size=i-1;
112
- /* what is the statistic of random dust?
113
- * if we have p pixels on a x*y image we should have
114
- * (p/(x*y))^1 * (x*y) = p singlets
115
- * (p/(x*y))^2 * (x*y) = p^2/(x*y) doublets and
116
- * (p/(x*y))^3 * (x*y) = p^3/(x*y)^2 triplets
117
- */
118
- if (vvv) fprintf(stderr,"\n# auto dust size = %d nC= %3d .. %3d"
119
- " avD= %2d %2d .. %2d %2d\n",
120
- job->cfg.dust_size, nC, job->res.numC,
121
- (job->res.sumX+job->res.numC/2)/job->res.numC,
122
- (job->res.sumY+job->res.numC/2)/job->res.numC, sX/nC, sY/nC); /* NOTE(review): divides by job->res.numC, only nC>0 is checked here -- confirm it cannot be 0 */
123
- }
124
- if (job->cfg.dust_size)
125
- { i=0;
126
- if(vvv){
127
- fprintf(stderr,"# remove dust of size %2d",job->cfg.dust_size);
128
- /* Warning: better use (1/(x*y))^2 as 1/((x*y)^2),
129
- * because (x*y)^2 may overflow; NOTE(review): pp->x*pp->y is not checked for 0 */
130
- fprintf(stderr," histo=%d,%d(?=%d),%d(?=%d),...\n# ...",
131
- histo[1],histo[2],histo[1]*histo[1]/(pp->x*pp->y),
132
- histo[3], histo[1]*histo[1]/(pp->x*pp->y)
133
- *histo[1]/(pp->x*pp->y));
134
- }
135
- i = 0;
136
- for_each_data(&(job->res.boxlist)) {
137
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
138
- x0=box2->x0;x1=box2->x1;y0=box2->y0;y1=box2->y1; /* box */
139
- j=abs(box2->frame_vol[0]);
140
- if(j<=job->cfg.dust_size) /* remove this tiny object */
141
- { /* here we should distinguish dust and i-dots,
142
- * may be we should sort out dots to a separate dot list and
143
- * after line detection decide, which is dust and which not
144
- * dust should be removed to make recognition easier (ToDo)
145
- */
146
- #if 0
147
- if(get_bw((3*x0+x1)/4,(x0+3*x1)/4,y1+y1-y0+1,y1+8*(y1-y0+1),pp,cs,1))
148
- continue; /* this idea was too simple, see kscan003.jpg sample */
149
- #endif
150
- /* remove from average */
151
- job->res.numC--;
152
- job->res.sumX-=x1-x0+1;
153
- job->res.sumY-=y1-y0+1;
154
- /* remove pixels (should only be done with dust) */
155
- for(x=x0;x<=x1;x++)
156
- for(y=y0;y<=y1;y++){ put(pp,x,y,0,255&~7); }
157
- /* remove from list */
158
- list_del(&(job->res.boxlist),box2);
159
- /* free memory */
160
- free_box(box2);
161
- i++; /* count as dust particle */
162
- continue;
163
- }
164
- } end_for_each(&(job->res.boxlist));
165
- if(vvv)fprintf(stderr," %3d cluster removed, nC= %3d\n",i,job->res.numC);
166
- }
167
- /* reset dots to 0 and remove white pixels (new) */
168
- i=0;
169
- for_each_data(&(job->res.boxlist)) {
170
- box2 = ((struct box *)list_get_current(&(job->res.boxlist)));
171
- if (box2->frame_vol[0]<0) continue; /* for black areas only */
172
- x0=box2->x0;x1=box2->x1;y0=box2->y0;y1=box2->y1; /* box */
173
- if (x1-x0>16 && y1-y0>30) /* only on large enough chars */
174
- for(x=x0+1;x<=x1-1;x++)
175
- for(y=y0+1;y<=y1-1;y++){
176
- if( pixel_atp(pp,x ,y )>=cs
177
- && pixel_atp(pp,x-1,y ) <cs
178
- && pixel_atp(pp,x+1,y ) <cs
179
- && pixel_atp(pp,x ,y-1) <cs
180
- && pixel_atp(pp,x ,y+1) <cs ) /* pixel >= cs with all 4 direct neighbours < cs: remove it */
181
- {
182
- put(pp,x,y,0,0); i++; /* (x and 0) or 0 */
183
- }
184
- }
185
- } end_for_each(&(job->res.boxlist));
186
- if (vvv) fprintf(stderr,"# ... %3d white pixels removed, cs=%d nC= %3d\n",
187
- i,cs,job->res.numC);
188
- return 0;
189
- }
190
-
191
- /* ---- smooth big chars ---------------------------------
192
- * Big chars often do not have smooth borders, which makes the
193
- * engine fail. Here we smooth the borders of big chars (at least 7x16,
194
- * not a PICTURE, mean_thickness >= 3). Smoothing is important for b/w scans,
195
- * where we often have comb like patterns on a vertical border. I also received
196
- * samples with lot of white pixels (sample: 04/02/25). Always returns 0.
197
- * ToDo: obsolete if vector code is complete
198
- */
199
- int smooth_borders( job_t *job ){
200
- pix *pp = &job->src.p;
201
- int ii=0,x,y,x0,x1,y0,y1,dx,dy,cs,i0,i1,i2,i3,i4,n1,n2,
202
- cn[8],cm,vvv=job->cfg.verbose; /* ii = changed pixels, cn[] = 8 neighbours, cm = centre pixel */
203
- struct box *box2;
204
- cs=job->cfg.cs; n1=n2=0;
205
- if(vvv){ fprintf(stderr,"# smooth big chars 7x16 cs=%d",cs); }
206
- /* filter for each big box */
207
- for_each_data(&(job->res.boxlist)) { n2++; /* count boxes */
208
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
209
- /* do not touch small characters! but how do we define small characters? */
210
- if (box2->x1-box2->x0+1<7 || box2->y1-box2->y0+1<16 ) continue;
211
- if (box2->c==PICTURE) continue;
212
- if (mean_thickness(box2)<3) continue;
213
- n1++; /* count boxes matching big-char criteria */
214
- x0=box2->x0; y0=box2->y0;
215
- x1=box2->x1; y1=box2->y1;
216
- dx=x1-x0+1; dy=y1-y0-1;
217
- /* out_x(box2);
218
- * don't change too much! only change if absolutely sure!
219
- * ....... 1 2 3
220
- * ex: .?##### 0 * 4
221
- * ....... 7 6 5
222
- * we should also avoid removing lines by systematic removal
223
- * from left end to the right, so we also consider distance>1
224
- */
225
- for(x=box2->x0;x<=box2->x1;x++)
226
- for(y=box2->y0;y<=box2->y1;y++){ /* filter out high frequencies */
227
- /* this is a very primitive solution, only for learning */
228
- cn[0]=getpixel(pp,x-1,y);
229
- cn[4]=getpixel(pp,x+1,y); /* horizontal */
230
- cn[2]=getpixel(pp,x,y-1);
231
- cn[6]=getpixel(pp,x,y+1); /* vertical */
232
- cn[1]=getpixel(pp,x-1,y-1);
233
- cn[3]=getpixel(pp,x+1,y-1); /* diagonal */
234
- cn[7]=getpixel(pp,x-1,y+1);
235
- cn[5]=getpixel(pp,x+1,y+1);
236
- cm=getpixel(pp,x,y);
237
- /* ring at distance 1: i0 = start of a run of neighbours on the same side of cs as cm, i1 = its length, i2 = length of the following run of other pixels */
238
- for (i0=0;i0<8;i0++)
239
- if ((cn[i0 ]<cs)==(cm<cs)
240
- && (cn[(i0+7) & 7]<cs)!=(cm<cs)) break; /* first same */
241
- for (i1=0;i1<8;i1++)
242
- if ((cn[(i0+i1) & 7]<cs)!=(cm<cs)) break; /* num same */
243
- for (i2=0;i2<8;i2++)
244
- if ((cn[(i0+i1+i2) & 7]<cs)==(cm<cs)) break; /* num other */
245
- cn[0]=getpixel(pp,x-2,y);
246
- cn[4]=getpixel(pp,x+2,y); /* horizontal */
247
- cn[2]=getpixel(pp,x,y-2);
248
- cn[6]=getpixel(pp,x,y+2); /* vertical */
249
- cn[1]=getpixel(pp,x-2,y-2);
250
- cn[3]=getpixel(pp,x+2,y-2); /* diagonal */
251
- cn[7]=getpixel(pp,x-2,y+2);
252
- cn[5]=getpixel(pp,x+2,y+2);
253
- /* same run analysis on the 8 pixels at distance 2: i3 = num same, i4 = num other */
254
- for (i0=0;i0<8;i0++)
255
- if ((cn[i0 ]<cs)==(cm<cs)
256
- && (cn[(i0+7) & 7]<cs)!=(cm<cs)) break; /* first same */
257
- for (i3=0;i3<8;i3++)
258
- if ((cn[(i0+i3) & 7]<cs)!=(cm<cs)) break; /* num same */
259
- for (i4=0;i4<8;i4++)
260
- if ((cn[(i0+i3+i4) & 7]<cs)==(cm<cs)) break; /* num other */
261
- if (i1<=3 && i2>=5 && i3>=3 && i4>=3) { /* change only on borders */
262
- ii++; /* white : black */
263
- put(pp,x,y,7,((cm<cs)?(cs|32):cs/2)&~7);
264
- #if 0
265
- printf(" x y i0 i1 i2 i3 i4 cm new cs %3d %3d"
266
- " %3d %3d %3d %3d %3d %3d %3d %3d\n",
267
- x-box2->x0,y-box2->y0,i0,i1,i2,i3,i3,cm,getpixel(pp,x,y),cs);
268
- #endif
269
- }
270
- }
271
- #if 0 /* debugging */
272
- out_x(box2);
273
- #endif
274
- } end_for_each(&(job->res.boxlist));
275
- if(vvv)fprintf(stderr," ... %3d changes in %d of %d\n",ii,n1,n2);
276
- return 0;
277
- }
278
-
279
- /* test if a corner of box1 is within box2 */
280
- int box_nested( struct box *box1, struct box *box2){
281
- /* box1 in box2, +1..-1 frame for pixel-patterns */
282
- if ( ( ( box1->x0>=box2->x0-1 && box1->x0<=box2->x1+1 )
283
- || ( box1->x1>=box2->x0-1 && box1->x1<=box2->x1+1 ) )
284
- && ( ( box1->y0>=box2->y0-1 && box1->y0<=box2->y1+1 )
285
- || ( box1->y1>=box2->y0-1 && box1->y1<=box2->y1+1 ) ) )
286
- return 1;
287
- return 0;
288
- }
289
-
290
- /* test if box1 is within box2 */
291
- int box_covered( struct box *box1, struct box *box2){
292
- /* box1 in box2, +1..-1 frame for pixel-patterns */
293
- if ( ( box1->x0>=box2->x0-1 && box1->x1<=box2->x1+1 )
294
- && ( box1->y0>=box2->y0-1 && box1->y1<=box2->y1+1 ) )
295
- return 1;
296
- return 0;
297
- }
298
-
299
- /* ---- remove pictures ------------------------------------------
300
- * may be, not deleting or moving to another list is much better!
301
- * should be renamed to remove_pictures and border boxes
302
- */
303
- int remove_pictures( job_t *job){
304
- struct box *box4,*box2;
305
- int j=0, j2=0, num_del=0;
306
-
307
- if (job->cfg.verbose)
308
- fprintf(stderr, "# "__FILE__" L%d: remove pictures\n# ...",
309
- __LINE__);
310
-
311
- /* ToDo: output a list for picture handle scripts */
312
- j=0; j2=0;
313
- if(job->cfg.verbose)
314
- for_each_data(&(job->res.boxlist)) {
315
- box4 = (struct box *)list_get_current(&(job->res.boxlist));
316
- if (box4->c==PICTURE) j++; else j2++;
317
- } end_for_each(&(job->res.boxlist));
318
- if (job->cfg.verbose)
319
- fprintf(stderr," status: pictures= %d other= %d nC= %d\n# ...",
320
- j, j2, job->res.numC);
321
-
322
- /* remove table frames */
323
- if (job->res.numC > 8)
324
- for_each_data(&(job->res.boxlist)) {
325
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
326
- if (box2->c==PICTURE
327
- && box2->num_ac==0 /* dont remove barcodes */
328
- && box2->x1-box2->x0+1>box2->p->x/2 /* big table? */
329
- && box2->y1-box2->y0+1>box2->p->y/2 ){ j=0;
330
- /* count boxes nested with the picture */
331
- for_each_data(&(job->res.boxlist)) {
332
- box4 = (struct box *)list_get_current(&(job->res.boxlist));
333
- if( box4 != box2 ) /* not count itself */
334
- if (box_nested(box4,box2)) j++; /* box4 in box2 */
335
- } end_for_each(&(job->res.boxlist));
336
- if( j>8 ){ /* remove box if more than 8 chars are within box */
337
- list_del(&(job->res.boxlist), box2); /* does not work proper ?! */
338
- free_box(box2); num_del++;
339
- }
340
- }
341
- } end_for_each(&(job->res.boxlist));
342
- if (job->cfg.verbose)
343
- fprintf(stderr, " deleted= %d pictures (table frames)\n# ...",
344
- num_del);
345
- num_del=0;
346
-
347
- /* remove dark-border-boxes (typical for hard copy of book site,
348
- * or spam random border) */
349
- if (job->res.numC > 1) /* dont remove the only char */
350
- for_each_data(&(job->res.boxlist)) {
351
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
352
- if (box2->c!=PICTURE) continue; // ToDo: PICTUREs set already?
353
- if ( box2->x1-box2->x0+1 > box2->p->x/2
354
- && box2->y1-box2->y0+1 > box2->p->y/2 ) continue;
355
- j=0;
356
- if (box2->x0==0) j++;
357
- if (box2->y0==0) j++; /* on border? */
358
- if (box2->x1==box2->p->x-1) j++;
359
- if (box2->y1==box2->p->y-1) j++;
360
- if (j>2){ /* ToDo: check corner pixel */
361
- int cs=job->cfg.cs;
362
- j=0;
363
- if (getpixel(box2->p,box2->x0,box2->y0)<cs) j++;
364
- if (getpixel(box2->p,box2->x1,box2->y0)<cs) j++;
365
- if (getpixel(box2->p,box2->x0,box2->y1)<cs) j++;
366
- if (getpixel(box2->p,box2->x1,box2->y1)<cs) j++;
367
- if (j>2) {
368
- list_del(&(job->res.boxlist), box2);
369
- free_box(box2); num_del++;
370
- }
371
- }
372
- } end_for_each(&(job->res.boxlist));
373
- if (job->cfg.verbose)
374
- fprintf(stderr, " deleted= %d pictures (on border)\n# ...",
375
- num_del);
376
- num_del=0;
377
-
378
- j=0; j2=0;
379
- if(job->cfg.verbose)
380
- for_each_data(&(job->res.boxlist)) {
381
- box4 = (struct box *)list_get_current(&(job->res.boxlist));
382
- if( box4->c==PICTURE ) j++; else j2++;
383
- } end_for_each(&(job->res.boxlist));
384
- if (job->cfg.verbose)
385
- fprintf(stderr," status: pictures= %d other= %d nC= %d\n# ...",
386
- j, j2, job->res.numC);
387
-
388
- for(j=1;j;){ j=0; /* this is only because list_del does not work */
389
- /* can be slow on gray images */
390
- for_each_data(&(job->res.boxlist)) {
391
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
392
- if( box2->c==PICTURE && box2->num_ac==0)
393
- for(j=1;j;){ /* let it grow to max before leave */
394
- j=0; box4=NULL;
395
- /* find boxes nested with the picture and remove */
396
- /* its for pictures build by compounds */
397
- for_each_data(&(job->res.boxlist)) {
398
- box4 = (struct box *)list_get_current(&(job->res.boxlist));
399
- if( box4!=box2 /* not destroy self */
400
- && (box4->num_ac==0) /* dont remove barcodes etc. */
401
- && (/* box4->c==UNKNOWN || */
402
- box4->c==PICTURE) ) /* dont remove valid chars */
403
- if(
404
- /* box4 in box2, +1..-1 frame for pixel-patterns */
405
- box_nested(box4,box2)
406
- /* or box2 in box4 */
407
- || box_nested(box2,box4) /* same? */
408
- )
409
- if ( box4->x1-box4->x0+1>2*job->res.avX
410
- || box4->x1-box4->x0+1<job->res.avX/2
411
- || box4->y1-box4->y0+1>2*job->res.avY
412
- || box4->y1-box4->y0+1<job->res.avY/2
413
- || box_covered(box4,box2) ) /* box4 completely within box2 */
414
- /* dont remove chars! see rotate45.fig */
415
- {
416
- /* do not remove boxes in inner loop (bug?) ToDo: check why! */
417
- /* instead we leave inner loop and mark box4 as valid */
418
- if( box4->x0<box2->x0 ) box2->x0=box4->x0;
419
- if( box4->x1>box2->x1 ) box2->x1=box4->x1;
420
- if( box4->y0<box2->y0 ) box2->y0=box4->y0;
421
- if( box4->y1>box2->y1 ) box2->y1=box4->y1;
422
- j=1; /* mark box4 as valid */
423
- break; /* and leave inner loop */
424
- }
425
- } end_for_each(&(job->res.boxlist));
426
- if (j!=0 && box4!=NULL) { /* check for valid box4 */
427
- /* ToDo: melt */
428
- list_del(&(job->res.boxlist), box4); /* does not work proper ?! */
429
- free_box(box4); /* break; ToDo: necessary to leave after del??? */
430
- num_del++;
431
- }
432
-
433
- }
434
- } end_for_each(&(job->res.boxlist));
435
- }
436
-
437
- if (job->cfg.verbose)
438
- fprintf(stderr, " deleted= %d nested pictures\n# ...", num_del);
439
-
440
- /* output a list for picture handle scripts */
441
- j=0; j2=0;
442
- if(job->cfg.verbose)
443
- for_each_data(&(job->res.boxlist)) {
444
- box4 = (struct box *)list_get_current(&(job->res.boxlist));
445
- if( box4->c==PICTURE ) {
446
- fprintf(stderr," found picture at %4d %4d size %4d %4d\n# ...",
447
- box4->x0, box4->y0, box4->x1-box4->x0+1, box4->y1-box4->y0+1 );
448
- j++;
449
- } else j2++;
450
- } end_for_each(&(job->res.boxlist));
451
- if (job->cfg.verbose)
452
- fprintf(stderr," status: pictures= %d other= %d nC= %d\n",
453
- j, j2, job->res.numC);
454
- return 0;
455
- }
456
-
457
-
458
-
459
- /* ---- remove melted serifs --------------------------------- v0.2.5
460
- >>v<<
461
- ##########.######## <-y0
462
- ################### like X VW etc.
463
- ...###.......###... <-y
464
- ...###......###....
465
- j1 j2 j3
466
- - can generate new boxes if two characters were glued
467
- */
468
- int remove_melted_serifs( pix *pp ){
469
- int x,y,j1,j2,j3,j4,i2,i3,i,ii,ni,cs,x0,x1,xa,xb,y0,y1,vvv=JOB->cfg.verbose;
470
- struct box *box2, *box3;
471
- progress_counter_t *pc = NULL;
472
-
473
- cs=JOB->cfg.cs; i=0; ii=0; ni=0;
474
- for_each_data(&(JOB->res.boxlist)) {
475
- ni++;
476
- } end_for_each(&(JOB->res.boxlist));
477
- pc = open_progress(ni,"remove_melted_serifs");
478
- ni = 0;
479
-
480
- if(vvv){ fprintf(stderr,"# searching melted serifs ..."); }
481
- for_each_data(&(JOB->res.boxlist)) {
482
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
483
- if (box2->c != UNKNOWN) continue; /* dont try on pictures */
484
- x0=box2->x0; x1=box2->x1;
485
- y0=box2->y0; y1=box2->y1; /* box */
486
- /* upper serifs */
487
- for(j1=x0;j1+4<x1;){
488
- j1+=loop(pp,j1,y0 ,x1-x0,cs,0,RI);
489
- x =loop(pp,j1,y0 ,x1-x0,cs,1,RI); if(j1+x>x1+1) break;
490
- y =loop(pp,j1,y0+1,x1-x0,cs,1,RI); if(y>x) x=y; if(j1+x>x1+1) break;
491
- /* measure mean thickness of serif pos: (j1,y0)-(j1+x,y0) */
492
- for(j2=j3=j4=0,i2=j1;i2<j1+x;i2++){
493
- /* 2009-07: bug, j1 used instead of i2 */
494
- i3 =loop(pp,i2,y0 ,y1-y0,cs,0,DO); if(8*i3>y1-y0) break;
495
- i3+=loop(pp,i2,y0+i3,y1-y0,cs,1,DO); if(8*i3>y1-y0) continue;
496
- if(8*i3<y1-y0){ j2+=i3; j3++; } /* sum vert. thickness */
497
- } if(j3==0){ j1+=x; continue; } /* no serif, skip this object */
498
- y = y0+(j2+j3-1)/j3+(y1-y0+1)/32; /* y0 + mean thickness + dy/32 + 1 */
499
- if (vvv&1)
500
- fprintf(stderr, "\n# upper serif x0,y0,j1-x0+x,y-y0 %4d %4d %2d+%2d %2d",
501
- x0,y0,j1-x0,x,y-y0);
502
-
503
- /* check if really melted serifs */
504
- if (loop(pp,j1,y,x1-x0,cs,0,RI)<1) { j1+=x; continue; }
505
- if(num_cross(j1 ,j1+x,y,y,pp,cs) < 2 ){ j1+=x;continue; }
506
- if (vvv&1)
507
- fprintf(stderr, " ok1");
508
- j2 = j1 + loop(pp,j1,y,x1-x0,cs,0,RI);
509
- j2 = j2 + loop(pp,j2,y,x1-x0,cs,1,RI);
510
- i3 = loop(pp,j2,y,x1-x0,cs,0,RI); if(i3<2){j1+=x;continue;}
511
- j2 += i3/2;
512
- j3 = j2 + loop(pp,j2,y ,x1-j2,cs,0,RI);
513
- i3 = j2 + loop(pp,j2,y+1,x1-j2,cs,0,RI); if(i3>j3)j3=i3;
514
- j3 = j3 + loop(pp,j3,y ,x1-j3,cs,1,RI);
515
- i3 = loop(pp,j3,y ,x1-j3,cs,0,RI);
516
- if(i3<2 || j3>=j1+x){j1+=x;continue;}
517
- j3 += i3/2;
518
-
519
- if(x>5)
520
- {
521
- i++; /* snip! */
522
- for(y=0;y<(y1-y0+1+4)/8;y++)put(pp,j2,y0+y,255,128+64); /* clear highest bit */
523
- if(vvv&4){
524
- fprintf(stderr,"\n");
525
- out_x(box2);
526
- fprintf(stderr,"# melted serifs corrected on %d %d j1=%d j3=%d",
527
- j2-x0, y, j1-x0, j3-x0);
528
- // ToDo: vector cut with line from xa,ya to xb,yb
529
- // two frames of double melted MN become one frame if cut one
530
- // of the melted serifs (new function cut_frames_at_line())
531
- }
532
- for(xb=0,xa=0;xa<(x1-x0+4)/8;xa++){ /* detect vertical gap */
533
- i3=y1;
534
- if(box2->m3>y0 && 2*y1>box2->m3+box2->m4) i3=box2->m3; /* some IJ */
535
- if( loop(pp,j2-xa,i3,i3-y0,cs,0,UP) > (y1-y0+1)/2
536
- && loop(pp,j2,(y0+y1)/2,xa+1,cs,0,LE) >=xa ){ xb=-xa; break; }
537
- if( loop(pp,j2+xa,i3,i3-y0,cs,0,UP) > (y1-y0+1)/2
538
- && loop(pp,j2,(y0+y1)/2,xa+1,cs,0,RI) >=xa ){ xb= xa; break; }
539
- }
540
- if( get_bw(j2 ,j2 ,y0,(y0+y1)/2,pp,cs,1) == 0
541
- && get_bw(j2+xb,j2+xb,(y0+y1)/2,i3,pp,cs,1) == 0 )
542
- { /* divide */
543
- box3=malloc_box(box2);
544
- box3->x1=j2-1;
545
- box2->x0=j2+1; x1=box2->x1;
546
- cut_box(box2); /* cut vectors outside the box, see box.c */
547
- cut_box(box3);
548
- box3->num=JOB->res.numC;
549
- list_ins(&(JOB->res.boxlist),box2,box3); JOB->res.numC++; ii++; /* insert box3 before box2 */
550
- if(vvv&4) fprintf(stderr," => splitted");
551
- j1=x0=box2->x0; x=0; /* hopefully ok, UVW */
552
- }
553
- }
554
- j1+=x;
555
- }
556
- /* same on lower serifs -- change this later to better function
557
- // #### ###
558
- // #### v ### # <-y
559
- // #################### <-y1
560
- // j1 j2 j3
561
- */
562
- for(j1=x0;j1<x1;){
563
- j1+=loop(pp,j1,y1 ,x1-x0,cs,0,RI);
564
- x =loop(pp,j1,y1 ,x1-x0,cs,1,RI); if(j1+x>x1+1) break;
565
- y =loop(pp,j1,y1-1,x1-x0,cs,1,RI); if(y>x) x=y; if(j1+x>x1+1) break;
566
- /* measure mean thickness of serif */
567
- for(j2=j3=j4=0,i2=j1;i2<j1+x;i2++){
568
- /* 2009-07: bug, j1 used instead of i2 */
569
- i3 =loop(pp,i2,y1 ,y1-y0,cs,0,UP); if(8*i3>y1-y0) break;
570
- i3+=loop(pp,i2,y1-i3,y1-y0,cs,1,UP); if(8*i3>y1-y0) continue;
571
- if(8*i3<y1-y0){ j2+=i3; j3++; }
572
- } if(j3==0){ j1+=x; continue; }
573
- y = y1-(j2+j3-1)/j3-(y1-y0+1)/32;
574
- if (vvv&1)
575
- fprintf(stderr, "\n# lower serif x0,y0,j1-x0+x,y1-y %4d %4d %2d+%2d %2d",
576
- x0,y0,j1-x0,x,y1-y);
577
-
578
- /* check if really melted serifs */
579
- if( loop(pp,j1,y,x1-x0,cs,0,RI)<1 ) { j1+=x; continue; }
580
- if(num_cross(j1 ,j1+x,y,y,pp,cs) < 2 ){ j1+=x;continue; }
581
- if (vvv&1) fprintf(stderr, " ok1");
582
- j2 = j1 + loop(pp,j1,y,x1-x0,cs,0,RI);
583
- j2 = j2 + loop(pp,j2,y,x1-x0,cs,1,RI);
584
- i3 = loop(pp,j2,y,x1-x0,cs,0,RI); if(i3<2){j1+=x;continue;}
585
- j2 += i3/2;
586
- j3 = j2 + loop(pp,j2,y ,x1-j2,cs,0,RI);
587
- i3 = j2 + loop(pp,j2,y-1,x1-j2,cs,0,RI); if(i3>j3)j3=i3;
588
- j3 = j3 + loop(pp,j3,y ,x1-j3,cs,1,RI);
589
- i3 = loop(pp,j3,y,x1-j3,cs,0,RI);
590
- if(i3<2 || j3>=j1+x){j1+=x;continue;}
591
- j3 += i3/2;
592
-
593
- /* y =y1-(y1-y0+1+4)/8; */
594
- if(x>5)
595
- {
596
- i++; /* snip! */
597
- for(i3=0;i3<(y1-y0+1+4)/8;i3++)
598
- put(pp,j2,y1-i3,255,128+64); /* clear highest bit */
599
- if(vvv&4){
600
- fprintf(stderr,"\n");
601
- out_x(box2);
602
- fprintf(stderr,"# melted serifs corrected on %d %d j1=%d j3=%d",j2-x0,y-y0,j1-x0,j3-x0);
603
- }
604
- for(xb=0,xa=0;xa<(x1-x0+4)/8;xa++){ /* detect vertical gap */
605
- if( loop(pp,j2-xa,y0,y1-y0,cs,0,DO) > (y1-y0+1)/2
606
- && loop(pp,j2,(y0+y1)/2,xa+1,cs,0,LE) >=xa ){ xb=-xa; break; }
607
- if( loop(pp,j2+xa,y0,y1-y0,cs,0,DO) > (y1-y0+1)/2
608
- && loop(pp,j2,(y0+y1)/2,xa+1,cs,0,RI) >=xa ){ xb= xa; break; }
609
- }
610
- if( get_bw(j2 ,j2 ,(y0+y1)/2,y1,pp,cs,1) == 0
611
- && get_bw(j2+xb,j2+xb,y0,(y0+y1)/2,pp,cs,1) == 0 )
612
- { /* divide */
613
- box3=malloc_box(box2);
614
- box3->x1=j2-1;
615
- box2->x0=j2; x1=box2->x1;
616
- cut_box(box2); /* cut vectors outside the box */
617
- cut_box(box3);
618
- box3->num=JOB->res.numC;
619
- list_ins(&(JOB->res.boxlist),box2,box3); JOB->res.numC++; ii++;
620
- /* box3,box2 in correct order??? */
621
- if(vvv&4) fprintf(stderr," => splitted");
622
- j1=x0=box2->x0; x=0; /* hopefully ok, NMK */
623
- }
624
- }
625
- j1+=x;
626
- }
627
- progress(ni++,pc);
628
- } end_for_each(&(JOB->res.boxlist));
629
- close_progress(pc);
630
- if(vvv)fprintf(stderr," %3d cluster corrected, %d new boxes\n",i,ii);
631
- return 0;
632
- }
633
-
634
- /* remove black borders often seen on bad scanned copies of books
635
- - dust around the border
636
- */
637
- int remove_rest_of_dust() {
638
- int i1, i2, vvv = JOB->cfg.verbose, x0, x1, y0, y1, cnt=0;
639
- struct box *box2, *box4;
640
- progress_counter_t *pc = NULL;
641
-
642
- i1 = i2 = 0; /* counter for removed boxes */
643
- if (vvv)
644
- fprintf(stderr, "# detect dust (avX,nC), ... ");
645
- /* remove fragments from border */
646
- for_each_data(&(JOB->res.boxlist)) {
647
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
648
- if (box2->c == UNKNOWN) {
649
- x0 = box2->x0; x1 = box2->x1;
650
- y0 = box2->y0; y1 = box2->y1; /* box */
651
- /* box in char ??? */
652
- if ( 2 * JOB->res.numC * (y1 - y0 + 1) < 3 * JOB->res.sumY
653
- && ( y1 < box2->p->y/4 || y0 > 3*box2->p->y/4 ) /* not single line */
654
- && JOB->res.numC > 1 /* do not remove everything */
655
- && ( box2->m4 == 0 ) ) /* remove this */
656
- {
657
- JOB->res.numC--; /* ToDo: dont count tiny pixels */
658
- /* ToDo: res.sumX,Y must also be corrected */
659
- i1++;
660
- list_del(&(JOB->res.boxlist), box2);
661
- free_box(box2);
662
- }
663
- }
664
- } end_for_each(&(JOB->res.boxlist));
665
-
666
- pc = open_progress(JOB->res.boxlist.n,"remove_dust2");
667
- for_each_data(&(JOB->res.boxlist)) {
668
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
669
- progress(cnt++,pc);
670
- if (box2->c == PICTURE) continue;
671
- x0 = box2->x0; x1 = box2->x1;
672
- y0 = box2->y0; y1 = box2->y1; /* box */
673
- /* remove tiny box2 if to far away from bigger boxes */
674
- /* ToDo: remove clouds of tiny pixels (count near small, compare with num bigger) */
675
- /* 0.42: remove far away pixel? ToDo: do it at earlier? */
676
- if (x1-x0+1<3 && y1-y0+1<3){
677
- int xn, yn, xs, ys;
678
- int found=0; /* nearest bigger box */
679
- /* search near bigger box */
680
- for_each_data(&(JOB->res.boxlist)) {
681
- box4 = (struct box *)list_get_current(&(JOB->res.boxlist));
682
- if (found || box4 == box2) continue;
683
- if (box4->x1-box4->x0+1<3 && box4->y1-box4->y0+1<3) continue;
684
- xs = box4->x1-box4->x0+1;
685
- ys = box4->y1-box4->y0+1;
686
- xn = abs((box4->x0+box4->x1)/2 - box2->x0);
687
- yn = abs((box4->y0+box4->y1)/2 - box2->y0);
688
- if (2*xn < 3*xs && 2*yn < 3*ys) { found=1; }
689
- } end_for_each(&(JOB->res.boxlist));
690
- if (!found) { /* found nothing, box2 to far from big boxes */
691
- i2++;
692
- list_del(&(JOB->res.boxlist), box2);
693
- free_box(box2);
694
- }
695
- }
696
- } end_for_each(&(JOB->res.boxlist));
697
- close_progress(pc);
698
- if (vvv)
699
- fprintf(stderr, " %3d + %3d boxes deleted, nC= %d ?\n",
700
- i1, i2, JOB->res.numC);
701
-
702
- return 0;
703
- }