isbn 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,37 +0,0 @@
1
- /*
2
- This is an Optical-Character-Recognition program
3
- Copyright (C) 2000 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address */
20
-
21
- #ifndef OUTPUT_H
22
- #define OUTPUT_H
23
-
24
- #include <stdlib.h>
25
- #include <stdio.h>
26
- #include "pnm.h"
27
- #include "gocr.h"
28
- #include "list.h"
29
-
30
- void out_b(struct box *px, pix *b, int x0, int y0, int dx, int dy, int cs );
31
- void out_x(struct box *px);
32
- void out_x2(struct box *box1,struct box *box2);
33
- int output_list(job_t *job);
34
- int debug_img(char *fname, struct job_s *job, int opt);
35
-
36
-
37
- #endif
@@ -1,153 +0,0 @@
1
- /*
2
- This is an Optical-Character-Recognition program
3
- Copyright (C) 1999 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
- */
21
- /* plan: use popen("ppm2pcx -packed ...","w"); for writing pcx */
22
-
23
- #include <stdio.h>
24
- #include <stdlib.h>
25
- /* #include <assert.h> */
26
-
27
- #include "pcx.h"
28
-
29
- typedef unsigned char byte;
30
-
/* Print a fatal error message, tagged with the source file and line, to
 * stderr and terminate the program with exit status 1.
 * x must be a string literal (it is pasted into the format string).
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * "if (cond) ERR("..."); else ..." parses as intended. */
#define ERR(x) do { fprintf(stderr,"ERROR "__FILE__" L%d: " x "\n",__LINE__); exit(1); } while (0)
32
-
/* Sticky read-error flag: read_b() sets it to 1 and never clears it,
 * so callers have to reset it before starting a new file. */
int err;

/* Fetch the next byte from f1 (helper for the PCX reader).
 * When the stream is at end-of-file or in an error state after the read,
 * err is set to 1; the value returned in that case is whatever fgetc()
 * delivered, truncated to a byte. */
unsigned char read_b(FILE *f1){
  int ch = fgetc(f1);

  if (feof(f1) || ferror(f1)) {
    err = 1;
  }
  return (unsigned char)ch;
}
38
-
39
- /* something here is wrong! */
40
- void readpcx(char *name,pix *p,int vvv){ /* see pcx.format.txt */
41
- int page,pages,nx,ny,i,j,b,x,y,bpl,bits,pal[256][3];
42
- FILE *f1;
43
- unsigned char *pic,h[128],bb,b1,b2,b3;
44
- err=0;
45
- for(i=0;i<256;i++)for(j=0;j<3;j++)pal[i][j]=i;
46
- f1=fopen(name,"rb"); if(!f1) ERR("open");
47
- if(fread(h,1,128,f1)!=128)ERR("read PCX header"); /* 128 Byte lesen -> h[] */
48
- if(h[0]!=10)ERR("no ZSoft sign"); /* ZSoft sign */
49
- if(h[2]> 1)ERR("unknown coding"); /* run length encoding */
50
- bits = h[3]; /* 1 or 8 */
51
- if(bits!=1 && bits!=8)ERR("only 1 or 8 bits supported");
52
- nx = h[ 9]*256+h[ 8] - h[ 5]*256-h[ 4] +1; /* Xmax-Xmin */
53
- ny = h[11]*256+h[10] - h[ 7]*256-h[ 6] +1; /* Ymax-Ymin */
54
- pages=h[65]; bpl=h[66]+256*h[67]; /* bytes per line */
55
- if(vvv)
56
- fprintf(stderr,"# PCX version=%d bits=%d x=%d y=%d HRes=%d VRes=%d\n"
57
- "# NPlanes=%d BytesPerLine=%d Palette=%s",
58
- h[1],bits,nx,ny,h[12]+256*h[13],h[14]+256*h[15],
59
- pages,bpl,((h[68]==1)?"1=color/bw":"2=gray"));
60
- /* line1(NP=4): RRRRR...,GGGG....,BBBBB...,IIII...., line2: RRRR...,GGGG.... */
61
- /* C4 EF = (C4&3F)*EF = EF EF EF EF */
62
- fflush(stdout);
63
- /* palette: for(i=0;i<16;i++) for(j=0;j<3;j++) h[16+3*i+j] */
64
- if(pages>1)for(b=0;b<16;b++) for(i=0;i<16;i++)
65
- for(j=0;j< 3;j++) pal[b*16+i][j]=h[16+3*i+j]>>2;
66
- if(bits>7){
67
- fseek(f1,-3*256,2); if(fread(pal,3,256,f1)!=256)ERR("read palette");
68
- for(i=0;i<256;i++) for(j=0;j<3;j++) pal[i][j]>>=2;
69
- }
70
- fseek(f1,128,0);
71
- pic=(unsigned char *)malloc( nx*ny );
72
- if(pic==NULL)ERR("no memory"); /* no memory */
73
- x=y=0;
74
- do {
75
- for(page=0;page<pages;page++) /* 192 == 0xc0 => b1=counter */
76
- do {
77
- b1=1; bb=read_b(f1); b2=bb; if(b1==192)fprintf(stderr,"?");
78
- if((b2>=192) && (h[2]==1)){b1=b2&63;bb=read_b(f1);b2=bb;}
79
- if(err){fprintf(stderr,"\nread error x=%d y=%d\n",x,y);x=nx;y=ny;break;}
80
- for(b3=0;b3<b1;b3++)for(b=0;b<8;b+=bits,x++)if(x<nx){
81
- bb=(b2>>(8-bits-b)) & ~((~0)<<bits);
82
- if(bits==1 && bb==1) bb=240;
83
- if(page==0) pic[x+nx*y] =(byte)bb;
84
- else pic[x+nx*y]|=(byte)bb<<(page*bits);
85
- }
86
- } while(x<(9-bits)*bpl); x=0; y++;
87
- } while(y<ny);
88
- /* */
89
- fclose(f1);
90
- p->p=pic; p->x=nx; p->y=ny; p->bpp=1;
91
- if(vvv)fprintf(stderr,"\n");
92
- }
93
-
94
- /* -----------------------------------------------------------------------
95
- // write bmp 8bit palette no RLE
96
- // bit 2+3 used for color coding (markers)
97
- // replaced by writeppm (ppm.gz) and is obsolate now, removed later
98
- */
99
- void writebmp(char *name,pix p,int vvv){ /* see pcx.format.txt */
100
- int nx,ny,i,y,rest[4]={0,0,0,0};
101
- FILE *f1;
102
- /*FIXME jb static*/static unsigned char *pic, h[54+4*256];
103
- long fs,fo,hs,is; /* filesize, offset, headersize, imagesize */
104
-
105
- nx=p.x; ny=p.y; pic=p.p;
106
- if (nx&3) nx+=4-(nx&3); /* must be mod4 ? */
107
- hs=40; /* bmi headersize fix */
108
- is=nx*ny; /* imagesize */
109
- fo=14+hs+4*256;
110
- fs=fo+is;
111
- for(i=0;i<54;i++){ h[i]=0; }
112
- /* BITMAPFILEHEADER */
113
- h[ 0]='B'; h[ 1]='M'; /* type of file BMP */
114
- h[ 2]= fs &255; h[ 3]=(fs>> 8)&255;
115
- h[ 4]=(fs>>16)&255; h[ 5]=(fs>>24)&255; /* size of file */
116
- h[10]= fo &255; h[11]=(fo>> 8)&255;
117
- h[12]=(fo>>16)&255; h[13]=(fo>>24)&255; /* offset to image data */
118
- /* BITMAPINFO (BITMAPCOREHEADER not used here) */
119
- /* 14 - HEADER */
120
- h[14]= hs &255; h[15]=(hs>> 8)&255;
121
- h[16]=(hs>>16)&255; h[17]=(hs>>24)&255; /* bmi-header size */
122
- h[18]= nx &255; h[19]=(nx>> 8)&255;
123
- h[20]=(0l>>16)&255; h[21]=(0l>>24)&255; /* WIDTH/pixel */
124
- h[22]= ny &255; h[23]=(ny>> 8)&255;
125
- h[24]=(0l>>16)&255; h[25]=(0l>>24)&255; /* HIGH/pixel */
126
- h[26]=1; /* planes */
127
- h[28]=8; /* bits/pixel 1,4,8,24 */
128
- h[30]=0; /* compression */
129
- h[34]= is &255; h[35]=(is>> 8)&255;
130
- h[36]=(is>>16)&255; h[37]=(is>>24)&255; /* sizeImage (can be 0 if ~RLE) */
131
- h[38]=0;h[39]=1; /* ca 100dpi, x/meter */
132
- h[42]=0;h[43]=1; /* y/meter */
133
- h[46]=0;h[47]=1; /* colorused (0=maximum) */
134
- h[50]=0;h[51]=1; /* colorimportand (0=all) */
135
- /* 54 - endofheader */
136
- for(i=0;i<256;i++){
137
- h[54+4*i+0]=((~((i & 2)*64)) & (i & (128+64)))|63;
138
- h[54+4*i+1]=((~((i & 2)*64)) & (~((i & 4)*32)) & (i & (128+64)))|63;
139
- h[54+4*i+2]=( ((i & 2)* 8) | ((~((i & 4)*32)) & (i & (128+64)))|63);
140
- } /* blue-green-red */
141
- f1=fopen(name,"wb"); if(!f1) fprintf(stderr," error opening file\n");
142
- if(!f1)ERR("open"); /* open-error */
143
- if(fwrite(h,1,54+4*256,f1)!=54+4*256)ERR("write head");
144
- if(vvv) fprintf(stderr,"# write BMP x=%d y=%d\n",nx,ny);
145
- for(y=ny-1;y>=0;y--){
146
- if(((int)fwrite(pic+p.x*y,1,p.x,f1))!=p.x)ERR("write");
147
- if(nx>p.x)
148
- if(((int)fwrite(rest,1,nx-p.x,f1))!=nx-p.x)ERR("write");
149
- }
150
- fclose(f1);
151
- }
152
-
153
- /* ---------------------------------------------------------------------- */
@@ -1,9 +0,0 @@
1
-
2
- #include "pnm.h"
3
-
4
- void readpcx(char *name,pix *p,int vvv);
5
-
6
- /* write 8bit palette no RLE, ToDo: obsolete? */
7
- void writebmp(char *name,pix p,int vvv);
8
-
9
- /* ------------------------------------------------------------------------ */
@@ -1,2893 +0,0 @@
1
- /*
2
- This is an Optical-Character-Recognition program
3
- Copyright (C) 2000-2009 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
-
21
- sometimes I have written comments in german language, sorry for that
22
-
23
- - look for ??? for preliminary code
24
- - space: avX=22 11-13 (empirical estimated)
25
- avX=16 5-7
26
- avX= 7 5-6
27
-
28
- ToDo: - add filter (r/s mismatch) g300c1
29
- - better get_line2 function (problems on high resolution)
30
- - write parallelizable code!
31
- - learnmode (optimize filter)
32
- - use ispell for final control or if unsure
33
- - better line scanning (if not even)
34
- - step 5: same chars differ? => expert mode
35
- - chars dx>dy and above 50% hor-crossing > 4 is char-group ?
36
- - detect color of chars and background
37
- - better word space calculation (look at the examples)
38
- (distance: left-left, middle-middle, left-right, thickness of e *0.75)
39
-
40
- GLOBAL DATA (mostly structures)
41
- - pix : image - one byte per pixel bits0-2=working
42
- - lines : rows of the text (points to pix)
43
- - box : list of bounding box for character
44
- - obj : objects (lines, splines, etc. building a character)
45
- */
46
-
47
-
48
- #include <stdlib.h>
49
- #include <stdio.h>
50
- #include <assert.h>
51
- #include <string.h>
52
- #include <ctype.h>
53
- #include "config.h"
54
- #ifdef HAVE_WCHAR_H
55
- #include <wchar.h>
56
- #endif
57
-
58
- #include "amiga.h"
59
- #include "list.h"
60
- #include "pgm2asc.h"
61
- // #include "pcx.h" /* needed for writebmp (removed later) */
62
- /* ocr1 is the test-engine - remember: this is development version */
63
- #include "ocr1.h"
64
- /* first engine */
65
- #include "ocr0.h"
66
- #include "otsu.h"
67
- #include "barcode.h"
68
- #include "progress.h"
69
-
70
- #include "gocr.h"
71
-
72
- /* wew: will be exceeded by capitals at 1200dpi */
73
- #define MaxBox (100*200) // largest possible letter (buffersize)
74
- #define MAX(a,b) ((a) >= (b) ? (a) : (b))
75
-
76
- /* if the system does not know about wchar.h, define functions here */
77
- #ifndef HAVE_WCHAR_H
78
- /* typedef unsigned wchar_t; */
/* Fallback for systems without <wchar.h>: find the first occurrence of wc
 * in the NUL-terminated wide string wcs.
 * Returns a pointer to the match, or NULL when wc does not occur.  As with
 * the standard function, the terminator counts as part of the string, so
 * searching for 0 returns a pointer to the terminator. */
wchar_t *wcschr (wchar_t *wcs, wchar_t wc) {
  size_t i;

  for (i = 0; wcs[i]; i++)
    if (wcs[i] == wc) return wcs + i;
  return (wc == 0) ? wcs + i : NULL;
}
/* Fallback for systems without <wchar.h>: copy the wide string src,
 * terminator included, to dest and return dest.  dest must be large
 * enough; no bounds are checked. */
wchar_t *wcscpy (wchar_t *dest, const wchar_t *src) {
  wchar_t *out = dest;

  while (*src)
    *out++ = *src++;
  *out = 0;
  return dest;
}
/* Fallback for systems without <wchar.h>: number of wide characters in s
 * before the terminating 0. */
size_t wcslen (const wchar_t *s){
  const wchar_t *end = s;

  while (*end)
    end++;
  return (size_t)(end - s);
}
89
- #endif
90
- #ifndef HAVE_WCSDUP
/* Replacement for the GNU extension wcsdup(): return a newly malloc'ed
 * copy of the wide string WS, or NULL when no memory is available.
 * The caller releases the copy with free(). */
wchar_t * wcsdup (const wchar_t *WS) {
  size_t count = wcslen(WS) + 1;          /* characters including terminator */
  wchar_t *copy = (wchar_t *) malloc(count * sizeof(wchar_t));

  if (copy != NULL)
    wcscpy(copy, WS);
  return copy;
}
98
- #endif
99
-
100
- // ------------------------ feature extraction -----------------
101
- // -------------------------------------------------------------
102
- // detect maxima of line overlaps (return in %) and line coordinates
103
- // this is for future use
104
- #define HOR 1 // horizontal
105
- #define VER 2 // vertical
106
- #define RIS 3 // rising=steigend
107
- #define FAL 4 // falling=fallend
108
-
/* Exchange the two ints that a and b point to (a no-op when a == b). */
static void swap(int *a, int *b) {
  const int first = *a;
  const int second = *b;

  *a = second;
  *b = first;
}
115
-
116
- // calculate the overlapping of the line (0-1) with black points
117
- // by recursive bisection
118
- // line: y=dy/dx*x+b, implicit form: d=F(x,y)=dy*x-dx*y+b*dx=0
119
- // incremental y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y))
120
- // ret & 1 => inverse pixel!
121
- // d=2*F(x,y) integer numbers
122
- int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret){
123
- int dx,dy,incrE,incrNE,d,x,y,r0,r1,ty,tx,
124
- *px,*py,*pdx,*pdy,*ptx,*pty,*px1;
125
- dx=abs(x1-x0); tx=((x1>x0)?1:-1); // tx=x-spiegelung (new)
126
- dy=abs(y1-y0); ty=((y1>y0)?1:-1); // ty=y-spiegelung (new)
127
- // rotate coordinate system if dy>dx
128
- /*bbg: can be faster if instead of pointers we use the variables and swaps? */
129
- /*js: Do not know, I am happy that the current code is working and is small */
130
- if(dx>dy){ pdx=&dx;pdy=&dy;px=&x;py=&y;ptx=&tx;pty=&ty;px1=&x1; }
131
- else { pdx=&dy;pdy=&dx;px=&y;py=&x;ptx=&ty;pty=&tx;px1=&y1; }
132
- if( *ptx<0 ){ swap(&x0,&x1);swap(&y0,&y1);tx=-tx;ty=-ty; }
133
- d=((*pdy)<<1)-(*pdx); incrE=(*pdy)<<1; incrNE=((*pdy)-(*pdx))<<1;
134
- x=x0; y=y0; r0=r1=0; /* dd=tolerance (store max drift) */
135
- while( (*px)<=(*px1) ){
136
- if( ((getpixel(p,x,y)<cs)?1:0)^(ret&1) ) r0++; else r1++;
137
- (*px)++; if( d<=0 ){ d+=incrE; } else { d+=incrNE; (*py)+=(*pty); }
138
- }
139
- return (r0*(ret&~1))/(r0+r1); // ret==100 => percentage %
140
- }
141
-
142
- // this function should detect whether a direct connection between points
143
- // exists or not, not finally implemented
144
- // ret & 1 => inverse pixel!
145
- // d=2*F(x,y) integer numbers, ideal line: ,I pixel: I@
146
- // ..@ @@@ .@. ...,@2@. +1..+3 floodfill around line ???
147
- // ..@ .@@ .@. ...,.@@@ +2..+4 <= that's not implemented yet
148
- // ..@ ..@ .@. ...,.@@@ +2..+4
149
- // @.@ @.. .@. ...,@@@. +1..+3
150
- // @.@ @@. .@. ...I@@@. 0..+3
151
- // @@@ @@@ .@. ..@1@@.. 0..+2
152
- // 90% 0% 100% 90% r1-r2
153
- // I am not satisfied with it
154
- int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret){
155
- int dx,dy,incrE,incrNE,d,x,y,r0,r1,ty,tx,q,ddy,rx,ry,
156
- *px,*py,*pdx,*pdy,*ptx,*pty,*px1;
157
- dx=abs(x1-x0); tx=((x1>x0)?1:-1); // tx=x-spiegelung (new)
158
- dy=abs(y1-y0); ty=((y1>y0)?1:-1); // ty=y-spiegelung (new)
159
- // rotate coordinate system if dy>dx
160
- if(dx>dy){ pdx=&dx;pdy=&dy;px=&x;py=&y;ptx=&tx;pty=&ty;px1=&x1;rx=1;ry=0; }
161
- else { pdx=&dy;pdy=&dx;px=&y;py=&x;ptx=&ty;pty=&tx;px1=&y1;rx=0;ry=1; }
162
- if( *ptx<0 ){ swap(&x0,&x1);swap(&y0,&y1);tx=-tx;ty=-ty; }
163
- d=((*pdy)<<1)-(*pdx); incrE=(*pdy)<<1; incrNE=((*pdy)-(*pdx))<<1;
164
- x=x0; y=y0; r0=r1=0; ddy=3; // tolerance = bit 1 + bit 0 = left+right
165
- // int t=(*pdx)/16,tl,tr; // tolerance, left-,right delimiter
166
- while( (*px)<=(*px1) ){ // not finaly implemented
167
- q=((getpixel(p,x,y)<cs)?1:0)^(ret&1);
168
- if ( !q ){ // tolerance one pixel perpenticular to the line
169
- // what about 2 or more pixels tolerance???
170
- ddy&=(~1)|(((getpixel(p,x+ry,y+rx)<cs)?1:0)^(ret&1));
171
- ddy&=(~2)|(((getpixel(p,x-ry,y-rx)<cs)?1:0)^(ret&1))*2;
172
- } else ddy=3;
173
- if( ddy ) r0++; else r1++;
174
- (*px)++; if( d<=0 ){ d+=incrE; } else { d+=incrNE; (*py)+=(*pty); }
175
- }
176
- return (r0*(ret&~1))/(r0+r1); // ret==100 => percentage %
177
- }
178
-
179
- /* Look for dots in the rectangular region x0 <= x <= x1 and y0 <= y
180
- <= y1 in pixmap p. The two low order bits in mask indicate the color
181
- of dots to look for: If mask==1 then look for black dots (where a
182
- pixel value less than cs is considered black). If mask==2 then look
183
- for white dots. If mask==3 then look for both black and white dots.
184
- If the dots are found, the corresponding bits are set in the returned
185
- value. Heavily used by the engine ocr0*.cc */
186
- char get_bw(int x0, int x1, int y0, int y1, pix * p, int cs, int mask) {
187
- char rc = 0; // later with error < 2% (1 dot)
188
- int x, y;
189
-
190
- if (x0 < 0) x0 = 0;
191
- if (x1 >= p->x) x1 = p->x - 1;
192
- if (y0 < 0) y0 = 0;
193
- if (y1 >= p->y) y1 = p->y - 1;
194
-
195
- for ( y = y0; y <= y1; y++)
196
- for ( x = x0; x <= x1; x++) {
197
- rc |= ((getpixel(p, x, y) < cs) ? 1 : 2); // break if rc==3
198
- if ((rc & mask) == mask)
199
- return mask; // break loop
200
- }
201
- return (rc & mask);
202
- }
203
-
204
- /* more general Mar2000 (x0,x1,y0,y1 instead of x0,y0,x1,y1! (history))
205
- * look for black crossings throw a line from x0,y0 to x1,y1 and count them
206
- * follow line and count crossings ([white]-black-transitions)
207
- * ex: horizontal num_cross of 'm' would return 3
208
- *
209
- * fail for: .a... a-to-b counts no transitions, but there is
210
- * ...#.
211
- * ..#..
212
- * .#..b
213
- */
214
- int num_cross(int x0, int x1, int y0, int y1, pix *p, int cs) {
215
- int rc = 0, col = 0, k, x, y, i, d; // rc=crossings col=0=white
216
- int dx = x1 - x0, dy = y1 - y0;
217
-
218
- d = MAX(abs(dx), abs(dy));
219
- for (i = 0, x = x0, y = y0; i <= d; i++) {
220
- if (d) {
221
- x = x0 + i * dx / d;
222
- y = y0 + i * dy / d;
223
- }
224
- k = ((getpixel(p, x, y) < cs) ? 1 : 0); // 0=white 1=black
225
- if (col == 0 && k == 1) // found a white-black transition
226
- rc++;
227
- col = k; // last color
228
- }
229
- return rc;
230
- }
231
-
232
- /* check if test matches pattern
233
- * possible pattern: "a-zA-Z0-9+--\\" (x-y dont work for c>127)
234
- * return: 0 means dont fit, 1 means found
235
- * ToDo: wchar_t cc + matching UTF-8 pattern for nonASCII
236
- */
237
- int my_strchr( char *pattern, wchar_t cc ) {
238
- char *s1;
239
- if (pattern==(char *)NULL) return 0;
240
-
241
- /* if (!(cc&0x80)) s1=strchr(pattern,(char)cc); else */
242
- switch (cc) {
243
- case '-': /* used as a special character */
244
- s1=strstr(pattern,"--"); /* search string -- in pattern */
245
- if (s1) return 1; break;
246
- default:
247
- s1=strstr(pattern,decode(cc, UTF8)); /* search string cc in pattern */
248
- if (s1) return 1; /* cc simply matches */
249
- /* single char not found, now check the ranges */
250
- s1=pattern;
251
- while (s1) {
252
- s1=strchr(s1+1,'-'); /* look for next '-' */
253
- if ((!s1) || (!s1[0]) || (!s1[1])) return 0; /* nothing found or end */
254
- if (*(s1-1)=='-' || *(s1+1)=='-') continue; /* skip -- pattern */
255
- if (*(s1-1)<=cc && *(s1+1)>=cc) return 1; /* within range */
256
- }
257
- }
258
- return 0;
259
- }
260
-
261
- /* set alternate chars and its weight, called from the engine
262
- if a char is recognized to (weight) percent
263
- can be used for filtering (only numbers etc)
264
- often usefull if Il1 are looking very similar
265
- should this function stay in box.c ???
266
- weight is between 0 and 100 in percent, 100 means absolutely sure
267
- - not final, not time critical (js)
268
- - replace it by a string-function setaobj(*b,"string",weight)
269
- and let call setac the setas function
270
- */
271
-
272
- int setas(struct box *b, char *as, int weight){
273
- int i,j;
274
- if (b->num_ac > NumAlt || b->num_ac<0) {
275
- fprintf(stderr,"\nDBG: There is something wrong with setas()!");
276
- b->num_ac=0;
277
- }
278
- if (as==NULL) {
279
- fprintf(stderr,"\nDBG: setas(NULL) makes no sense!"); return 0; }
280
- if (as[0]==0) {
281
- fprintf(stderr,"\nDBG: setas(\"\") makes no sense!"
282
- " x= %d %d", b->x0, b->y0);
283
- // out_x(b);
284
- return 0;
285
- }
286
-
287
- /* char filter (ex: only numbers) ToDo: cfilter as UTF-8 */
288
- if (JOB->cfg.cfilter) {
289
- /* do not accept chars which are not in the cfilter string */
290
- if ( as[0]>0 && as[1]==0 )
291
- if ( !my_strchr(JOB->cfg.cfilter,as[0]) ) return 0;
292
- }
293
- #if 0 /* obsolete, done in setac */
294
- /* not sure that this is the right place, but where else? */
295
- if ( as[0]>0 && as[1]==0 )
296
- if (b->modifier != SPACE && b->modifier != 0) {
297
- wchar_t newac;
298
- newac = compose(as[0], b->modifier);
299
- as = (char *)decode(newac, UTF8); /* was (const char *) */
300
- if (newac == as[0]) { /* nothing composed */
301
- fprintf(stderr, "\nDBG setas compose was useless %d %d",b->x0,b->y0);
302
- // out_x(b);
303
- }
304
- }
305
- #endif
306
-
307
- /* only the first run gets the full weight */
308
- weight=(100-JOB->tmp.n_run)*weight/100;
309
-
310
- /* remove same entries from table */
311
- for (i=0;i<b->num_ac;i++)
312
- if (b->tas[i])
313
- if (strcmp(as,b->tas[i])==0) break;
314
- if (b->num_ac>0 && i<b->num_ac){
315
- if (weight<=b->wac[i]) return 0; /* if found + less weight ignore it */
316
- /* to insert the new weigth on the right place, we remove it first */
317
- if (b->tas[i]) free(b->tas[i]);
318
- for (j=i;j<b->num_ac-1;j++){ /* shift lower entries */
319
- b->tac[j]=b->tac[j+1]; /* copy the char */
320
- b->tas[j]=b->tas[j+1]; /* copy the pointer to the string */
321
- b->wac[j]=b->wac[j+1]; /* copy the weight */
322
- }
323
- b->num_ac--; /* shrink table */
324
- }
325
- /* sorting and add it to the table */
326
- for (i=0;i<b->num_ac;i++) if (weight>b->wac[i]) break;
327
- if (b->num_ac<NumAlt-1) b->num_ac++; /* enlarge table */
328
- for (j=b->num_ac-1;j>i;j--){ /* shift lower entries */
329
- b->tac[j]=b->tac[j-1]; /* copy the char */
330
- b->tas[j]=b->tas[j-1]; /* copy the pointer to the string */
331
- b->wac[j]=b->wac[j-1]; /* copy the weight */
332
- }
333
- if (i<b->num_ac) { /* insert new entry */
334
- b->tac[i]=0; /* insert the char=0 ... */
335
- b->tas[i]=(char *)malloc(strlen(as)+1); /* ... string */
336
- if (b->tas[i]) memcpy(b->tas[i],as,strlen(as)+1);
337
- b->wac[i]=weight; /* ... and its weight */
338
- }
339
- if (i==0) b->c=b->tac[0]; /* char or 0 for string */
340
- return 0;
341
- }
342
-
343
- /* ToDo: this function will be replaced by a call of setas() later */
344
- int setac(struct box *b, wchar_t ac, int weight){
345
- int i,j;
346
- if ((!b) || b->num_ac > NumAlt || b->num_ac<0) {
347
- fprintf(stderr,"\nDBG: This is a bad call to setac()!");
348
- if(b && (JOB->cfg.verbose & 6)) out_x(b);
349
- b->num_ac=0;
350
- }
351
- if (ac==0 || ac==UNKNOWN) {
352
- fprintf(stderr,"\nDBG: setac(0) makes no sense!");
353
- return 0;
354
- }
355
- /* char filter (ex: only numbers) ToDo: cfilter as UTF-8 */
356
- if (JOB->cfg.cfilter) {
357
- /* do not accept chars which are not in the cfilter string */
358
- /* if ( ac>255 || !strchr(JOB->cfg.cfilter,(char)ac) ) return 0; */
359
- if ( !my_strchr(JOB->cfg.cfilter,ac) ) return 0;
360
- }
361
- /* not sure that this is the right place, but where else? */
362
- if (b->modifier != SPACE && b->modifier != 0) {
363
- wchar_t newac;
364
- newac = compose(ac, b->modifier);
365
- if (newac == ac) { /* nothing composed */
366
- if(JOB->cfg.verbose & 7)
367
- fprintf(stderr, "\nDBG setac(%s): compose was useless @ %d %d",
368
- decode(ac,ASCII), b->x0, b->y0);
369
- /* if(JOB->cfg.verbose & 6) out_x(b); */
370
- }
371
- ac = newac;
372
- }
373
-
374
- /* only the first run gets the full weight */
375
- weight=(100-JOB->tmp.n_run)*weight/100;
376
-
377
- /* remove same entries from table */
378
- for (i=0;i<b->num_ac;i++) if (ac==b->tac[i]) break;
379
- if (b->num_ac>0 && i<b->num_ac){
380
- if (weight<=b->wac[i]) return 0;
381
- if (b->tas[i]) free(b->tas[i]);
382
- for (j=i;j<b->num_ac-1;j++){ /* shift lower entries */
383
- b->tac[j]=b->tac[j+1]; /* copy the char */
384
- b->tas[j]=b->tas[j+1]; /* copy the pointer to the string */
385
- b->wac[j]=b->wac[j+1]; /* copy the weight */
386
- }
387
- b->num_ac--; /* shrink table */
388
- }
389
- /* sorting it to the table */
390
- for (i=0;i<b->num_ac;i++) if (weight>b->wac[i]) break;
391
- if (b->num_ac<NumAlt-1) b->num_ac++; /* enlarge table */
392
- for (j=b->num_ac-1;j>i;j--){ /* shift lower entries */
393
- b->tac[j]=b->tac[j-1]; /* copy the char */
394
- b->tas[j]=b->tas[j-1]; /* copy the pointer to the string */
395
- b->wac[j]=b->wac[j-1]; /* copy the weight */
396
- }
397
- if (i<b->num_ac) { /* insert new entry */
398
- b->tac[i]=ac; /* insert the char ... */
399
- b->tas[j]=NULL; /* ... no string (?) */
400
- b->wac[i]=weight; /* ... and its weight */
401
- }
402
- if (i==0) b->c=ac; /* store best result to b->c (will be obsolete) */
403
-
404
- return 0;
405
- }
406
-
407
- /* test if ac in wac-table
408
- usefull for contextcorrection and box-splitting
409
- return 0 if not found
410
- return wac if found (wac>0)
411
- */
412
- int testac(struct box *b, wchar_t ac){
413
- int i;
414
- if (b->num_ac > NumAlt || b->num_ac<0) {
415
- fprintf(stderr,"\n#DEBUG: There is something wrong with testac()!");
416
- b->num_ac=0;
417
- }
418
- /* search entries in table */
419
- for (i=0;i<b->num_ac;i++) if (ac==b->tac[i]) return b->wac[i];
420
- return 0;
421
- }
422
-
423
-
424
- /* look for edges: follow a line from x0,y0 to x1,y1, record the
425
- * location of each transition, and return their number.
426
- * ex: horizontal num_cross of 'm' would return 6
427
- * remark: this function is not used, obsolete? ToDo: remove?
428
- */
429
- int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path) {
430
- int rc = 0, prev, x, y, i, d, color; // rc=crossings col=0=white
431
- int dx = x1 - x0, dy = y1 - y0;
432
-
433
- d = MAX(abs(dx), abs(dy));
434
- prev = getpixel(p, x0, y0) < cs; // 0=white 1=black
435
- path->start = prev;
436
- for (i = 1, x = x0, y = y0; i <= d; i++) {
437
- if (d) {
438
- x = x0 + i * dx / d;
439
- y = y0 + i * dy / d;
440
- }
441
- color = getpixel(p, x, y) < cs; // 0=white 1=black
442
- if (color != prev){
443
- if (rc>=path->max){
444
- int n=path->max*2+10;
445
- path->x = (int *) xrealloc(path->x, n*sizeof(int));
446
- path->y = (int *) xrealloc(path->y, n*sizeof(int));
447
- path->max = n;
448
- }
449
- path->x[rc]=x;
450
- path->y[rc]=y;
451
- rc++;
452
- }
453
- prev = color;
454
- }
455
- path->num=rc;
456
- return rc;
457
- }
458
-
459
/* realloc() wrapper which terminates the program instead of returning
 * NULL for a non-zero size request.
 * ToDo: only used in follow_path, which is obsolete, remove?
 */
void *xrealloc(void *ptr, size_t size){
  void *grown = realloc(ptr, size);
  if (grown != NULL || size == 0)
    return grown;
  /* a real request failed */
  fprintf(stderr, "insufficient memory");
  exit(1);
  return NULL; /* not reached */
}
469
-
470
- /*
471
- * -------------------------------------------------------------
472
- * mark edge-points
473
- * - first move forward until b/w-edge
474
- * - more than 2 pixel?
475
- * - loop around
476
- * - if forward pixel : go up, rotate right
477
- * - if forward no pixel : rotate left
478
- * - stop if found first 2 pixel in same order
479
- * go_along_the_right_wall strategy is very similar and used otherwhere
480
- * --------------------------------------------------------------
481
- * turmite game: inp: start-x,y, regel r_black=UP,r_white=RIght until border
482
- * out: last-position
483
- *
484
- * could be used to extract more features:
485
- * by counting stepps, dead-end streets ,xmax,ymax,ro-,ru-,lo-,lu-edges
486
- *
487
- * use this little animal to find features, I first was happy about it
488
- * but now I prefer the loop() function
489
- */
490
-
491
- void turmite(pix *p, int *x, int *y,
492
- int x0, int x1, int y0, int y1, int cs, int rw, int rb) {
493
- int r;
494
- if (outbounds(p, x0, y0)) // out of pixmap
495
- return;
496
- while (*x >= x0 && *y >= y0 && *x <= x1 && *y <= y1) {
497
- r = ((getpixel(p, *x, *y) < cs) ? rb : rw); // select rule
498
- switch (r) {
499
- case UP: (*y)--; break;
500
- case DO: (*y)++; break;
501
- case RI: (*x)++; break;
502
- case LE: (*x)--; break;
503
- case ST: break;
504
- default: assert(0);
505
- }
506
- if( r==ST ) break; /* leave the while-loop */
507
- }
508
- }
509
-
510
- /* search a way from p0 to p1 without crossing pixels of type t
511
- * only two directions, useful to test if there is a gap 's'
512
- * labyrinth algorithm - do you know a faster way? */
513
- int joined(pix *p, int x0, int y0, int x1, int y1, int cs){
514
- int t,r,x,y,dx,dy,xa,ya,xb,yb;
515
- x=x0;y=y0;dx=1;dy=0;
516
- if(x1>x0){xa=x0;xb=x1;} else {xb=x0;xa=x1;}
517
- if(y1>y0){ya=y0;yb=y1;} else {yb=y0;ya=y1;}
518
- t=((getpixel(p,x,y)<cs)?1:0);
519
- for(;;){
520
- if( t==((getpixel(p,x+dy,y-dx)<cs)?1:0) // right free?
521
- && x+dy>=xa && x+dy<=xb && y-dx>=ya && y-dx<=yb) // wall
522
- { r=dy;dy=-dx;dx=r;x+=dx;y+=dy; } // rotate right and step forward
523
- else { r=dx;dx=-dy;dy=r; } // rotate left
524
- // fprintf(stderr," path xy %d-%d %d-%d %d %d %d %d\n",xa,xb,ya,yb,x,y,dx,dy);
525
- if( x==x1 && y==y1 ) return 1;
526
- if( x==x0 && y==y0 && dx==1) return 0;
527
- }
528
- // return 0; // endless loop ?
529
- }
530
-
531
- /* move from x,y to direction r until pixel of color col is found
532
- * or maximum of l steps
533
- * return the number of steps done */
534
- int loop(pix *p,int x,int y,int l,int cs,int col, DIRECTION r){
535
- int i=0;
536
- if(x>=0 && y>=0 && x<p->x && y<p->y){
537
- switch (r) {
538
- case UP:
539
- for( ;i<l && y>=0;i++,y--)
540
- if( (getpixel(p,x,y)<cs)^col )
541
- break;
542
- break;
543
- case DO:
544
- for( ;i<l && y<p->y;i++,y++)
545
- if( (getpixel(p,x,y)<cs)^col )
546
- break;
547
- break;
548
- case LE:
549
- for( ;i<l && x>=0;i++,x--)
550
- if( (getpixel(p,x,y)<cs)^col )
551
- break;
552
- break;
553
- case RI:
554
- for( ;i<l && x<p->x;i++,x++)
555
- if( (getpixel(p,x,y)<cs)^col )
556
- break;
557
- break;
558
- default:;
559
- }
560
- }
561
- return i;
562
- }
563
-
564
- /* Given a point, frames a rectangle containing all points of the same
565
- * color surrounding it, and mark these points.
566
- * ToDo: obsolate and replaced by frame_vector
567
- *
568
- * looking for better algo: go horizontally and look for upper/lower non_marked_pixel/nopixel
569
- * use lowest three bits for mark
570
- * - recursive version removed! AmigaOS has no Stack-OVL-Event
571
- * run around the chape using laby-robot
572
- * bad changes can lead to endless loop!
573
- * - this is not absolutely sure but mostly works well
574
- * diag - 0: only pi/2 direction, 1: pi/4 directions (diagonal)
575
- * mark - 3 bit marker, mark each valid pixel with it
576
- */
577
- int frame_nn(pix *p, int x, int y,
578
- int *x0, int *x1, int *y0, int *y1, // enlarge frame
579
- int cs, int mark,int diag){
580
- #if 1 /* flood-fill to detect black objects, simple and faster? */
581
- int rc = 0, dx, col, maxstack=0; static int overflow=0;
582
- int bmax=1024, blen=0, *buf; /* buffer as replacement for recursion stack */
583
-
584
- /* check bounds */
585
- if (outbounds(p, x, y)) return 0;
586
- /* check if already marked (with mark since v0.4) */
587
- if ((marked(p,x,y)&mark)==mark) return 0;
588
-
589
- col = ((getpixel(p, x, y) < cs) ? 0 : 1);
590
- buf=(int *)malloc(bmax*sizeof(int)*2);
591
- if (!buf) { fprintf(stderr,"malloc failed (frame_nn)\n");return 0;}
592
- buf[0]=x;
593
- buf[1]=y;
594
- blen=1;
595
-
596
- g_debug(fprintf(stderr,"\nframe_nn x=%4d y=%4d",x,y);)
597
- for ( ; blen ; ) {
598
- /* max stack depth is complexity of the object */
599
- if (blen>maxstack) maxstack=blen;
600
- blen--; /* reduce the stack */
601
- x=buf[blen*2+0];
602
- y=buf[blen*2+1];
603
- if (y < *y0) *y0 = y;
604
- if (y > *y1) *y1 = y;
605
- /* first go to leftmost pixel */
606
- for ( ; x>0 && (col == ((getpixel(p, x-1, y) < cs) ? 0 : 1)) ; x--);
607
- if ((marked(p,x,y)&mark)==mark) continue; /* already scanned */
608
- for (dx=-1;dx<2;dx+=2) /* look at upper and lower line, left */
609
- if ( diag && x<p->x && x-1>0 && y+dx >=0 && y+dx < p->y
610
- && col != ((getpixel(p, x , y+dx) < cs) ? 0 : 1)
611
- && col == ((getpixel(p, x-1, y+dx) < cs) ? 0 : 1)
612
- && !((marked(p,x-1,y+dx)&mark)==mark)
613
- ) {
614
- if (blen+1>=bmax) { overflow|=1; continue; }
615
- buf[blen*2+0]=x-1;
616
- buf[blen*2+1]=y+dx;
617
- blen++;
618
- }
619
- if (x < *x0) *x0 = x;
620
- /* second go right, mark and get new starting points */
621
- for ( ; x<p->x && (col == ((getpixel(p, x , y) < cs) ? 0 : 1)) ; x++) {
622
- p->p[x + y * p->x] |= (mark & 7); rc++; /* mark pixel */
623
- /* enlarge frame */
624
- if (x > *x1) *x1 = x;
625
- for (dx=-1;dx<2;dx+=2) /* look at upper and lower line */
626
- if ( col == ((getpixel(p, x , y+dx) < cs) ? 0 : 1)
627
- && (
628
- col != ((getpixel(p, x-1, y ) < cs) ? 0 : 1)
629
- || col != ((getpixel(p, x-1, y+dx) < cs) ? 0 : 1) )
630
- && !((marked(p,x,y+dx)&mark)==mark) && y+dx<p->y && y+dx>=0
631
- ) {
632
- if (blen+1>=bmax) { overflow|=1; continue; }
633
- buf[blen*2+0]=x;
634
- buf[blen*2+1]=y+dx;
635
- blen++;
636
- }
637
- }
638
- for (dx=-1;dx<2;dx+=2) /* look at upper and lower line, right */
639
- if ( diag && x<p->x && x-1>0 && y+dx >=0 && y+dx < p->y
640
- && col == ((getpixel(p, x-1, y ) < cs) ? 0 : 1)
641
- && col != ((getpixel(p, x , y ) < cs) ? 0 : 1)
642
- && col != ((getpixel(p, x-1, y+dx) < cs) ? 0 : 1)
643
- && col == ((getpixel(p, x , y+dx) < cs) ? 0 : 1)
644
- && !((marked(p,x,y+dx)&mark)==mark)
645
- ) {
646
- if (blen+1>=bmax) { overflow|=1; continue; }
647
- buf[blen*2+0]=x;
648
- buf[blen*2+1]=y+dx;
649
- blen++;
650
- }
651
- }
652
-
653
- /* debug, ToDo: use info maxstack and pixels for image classification */
654
- g_debug(fprintf(stderr," maxstack= %4d pixels= %6d",maxstack,rc);)
655
- if (overflow==1){
656
- overflow|=2;
657
- fprintf(stderr,"# Warning: frame_nn stack oerflow\n");
658
- }
659
- free(buf);
660
- #else /* old version, ToDo: improve it for tmp04/005*.pgm.gz */
661
- int i, j, d, dx, ox, oy, od, nx, ny, rc = 0, rot = 0, x2 = x, y2 = y, ln;
662
-
663
- static const int d0[8][2] = { { 0, -1} /* up */, {-1, -1},
664
- {-1, 0} /* left */, {-1, 1},
665
- { 0, 1} /* down */, { 1, 1},
666
- { 1, 0} /* right */, { 1, -1}};
667
-
668
- /* check bounds */
669
- if (outbounds(p, x, y))
670
- return 0;
671
- /* check if already marked */
672
- if ((marked(p,x,y)&mark)==mark)
673
- return 0;
674
-
675
- i = ((getpixel(p, x, y) < cs) ? 0 : 1);
676
- rc = 0;
677
-
678
- g_debug(fprintf(stderr," start frame:");)
679
-
680
- for (ln = 0; ln < 2 && rot >= 0; ln++) { // repeat if right-loop
681
- g_debug(fprintf(stderr," ln=%d diag=%d cs=%d x=%d y=%d - go to border\n",ln,diag,cs,x,y);)
682
-
683
- od=d=(8+4*ln-diag)&7; // start robot looks up, right is a wall
684
- // go to right (left) border
685
- if (ln==1) {
686
- x=x2; y=y2;
687
- }
688
- /* start on leftmost position */
689
- for (dx = 1 - 2*ln; x + dx < p->x && x + dx >= 0 /* bounds */ &&
690
- i == ((getpixel(p, x + dx, y) < cs) ? 0 : 1) /* color */;
691
- x += dx);
692
-
693
- g_debug(fprintf(stderr," ln=%d diag=%d cs=%d x=%d y=%d\n",ln,diag,cs,x,y);)
694
-
695
- /* robot stores start-position */
696
- ox = x; oy = y;
697
- for (rot = 0; abs(rot) <= 64; ) { /* for sure max. 8 spirals */
698
- /* leftmost position */
699
- if (ln == 0 && x < x2) {
700
- x2 = x; y2 = y;
701
- }
702
-
703
- g_debug(fprintf(stderr," x=%3d y=%3d d=%d i=%d p=%3d rc=%d\n",x,y,d,i,getpixel(p,x,y),rc);)
704
-
705
- if ( abs(d0[d][1]) ) { /* mark left (right) pixels */
706
- for (j = 0, dx = d0[d][1]; x + j >= 0 && x + j < p->x
707
- && i == ((getpixel(p, x + j, y) < cs) ? 0 : 1); j += dx) {
708
- if (!((marked(p, x + j, y)&mark)==mark))
709
- rc++;
710
- p->p[x + j + y * p->x] |= (mark & 7);
711
- }
712
- }
713
- /* look to the front of robot */
714
- nx = x + d0[d][0];
715
- ny = y + d0[d][1];
716
- /* if right is a wall */
717
- if ( outbounds(p, nx, ny) || i != ((getpixel(p,nx,ny)<cs) ? 0 : 1) ) {
718
- /* rotate left */
719
- d=(d+2-diag) & 7; rot-=2-diag;
720
- }
721
- else { /* if no wall, go, turn back and rotate left */
722
- x=nx; y=ny; d=(d+4+2-diag) & 7; rot+=2-diag+4;
723
- /* enlarge frame */
724
- if (x < *x0) *x0 = x;
725
- if (x > *x1) *x1 = x;
726
- if (y < *y0) *y0 = y;
727
- if (y > *y1) *y1 = y;
728
- }
729
- if(x==ox && y==oy && d==od) break; // round trip finished
730
- }
731
- }
732
- g_debug(fprintf(stderr," rot=%d\n",rot);)
733
- #endif
734
- return rc;
735
- }
736
-
737
- /* obsolete! replaced by vectors
738
- * mark neighbouring pixel of same color, return number
739
- * better with neighbours of same color (more general) ???
740
- * parameters: (&~7)-pixmap, start-point, critical_value, mark
741
- * recursion is removed */
742
- int mark_nn(pix * p, int x, int y, int cs, int r) {
743
- /* out of bounds or already marked? */
744
- if (outbounds(p, x, y) || (marked(p, x, y)&r)==r)
745
- return 0;
746
- {
747
- int x0, x1, y0, y1;
748
- x0 = x1 = x;
749
- y0 = y1 = y; // not used
750
- return frame_nn(p, x, y, &x0, &x1, &y0, &y1, cs, r, JOB->tmp.n_run & 1);
751
- // using same scheme
752
- }
753
- }
754
-
755
- /* ToDo: finish to replace old frame by this new one
756
- *
757
- * @...........#@@@@@@@. # = marked as already scanned black pixels
758
- * @........@@@@@@@@@@@# only left and right border
759
- * .......#@@@@@@@@@@@@@ left side on even y
760
- * ......@@@@@@@@#.@@@@# right side on odd y
761
- * .....#@@@@@......#@@@ no border is marked twice
762
- * ....@@@@@#......@@@#. works also for thinn lines
763
- * ...#@@@@........#@@@. - outer loop is stored as first
764
- * ..@@@@#........@@@#.. - inner loop is stored as second
765
- * .#@@@@........#@@@@.. 1st in an extra box (think on white chars)
766
- * @@@@#.......@@@@#.... 2nd merge in an extra step
767
- * #@@@@@....#@@@@@.....
768
- * @@@@@@@@@@@@@@#......
769
- * .#@@@@@@@@@@@@.......
770
- *
771
- * run around the chape using laby-robot
772
- * - used for scanning boxes, look for horizontal b/w transitions
773
- * with unmarked black pixels and call this routine
774
- * - stop if crossing a marked box in same direction (left=up, right=down)
775
- * box - char box, store frame_vectors and box
776
- * x,y - starting point
777
- * mark - 3 bit marker, mark each valid pixel with it
778
- * diag - 0: only pi/2 direction, 1: pi/4 directions (diagonal)
779
- * ds - start direction, 6=right of right border, 2=left of left border
780
- * ret - 0=ok, -1=already marked, -2=max_num_frames_exceeded
781
- * -7=no border in direction ds
782
- */
783
- #if 0
784
- #undef g_debug
785
- #define g_debug(x) x
786
- #endif
787
- /* grep keywords: scan_vectors frame_vector */
788
- int frame_vector(struct box *box1, int x, int y,
789
- int cs, int mark, int diag, int ds) {
790
- int i1, i2, i2o,
791
- new_x=1, /* flag for storing the vector x,y */
792
- steps=1, /* steps between stored vectors, speedup for big frames */
793
- d, /* direction */
794
- ox, oy, /* starting point */
795
- nx, ny, mx, my, /* used for simplification */
796
- /* ToDo: add periphery to box (german: Umfang?) */
797
- rc = 1, /* return code, circumference, sum vector lengths */
798
- rot = 0, /* memory for rotation, rot=8 means one full rotation */
799
- vol = 0; /* volume inside frame, negative for white inside black */
800
- pix *p=box1->p;
801
-
802
- /* translate the 8 directions to (x,y) pairs,
803
- * if only four directions are used, only every 2nd vector is accessed,
804
- * +1 turn left, -1 turn right
805
- */
806
- static const int d0[8][2] =
807
- { { 0, -1}, /* up */ {-1, -1}, /* up-le */
808
- {-1, 0}, /* left */ {-1, 1}, /* do-le */
809
- { 0, 1}, /* down */ { 1, 1}, /* do-ri */
810
- { 1, 0}, /* right */ { 1, -1} }; /* up-ri */
811
-
812
- /* check bounds */
813
- if (outbounds(p, x, y))
814
- return 0;
815
-
816
- /* pixel color we are looking for, 0=black, 1=white */
817
- d = ds;
818
- i1 = ((getpixel(p, x, y ) < cs) ? 0 : 1);
819
- i2 = ((getpixel(p, x + d0[d][0], y + d0[d][1]) < cs) ? 0 : 1);
820
-
821
- g_debug(fprintf(stderr,"\nLEV2 frame_vector @ %3d %3d d%d %2d %2d"
822
- " %d-%d pix=%3d mark=%d cs=%d",\
823
- x,y,ds,d0[ds][0],d0[ds][1],i1,i2,getpixel(p,x,y),mark,cs);)
824
-
825
- if (i1==i2){
826
- fprintf(stderr,"ERROR frame_vector: no border\n");
827
- return -7; /* no border detected */
828
- }
829
-
830
- /* initialize boxframe outside this function
831
- box1->x0=box1->x1=x;
832
- box1->y0=box1->y1=y;
833
- */
834
-
835
- /* initialize boxvector outside this function
836
- box1->num_frames=0
837
- num_frame_vectors[0]=0 ???
838
- and store start value
839
- */
840
- if (box1->num_frames > MaxNumFrames) return -2;
841
- /* index to next (x,y) */
842
- i2o=i2=( (box1->num_frames==0)?0:
843
- box1->num_frame_vectors[ box1->num_frames ] );
844
- #if 0 // obsolete v0.43
845
- box1->frame_vector[i2][0]=x;
846
- box1->frame_vector[i2][1]=y;
847
- i2++;
848
- box1->num_frame_vectors[ box1->num_frames ]=i2;
849
- #endif
850
- box1->num_frames++;
851
-
852
- /* robot stores start-position */
853
- ox = x; oy = y; /* look forward to white pixel */
854
-
855
- for (;;) { /* stop if same marked pixel touched */
856
-
857
- g_debug(fprintf(stderr,"\nLEV3: x= %3d %3d d= %d rot= %2d %3d",x,y,d,rot,i2);)
858
-
859
- /* ToDo: store max. abs(rot) ??? for better recognition */
860
- if (new_x) {
861
- g_debug(fprintf(stderr,"\nLEV2: markB xy= %3d %3d ", x, y);)
862
- p->p[x + y * p->x] |= (mark & 7); /* mark black pixel */
863
- }
864
-
865
- /* store a new vector or enlarge the predecessor */
866
- if (new_x && (rc%steps)==0) { /* dont store everything on big chars */
867
- if (i2>=MaxFrameVectors) {
868
- box1->num_frame_vectors[ box1->num_frames-1 ]=i2;
869
- reduce_vectors(box1,1); /* simplify loop */
870
- i2=box1->num_frame_vectors[ box1->num_frames-1 ];
871
- /* enlarge steps on big chars getting speedup */
872
- steps=(box1->y1-box1->y0+box1->x1-box1->x0)/32+1;
873
- }
874
- /* store frame-vector */
875
- if (i2<MaxFrameVectors) {
876
- box1->frame_vector[i2][0]=x;
877
- box1->frame_vector[i2][1]=y;
878
- /* test if older vector points to the same direction */
879
- if (i2>1) {
880
- /* get predecessor */
881
- nx=box1->frame_vector[i2-1][0]-box1->frame_vector[i2-2][0];
882
- ny=box1->frame_vector[i2-1][1]-box1->frame_vector[i2-2][1];
883
- mx=x -box1->frame_vector[i2-1][0];
884
- my=y -box1->frame_vector[i2-1][1];
885
- /* same direction? */
886
- if (nx*my-ny*mx==0 && nx*mx>=0 && ny*my>=0) {
887
- /* simplify by removing predecessor */
888
- i2--;
889
- box1->frame_vector[i2][0]=x;
890
- box1->frame_vector[i2][1]=y;
891
- } /* do not simplify */
892
- }
893
- i2++;
894
- box1->num_frame_vectors[ box1->num_frames-1 ]=i2;
895
- }
896
- g_debug(fprintf(stderr," stored @ %3d steps= %d", i2-1, steps);)
897
- }
898
- new_x=0; /* work for new pixel (x,y) done */
899
-
900
- /* check if round trip is finished */
901
- if (x==ox && y==oy && abs(rot)>=8) break;
902
-
903
- /* look to the front of robot (turtle or ant) */
904
- nx = x + d0[d][0];
905
- ny = y + d0[d][1];
906
-
907
- /* next step, if right is a wall turn the turtle left */
908
- if ( outbounds(p, nx, ny) || i1 != ((getpixel(p,nx,ny)<cs) ? 0 : 1) ) {
909
- if (y==ny && nx>=0 && nx<p->x) { /* if inbound */
910
- g_debug(fprintf(stderr,"\nLEV2: markW xy= %3d %3d ", nx, ny);)
911
- p->p[nx + ny * p->x] |= (mark & 7); /* mark white pixel */
912
- }
913
- /* rotate left 90 or 45 degrees */
914
- d=(d+2-diag) & 7; rot+=2-diag;
915
- /* calculate volume inside frame */
916
- switch (d+diag) {
917
- case 2+2: vol-=x-1; break;
918
- case 6+2: vol+=x; break;
919
- }
920
- }
921
- else { /* if no wall, go forward and turn right (90 or 45 degrees) */
922
- x=nx; y=ny;
923
- /* turn back and rotate left */
924
- d=(d+4+2-diag) & 7; rot+=2-diag-4;
925
- rc++; /* counting steps, used for speedup */
926
-
927
- /* enlarge frame */
928
- if (x < box1->x0) box1->x0 = x;
929
- if (x > box1->x1) box1->x1 = x;
930
- if (y < box1->y0) box1->y0 = y;
931
- if (y > box1->y1) box1->y1 = y;
932
-
933
- new_x=1;
934
- }
935
- }
936
-
937
- /* to distinguish inner and outer frames, store volume as +v or -v */
938
- box1->frame_vol[ box1->num_frames-1 ] = vol;
939
- box1->frame_per[ box1->num_frames-1 ] = rc-1;
940
-
941
- /* dont count and store the first vector twice */
942
- if (i2-i2o>1) {
943
- i2--; rc--; box1->num_frame_vectors[ box1->num_frames-1 ]=i2;
944
- }
945
- /* output break conditions */
946
- g_debug(fprintf(stderr,"\nLEV2 o= %3d %3d x= %3d %3d r=%d v=%d",ox,oy,x,y,rot,vol);)
947
- /* rc=1 for a single point, rc=2 for a two pixel sized point */
948
- g_debug(fprintf(stderr," steps= %3d vectors= %3d",rc,i2);)
949
- /* out_x(box1); ToDo: output only the first thousend */
950
- return rc; /* return number of bordering pixels = periphery? */
951
- }
952
-
953
-
954
-
955
- /* clear lowest 3 (marked) bits (they are used for marking) */
956
- void clr_bits(pix * p, int x0, int x1, int y0, int y1) {
957
- int x, y;
958
- for ( y=y0; y <= y1; y++)
959
- for ( x=x0; x <= x1; x++)
960
- p->p[x+y*p->x] &= ~7;
961
- }
962
-
963
- /* look for white holes surrounded by black points
964
- * at the moment look for white point with black in all four directions
965
- * - store position of hole in coordinates relativ to box!
966
- * ToDo: count only holes with vol>10% ???
967
- * ToDo: rewrite for frame vectors (faster, no malloc)
968
- * holes are frames rotating left hand
969
- * obsolete, do it with vectors
970
- */
971
- int num_hole(int x0, int x1, int y0, int y1, pix * p, int cs, holes_t *holes) {
972
- int num_holes = 0, x, y, hole_size;
973
- pix b; // temporary mini-page
974
- int dx = x1 - x0 + 1, dy = y1 - y0 + 1;
975
- unsigned char *buf; // 2nd copy of picture, for working
976
-
977
- if (holes) holes->num=0;
978
- if(dx<3 || dy<3) return 0;
979
- b.p = buf = (unsigned char *) malloc( dx * dy );
980
- if( !buf ){
981
- fprintf( stderr, "\nFATAL: malloc(%d) failed, skip num_hole", dx*dy );
982
- return 0;
983
- }
984
- if (copybox(p, x0, y0, dx, dy, &b, dx * dy))
985
- { free(b.p); return -1;}
986
-
987
- // printf(" num_hole(");
988
- /* --- mark white-points connected with border */
989
- for (x = 0; x < b.x; x++) {
990
- if (getpixel(&b, x, 0) >= cs)
991
- mark_nn(&b, x, 0, cs, AT);
992
- if (getpixel(&b, x, b.y - 1) >= cs)
993
- mark_nn(&b, x, b.y - 1, cs, AT);
994
- }
995
- for (y = 0; y < b.y; y++) {
996
- if (getpixel(&b, 0, y) >= cs)
997
- mark_nn(&b, 0, y, cs, AT);
998
- if (getpixel(&b, b.x - 1, y) >= cs)
999
- mark_nn(&b, b.x - 1, y, cs, AT);
1000
- }
1001
-
1002
- g_debug(out_b(NULL,&b,0,0,b.x,b.y,cs);)
1003
- // --- look for unmarked white points => hole
1004
- for (x = 0; x < b.x; x++)
1005
- for (y = 0; y < b.y; y++)
1006
- if (!((marked(&b, x, y)&AT)==AT)) // unmarked
1007
- if (getpixel(&b, x, y) >= cs) { // hole found
1008
- #if 0
1009
- hole_size=mark_nn(&b, x, y, cs, AT); /* old version */
1010
- if (hole_size > 1 || dx * dy <= 40)
1011
- num_holes++;
1012
- #else
1013
- { /* new version, for future store of hole characteristics */
1014
- int x0, x1, y0, y1, i, j;
1015
- x0 = x1 = x;
1016
- y0 = y1 = y; // not used
1017
- hole_size=frame_nn(&b, x, y, &x0, &x1, &y0, &y1, cs, AT, JOB->tmp.n_run & 1);
1018
- // store hole for future use, num is initialized with 0
1019
- if (hole_size > 1 || dx * dy <= 40){
1020
- num_holes++;
1021
- if (holes) {
1022
- // sort in table
1023
- for (i=0;i<holes->num && i<MAX_HOLES;i++)
1024
- if (holes->hole[i].size < hole_size) break;
1025
- for (j=MAX_HOLES-2;j>=i;j--)
1026
- holes->hole[j+1]=holes->hole[j];
1027
- if (i<MAX_HOLES) {
1028
- // printf(" i=%d size=%d\n",i,hole_size);
1029
- holes->hole[i].size=hole_size;
1030
- holes->hole[i].x=x;
1031
- holes->hole[i].y=y;
1032
- holes->hole[i].x0=x0;
1033
- holes->hole[i].y0=y0;
1034
- holes->hole[i].x1=x1;
1035
- holes->hole[i].y1=y1;
1036
- }
1037
- holes->num++;
1038
- }
1039
- }
1040
- }
1041
- #endif
1042
- }
1043
- free(b.p);
1044
- // printf(")=%d",num_holes);
1045
- return num_holes;
1046
- }
1047
-
1048
- /* count for black nonconnected objects --- used for i,auml,ouml,etc. */
1049
- /* ToDo: obsolete, replaced by vectors and box.num_boxes */
1050
- int num_obj(int x0, int x1, int y0, int y1, pix * p, int cs) {
1051
- int x, y, rc = 0; // rc=num_obj
1052
- unsigned char *buf; // 2nd copy of picture, for working
1053
- pix b;
1054
-
1055
- if(x1<x0 || y1<y0) return 0;
1056
- b.p = buf = (unsigned char *) malloc( (x1-x0+1) * (y1-y0+1) );
1057
- if( !buf ){
1058
- fprintf( stderr, "\nFATAL: malloc(%d) failed, skip num_obj",(x1-x0+1)*(y1-y0+1) );
1059
- return 0;
1060
- }
1061
- if (copybox(p, x0, y0, x1 - x0 + 1, y1 - y0 + 1, &b, (x1-x0+1) * (y1-y0+1)))
1062
- { free(b.p); return -1; }
1063
- // --- mark black-points connected with neighbours
1064
- for (x = 0; x < b.x; x++)
1065
- for (y = 0; y < b.y; y++)
1066
- if (getpixel(&b, x, y) < cs)
1067
- if (!((marked(&b, x, y)&AT)==AT)) {
1068
- rc++;
1069
- mark_nn(&b, x, y, cs, AT);
1070
- }
1071
- free(b.p);
1072
- return rc;
1073
- }
1074
-
1075
#if 0
// ----------------------------------------------------------------------
// first idea for making recognition based on probability (disabled code)
//  - start with a list of all possible chars
//  - call recognition_of_char(box *)
//  - remove chars from list which could clearly be excluded
//  - reduce probability of chars which have wrong features
//  - a list of font types could also be built
// at the moment it is only an idea, it should be put on the todo list
//
// The three non-ASCII bytes are 228, 246 and 252 (a, o, u with umlaut).
// They have to be written as hex escapes: "\0xe4" would be a NUL byte
// followed by the text "xe4" and so end the list right after ",.".
char *list="0123456789,.\xe4\xf6\xfc"
           "abcdefghijklmnopqrstuvwxyz"
           "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
int wert[100];             // per char: how often it was excluded
int listlen=0,numrest=0;   // chars in list / chars not yet excluded
// initialize a new character list (for future)
void ini_list(){ int i;
  for(i=0;list[i]!=0 && i<100;i++) wert[i]=0;
  numrest=listlen=i; }
// exclude every char of filt from the list (for future)
void exclude(char *filt){ int i,j;
  for(j=0;filt[j]!=0 && j<100;j++)
    for(i=0;list[i]!=0 && i<100;i++)
      if( filt[j]==list[i] ) { if(!wert[i])numrest--; wert[i]++; } }
// get the result after all the work (for future):
// the only char left, or '_' if not exactly one char is left
char getresult(){ int i;
  if( numrest==1 )
    for(i=0;list[i]!=0 && i<100;i++) if(!wert[i]) return list[i];
  return '_';
}
#endif
1106
-
1107
- // look at the environment of the pixel too (contrast etc.)
1108
- // detailed analysis only of diff pixels!
1109
- //
1110
- // 100% * "distance", 0 is ideal fit
1111
- // = similarity of two chars for recognition of garbled (verstuemmelter) chars
1112
- // weight of pixels with only one same neighbour set to 0
1113
- // look at contours too! v0.2.4: B==H
1114
- // changed for v0.41, Mar06
1115
- int distance( pix *p1, struct box *box1,
1116
- pix *p2, struct box *box2, int cs){
1117
- int rc=0,x,y,v1,v2,i1,i2,rgood=0,rbad=0,x1,y1,x2,y2,dx,dy,dx1,dy1,dx2,dy2;
1118
- x1=box1->x0;y1=box1->y0;x2=box2->x0;y2=box2->y0;
1119
- dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1; dx=((dx1>dx2)?dx1:dx2);
1120
- dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1; dy=((dy1>dy2)?dy1:dy2);
1121
- if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) return 100;
1122
- // compare relations to baseline and upper line
1123
- if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
1124
- if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
1125
- // compare pixels
1126
- for( y=0;y<dy;y++ )
1127
- for( x=0;x<dx;x++ ) { // try global shift too ???
1128
- v1 =((getpixel(p1,x1+x ,y1+y )<cs)?1:0); i1=8; // better gray?
1129
- v2 =((getpixel(p2,x2+x ,y2+y )<cs)?1:0); i2=8; // better gray?
1130
- if(v1==v2) { rgood+=8; continue; } // all things are right!
1131
- // what about different pixel???
1132
- // test overlap of 8 surounding pixels ??? bad if two nb. are bad
1133
- v1=-1;
1134
- for(i1=-1;i1<2;i1++)
1135
- for(i2=-1;i2<2;i2++)if(i1!=0 || i2!=0){
1136
- if( ((getpixel(p1,x1+x+i1*(1+dx/32),y1+y+i2*(1+dy/32))<cs)?1:0)
1137
- !=((getpixel(p2,x2+x+i1*(1+dx/32),y2+y+i2*(1+dy/32))<cs)?1:0) ) v1++;
1138
- }
1139
- if (v1>0) rbad+=16*v1;
1140
- else rbad++;
1141
- }
1142
- if(rgood+rbad) rc= (100*rbad+(rgood+rbad-1))/(rgood+rbad); else rc=99;
1143
- if(rc<10 && JOB->cfg.verbose & 7){
1144
- fprintf(stderr,"\n# distance rc=%d good=%d bad=%d",rc,rgood,rbad);
1145
- // out_x(box1);out_x(box2);
1146
- }
1147
- return rc;
1148
- }
1149
-
1150
-
1151
-
1152
- // ============================= call OCR engine ================== ;)
1153
- // nrun=0 from outside, nrun=1 from inside (allows modifications, oobsolete)
1154
- wchar_t whatletter(struct box *box1, int cs, int nrun){
1155
- wchar_t bc=UNKNOWN; // best letter
1156
- wchar_t um=SPACE; // umlaut? '" => modifier
1157
- pix *p=box1->p; // whole image
1158
- int x,y,dots,xa,ya,x0,x1,y0,y1,dx,dy,i;
1159
- pix b; // box
1160
- struct box bbuf=*box1; // restore after modifikation!
1161
-
1162
- if (box1->num_ac>0 && box1->wac[0]>=JOB->cfg.certainty && bc==UNKNOWN) {
1163
- bc=box1->tac[0];
1164
- }
1165
- // if (bc!=UNKNOWN) return bc;
1166
- // if whatletter() called again, only unknown chars are processed
1167
- // bad for splitting!
1168
-
1169
- // store box data, which can be modified for modified chars in 2nd run
1170
- bbuf.x0=box1->x0; bbuf.y0=box1->y0;
1171
- bbuf.x1=box1->x1; bbuf.y1=box1->y1;
1172
-
1173
- xa=box1->x; ya=box1->y;
1174
- x0=box1->x0; y0=box1->y0;
1175
- x1=box1->x1; y1=box1->y1;
1176
- // int vol=(y1-y0+1)*(x1-x0+1); // volume
1177
- // crossed l-m , divided chars
1178
- while( get_bw(x0,x1,y0,y0,p,cs,1)!=1 && y0+1<y1) y0++;
1179
- while( get_bw(x0,x1,y1,y1,p,cs,1)!=1 && y0+1<y1) y1--;
1180
- dx=x1-x0+1;
1181
- dy=y1-y0+1; // size
1182
-
1183
- // better to proof the white frame too!!! ????
1184
- // --- test for german umlaut and points above, not robust enough???
1185
- // if three chars are connected i-dots (ari) sometimes were not detected
1186
- // - therefore after division a test could be useful
1187
- // modify y0 only in second run!?
1188
- // we need it here to have the right copybox
1189
- if (um==SPACE && dy>5 && box1->num_boxes>1)
1190
- testumlaut(box1,cs,2,&um); /* set box1->modifier + new y0 */
1191
-
1192
- dots=box1->dots;
1193
- y0 =box1->y0; // dots==2 => y0 below double dots
1194
- dy =y1-y0+1;
1195
-
1196
- // move upper and lower border (for divided letters)
1197
- while( get_bw(x0,x1,y0,y0,p,cs,1)==0 && y0+1<y1) y0++;
1198
- while( get_bw(x0,x1,y1,y1,p,cs,1)==0 && y0+1<y1) y1--;
1199
- while( get_bw(x0,x0,y0,y1,p,cs,1)==0 && x0+1<x1) x0++;
1200
- while( get_bw(x1,x1,y0,y1,p,cs,1)==0 && x0+1<x1) x1--;
1201
- dx=x1-x0+1;
1202
- dy=y1-y0+1; // size
1203
- box1->x0=x0; box1->y0=y0; // set reduced frame
1204
- box1->x1=x1; box1->y1=y1;
1205
-
1206
- // set good startpoint (probably bad from division)?
1207
- if( xa<x0 || xa>x1 || ya<y0 || ya>y1
1208
- || getpixel(p,xa,ya)>=cs /* || 2*ya<y0+y1 */ || dots>0 ){
1209
- // subfunction? also called after division of two glued chars?
1210
- for(y=y1;y>=y0;y--) // low to high (not i-dot)
1211
- for(x=(x0+x1)/2,i=0;x>=x0 && x<=x1;i++,x+=((2*i&2)-1)*i) /* is that ok? */
1212
- if (getpixel(p,x,y)<cs && (getpixel(p,x+1,y)<cs
1213
- || getpixel(p,x,y+1)<cs)){ xa=x;ya=y;y=-1;break; }
1214
- /* should box1->x,y be set? */
1215
- }
1216
-
1217
- // ----- create char-only-box -------------------------------------
1218
- // ToDo: this will be obsolete if vectors are used only
1219
- if(dx<1 || dy<1) return bc; /* should not happen */
1220
- b.p = (unsigned char *) malloc( dx * dy );
1221
- if (!b.p) fprintf(stderr,"Warning: malloc failed L%d\n",__LINE__);
1222
- if( copybox(p,x0,y0,dx,dy,&b,dx*dy) )
1223
- { free(b.p); return bc; }
1224
- // clr_bits(&b,0,b.x-1,0,b.y-1);
1225
- // ------ use diagonal too (only 2nd run?)
1226
- /* following code failes on ! and ? obsolete if vectors are used
1227
- ToDo:
1228
- - mark pixels neighoured to pixels outside and remove them from &b
1229
- v0.40
1230
- will be replaced by list of edge vectors
1231
- - mark accents, dots and remove them from &b
1232
- */
1233
- #if 1 /* becomes obsolate by vector code */
1234
- if (y0>0) // mark upper overlap
1235
- for ( x=x0; x<=x1; x++) {
1236
- if (getpixel(p,x,y0-1)<cs
1237
- && getpixel(p,x,y0 )<cs && (marked(&b,x-x0,0)&1)!=1)
1238
- mark_nn(&b,x-x0,0,cs,1);
1239
- }
1240
- if (x0>0) // mark left overlap
1241
- for ( y=y0; y<=y1; y++) {
1242
- if (getpixel(p,x0-1,y)<cs
1243
- && getpixel(p,x0 ,y)<cs && (marked(&b,0,y-y0 )&1)!=1)
1244
- mark_nn(&b,0,y-y0,cs,1);
1245
- }
1246
- if (x1<p->x-1) // mark right overlap
1247
- for ( y=y0; y<=y1; y++) {
1248
- if (getpixel(p,x1+1,y)<cs
1249
- && getpixel(p,x1 ,y)<cs && (marked(&b,x1-x0,y-y0)&1)!=1)
1250
- mark_nn(&b,x1-x0,y-y0,cs,1);
1251
- }
1252
- mark_nn(&b,xa-x0,ya-y0,cs,2); // not glued chars
1253
- for(x=0;x<b.x;x++)
1254
- for(y=0;y<b.y;y++){
1255
- if ( (marked(&b,x,y )&3)==1 && getpixel(&b,x,y )<cs )
1256
- b.p[x+y*b.x] = 255&~7; /* reset pixel */
1257
- }
1258
- #endif
1259
-
1260
- // if (bc == UNKNOWN) // cause split to fail
1261
- bc=ocr0(box1,&b,cs);
1262
-
1263
- /* ToDo: try to change pixels near cs?? or melt? */
1264
- if (box1->num_ac>0 && box1->wac[0]>=JOB->cfg.certainty && bc==UNKNOWN) {
1265
- bc=box1->tac[0];
1266
- }
1267
-
1268
- if (um!=0 && um!=SPACE && bc<127) { /* ToDo: is that obsolete now? */
1269
- wchar_t newbc;
1270
- newbc = compose(bc, um );
1271
- if (newbc == bc) { /* nothing composed */
1272
- if(JOB->cfg.verbose & 7)
1273
- fprintf(stderr, "\nDBG whatletter: compose(%s) was useless (%d,%d)",
1274
- decode(bc,ASCII), box1->x0, box1->y0);
1275
- // if(JOB->cfg.verbose & 6) out_x(box1);
1276
- }
1277
- bc = newbc;
1278
- }
1279
- // restore modified boxes
1280
- box1->x0=bbuf.x0; box1->y0=bbuf.y0;
1281
- box1->x1=bbuf.x1; box1->y1=bbuf.y1;
1282
- // if (box1->c==UNKNOWN) out_b(box1,&b,0,0,dx,dy,cs); // test
1283
-
1284
- free(b.p);
1285
- return bc;
1286
- }
1287
-
1288
- /*
1289
- ** creates a list of boxes/frames around objects detected
1290
- ** on the pixmap p for further work
1291
- ** returns number of boxes created.
1292
- ** - by the way: get average X, Y (avX=sumX/numC,..)
1293
- */
1294
- int scan_boxes( pix *p ){
1295
- int x, y, nx, cs, rc, ds;
1296
- struct box *box3;
1297
-
1298
- if (JOB->cfg.verbose)
1299
- fprintf(stderr,"# scanning boxes");
1300
-
1301
- cs = JOB->cfg.cs;
1302
- JOB->res.sumX = JOB->res.sumY = JOB->res.numC = 0;
1303
-
1304
- /* clear the lowest bits of each pixel, later used as "scanned"-marker */
1305
- clr_bits( p, 0, p->x - 1, 0, p->y - 1);
1306
-
1307
- for (y=0; y < p->y; y++)
1308
- for (x=0; x < p->x; x++)
1309
- for (ds=2; ds<7; ds+=4) { // NO - dust of size 1 is not removed !!!
1310
- nx=x+((ds==2)?-1:+1);
1311
- if (nx<0 || nx>=p->x) continue; /* out of image, ex: recframe */
1312
- if ( getpixel(p, x,y)>=cs || getpixel(p,nx,y)< cs) // b/w transition?
1313
- continue;
1314
- if ((marked(p, x,y) & 1)&&(marked(p, nx, y) & 1))
1315
- continue;
1316
- /* check (and mark) only horizontal b/w transitions */
1317
- // --- insert new box in list
1318
- box3 = (struct box *)malloc_box(NULL);
1319
- box3->x0=box3->x1=box3->x=x;
1320
- box3->y0=box3->y1=box3->y=y;
1321
- box3->num_frames=0;
1322
- box3->dots=0;
1323
- box3->num_boxes=1;
1324
- box3->num_subboxes=0;
1325
- box3->modifier='\0';
1326
- box3->num=JOB->res.numC;
1327
- box3->line=0; // not used here
1328
- box3->m1=0; box3->m2=0; box3->m3=0; box3->m4=0;
1329
- box3->p=p;
1330
- box3->num_ac=0; // for future use
1331
-
1332
- /* frame, vectorize and mark only odd/even horizontal b/w transitions
1333
- * args: box, x,y, cs, mark, diag={0,1}, ds={2,6}
1334
- * ds - start direction, 6=right of right border, 2=left of left border
1335
- * ret - 0=ok, -1=already marked, -2=max_num_frames_exceeded
1336
- * -7=no border in direction ds
1337
- * ToDo: count errors and print out for debugging
1338
- */
1339
- rc=frame_vector(box3, x, y, cs, 1, 1, ds);
1340
- g_debug(fprintf(stderr,"\n# ... scan xy= %3d %3d rc= %2d", x, y, rc);)
1341
- if (rc<0) { free_box(box3); continue; }
1342
- if (box3->num_frames && !box3->num_frame_vectors[0])
1343
- fprintf(stderr,"\nERROR scan_boxes: no vector in frame (%d,%d)",x,y);
1344
-
1345
- JOB->res.numC++;
1346
- JOB->res.sumX += box3->x1 - box3->x0 + 1;
1347
- JOB->res.sumY += box3->y1 - box3->y0 + 1;
1348
-
1349
- box3->c=(((box3->y1-box3->y0+1)
1350
- *(box3->x1-box3->x0+1)>=MaxBox)? PICTURE : UNKNOWN);
1351
- list_app(&(JOB->res.boxlist), box3); // append to list
1352
- // ToDo: debug
1353
- // if (JOB->cfg.verbose && box3->y0==29) out_x(box3);
1354
- }
1355
- if(JOB->res.numC){
1356
- if (JOB->cfg.verbose)
1357
- fprintf(stderr," nC= %3d avD= %2d %2d\n",JOB->res.numC,
1358
- (JOB->res.sumX+JOB->res.numC/2)/JOB->res.numC,
1359
- (JOB->res.sumY+JOB->res.numC/2)/JOB->res.numC);
1360
- }
1361
- return JOB->res.numC;
1362
- }
1363
-
1364
/* qsort()-style comparison of two ints.
   vr and vs point to the ints to compare.
   Returns -1, 0, or 1 according to whether
   *vr < *vs, *vr == *vs, or *vr > *vs */
int
intcompare (const void *vr, const void *vs)
{
  const int lhs = *(const int *)vr;
  const int rhs = *(const int *)vs;

  /* each comparison yields 0 or 1, the difference is the sign */
  return (lhs > rhs) - (lhs < rhs);
}
1376
-
1377
- /*
1378
- * measure_pitch - detect monospaced font and measure the pitch
1379
- * measure overall pitch for difficult lines,
1380
- * after that measure pitch per line
1381
- * dists arrays are limited to 1024 elements to reduce
1382
- * cpu usage for qsort on images with extreme high number of objects
1383
- * insert space if dist>=pitch in list_insert_spaces()
1384
- *
- * How it works: the pass l1==0 looks at the boxes of all lines, every
- * later pass only at boxes with box->line==l1. For each pair of
- * consecutive boxes the center distance (mdist) and the gap (pdist)
- * are stored, then both arrays are sorted and a pitch is derived for
- * monospaced (pitch_m) and for proportional (pitch_p) text.
- * The mono/proportional decision is taken in pass 0 only.
- *
- * job: job->cfg.verbose and job->cfg.spc are read, job->res.boxlist is
- * walked; job->res.lines.pitch[], job->res.lines.mono[] are written
- * and job->cfg.spc is set if it was 0 (not given by option).
- * Returns nothing.
- *
- * ToDo: ???
1385
- * - min/max distance-matrix a-a,a-b,a-c,a-d ... etc; td,rd > ie,el,es
1386
- * - OR measuring distance as min. pixel distance instead of box distance
1387
- * especially useful for italic font!
1388
- */
1389
- void measure_pitch( job_t *job ){
1390
- int numdists=0, spc=0, /* number of stored distances */
1391
- pitch_p=2, pdist, pdists[1024], /* proportional distances */
1392
- pitch_m=6, mdist, mdists[1024], /* monospaced distances */
1393
- monospaced=0, l1;
1394
- struct box *box2, *prev=NULL; /* NOTE(review): prev is not reset at the start of each l1 pass, so it carries over from the previous pass */
1395
-
1396
- if(job->cfg.verbose){ fprintf(stderr,"# check for word pitch"); }
1397
- for (l1=0; l1<job->res.lines.num; l1++)
1398
- { /* 0 means all lines */
1399
- if(job->cfg.verbose){ fprintf(stderr,"\n# line %2d",l1); }
1400
- numdists = 0; /* clear distance lists */
1401
- for_each_data(&(job->res.boxlist)) {
1402
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
1403
- if (l1>0 && box2->line!=l1) continue; /* ignore other lines */
1404
- /* ignore dots and pictures (min. font is 4x6) */
1405
- if (box2->y1 - box2->y0 + 1 < 4 || box2->c==PICTURE) { prev=NULL; } /* drops the predecessor; the next statement then makes box2 itself the new predecessor */
1406
- if (!prev) { prev=box2; continue; } /* we need a predecessor */
1407
- /* use center distance for monospaced fonts */
1408
- mdist = ((box2->x0 + box2->x1) - (prev->x0 + prev->x1) + 1)/2;
1409
- /* use gap for proportional fonts */
1410
- pdist = box2->x0 - prev->x1 + 1;
1411
- /* ToDo: better take 3 instead of 2 neighbours?, smallest font 4x6 */
1412
- /* fonts are expected to be 6 to 60 pixels high, which is about
1413
- 4 to 50 pixels wide. We allow some extra margin. */
1414
- if (3 < mdist && mdist < 150) { /* better mdist < 3*Xaverage ? */
1415
- /* two options for overflow: 1) ignore, 2) store randomly */
1416
- if (numdists<1024) { /* we do ignore here */
1417
- mdists[numdists] = mdist;
1418
- pdists[numdists] = pdist;
1419
- numdists++;
1420
- }
1421
- }
1422
- prev = box2;
1423
- } end_for_each(&(job->res.boxlist));
1424
-
1425
- if(job->cfg.verbose){ fprintf(stderr," num_gaps= %2d",numdists); }
1426
- if( numdists<8 ){
1427
- if (job->cfg.verbose && l1==0) /* only for all lines */
1428
- fprintf(stderr," (WARNING num_gaps<8)");
1429
- }
1430
- if (numdists>0) {
1431
- int i,diff,ni_min,max,best_m,best_p,ni; double v;
1432
- /* aware: takes long time for big data sets */
1433
- /* dilute? (german: ausduennen?) */
1434
- qsort (mdists, numdists, sizeof (int), intcompare);
1435
- qsort (pdists, numdists, sizeof (int), intcompare);
1436
- /* the new method, div0? */
1437
- v = (mdists[numdists*7/10]-mdists[numdists/5]) /* spread between the sorted entries at 70% and at 20%, relative to the 20% entry; stored mdist values are all >3, see the filter above */
1438
- /(double)mdists[numdists/5];
1439
- /* measurements showed v=.09 for Courier and .44 for Times-Roman */
1440
- if (l1==0) monospaced = (v < .22);
1441
- best_m= numdists/5;
1442
- best_p=4*numdists/5;
1443
- /* try to find better pitch for monospaced font (ok for prop) */
1444
- for (i=numdists/5+1;i<numdists;i++) {
1445
- if (2*mdists[i]>=3*mdists[best_m]) { best_m=i-1; break; } /* first entry reaching 1.5 times the 20% entry: take the one just before it */
1446
- }
1447
- /* try to find better pitch for proportional font */
1448
- // the largest diff could be the best, if diff is always 1,
1449
- // take the diff with the lowest weight
1450
- for (ni=ni_min=1024,max=0,i=numdists/2+1;i<numdists-numdists/16;i++) { /* ni counts the entries since the last step (diff!=0), ni_min is the smallest such count seen */
1451
- diff=pdists[i]-pdists[i-1];
1452
- if (diff>max) {
1453
- max=diff; best_p=i-1;
1454
- if ((job->cfg.verbose&(32+16))==48)
1455
- fprintf(stderr," best_p=%d maxdiff=%d\n# ...", pdists[best_p], max);
1456
- if (max>3 && 3*pdists[i]>=4*pdists[i-1]) { break; }
1457
- }
1458
- if (diff) {
1459
- if (ni<ni_min) {
1460
- // do not try to divide one word per line
1461
- ni_min=ni; if (max<=1 && numdists>16) best_p=i-1;
1462
- if ((job->cfg.verbose&(32+16))==48)
1463
- fprintf(stderr," best_p=%d ni_min=%d\n# ...", pdists[best_p], ni_min);
1464
- }
1465
- ni=1;
1466
- } else ni++;
1467
- }
1468
- if (numdists<16 && max<=1 && ni_min>1) best_p=numdists-1; // one word
1469
- #if 1 /* debugging */
1470
- if ((job->cfg.verbose&(32+16))==48) {
1471
- fprintf(stderr,"\n# ...");
1472
- for (i=0;i<numdists;i++) fprintf(stderr," %2d",mdists[i]);
1473
- fprintf(stderr," <- mdist[%d]\n# ...",l1);
1474
- for (i=0;i<numdists;i++) fprintf(stderr," %2d",pdists[i]);
1475
- fprintf(stderr," <- pdist[%d]\n# ...",l1);
1476
- fprintf(stderr," maxdiff=%d min_samediffs=%d\n# ...",max,ni_min);
1477
- }
1478
- #endif
1479
- /* we measure spaces in two different ways (mono, prop) */
1480
- /* prop: gap between boxes, mono: distance of middle */
1481
- if (best_p<numdists-1) pitch_p = ((pdists[best_p]+pdists[best_p+1])/2+1);
1482
- else pitch_p = (pdists[best_p]+1 );
1483
- pitch_m = (mdists[best_m]*4/3);
1484
- if (numdists) /* always true here, the enclosing block requires numdists>0 */
1485
- if ( pdists[numdists-1]*2 <= pdists[0]*3
1486
- || pdists[numdists-1] <= pdists[0]+3) {
1487
- /* line is just a single word */
1488
- pitch_p = pdists[numdists-1]+10;
1489
- }
1490
- if (l1>0 && job->cfg.spc==0) { /* per-line result, only if no space width was given by option */
1491
- job->res.lines.pitch[l1]=(monospaced?pitch_m:pitch_p);
1492
- job->res.lines.mono[l1]=monospaced;
1493
- }
1494
- if (job->cfg.verbose) {
1495
- fprintf(stderr,"\n# ..."
1496
- " mono: v=%f (v<0.22) line=%d numdists=%d\n# ...",
1497
- v, l1, numdists);
1498
- fprintf(stderr," mono: min=%3d max=%3d pitch=%3d @ %2d%%\n# ...",
1499
- mdists[0],mdists[numdists-1],pitch_m,best_m*100/numdists);
1500
- fprintf(stderr," prop: min=%3d max=%3d pitch=%3d @ %2d%%\n# ...",
1501
- pdists[0],pdists[numdists-1],pitch_p,best_p*100/numdists);
1502
- fprintf(stderr," result: distance >= %d considered space\n# ...",
1503
- job->res.lines.pitch[l1]);
1504
- }
1505
- } /* if (not) enough spaces */
1506
- if (l1==0) { /* set default spaces to each line */
1507
- int l2;
1508
- spc = job->cfg.spc;
1509
- if (spc==0) /* set only if not set by option */
1510
- spc = ((monospaced)?pitch_m:pitch_p);
1511
- for (l2=0; l2<job->res.lines.num; l2++ )
1512
- job->res.lines.pitch[l2]=spc;
1513
- }
1514
- } /* each line */
1515
- if (job->cfg.spc==0)
1516
- job->cfg.spc = spc;
1517
- if (job->cfg.verbose)
1518
- fprintf(stderr," overall space width is %d %s\n",
1519
- spc, ((monospaced)?"monospaced":"proportional"));
1520
-
1521
-
1522
- }
1523
-
1524
- /* ---- count subboxes (white holes within black area) --------
1525
- * new: count boxes lying inside another box (usually holes, ex: "aeobdg")
1526
- * needed for glue_boxes, dont glue textboxes, tables and other complex
1527
- * objects
1528
- * ToDo: count only frames of invers spin? do we need sorted list here? -> no
1529
- */
1530
- int count_subboxes( pix *pp ){
1531
- int ii=0, num_mini=0, num_same=0, cnt=0;
1532
- struct box *box2,*box4;
1533
- progress_counter_t *pc = NULL;
1534
- if (JOB->cfg.verbose) { fprintf(stderr,"# count subboxes\n# ..."); }
1535
-
1536
- pc = open_progress(JOB->res.boxlist.n,"count_subboxes");
1537
- for_each_data(&(JOB->res.boxlist)) {
1538
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
1539
- box2->num_subboxes=0;
1540
- progress(cnt++,pc);
1541
- if ( (box2->x1 - box2->x0)<2
1542
- || (box2->y1 - box2->y0)<2) continue; /* speedup for dotted bg */
1543
- // holes inside box2 char, aoebdqg, 0.41
1544
- for_each_data(&(JOB->res.boxlist)) {
1545
- box4=(struct box *)list_get_current(&(JOB->res.boxlist));
1546
- if (box4->y0 > box2->y1) break; // faster, but boxes need to be sorted
1547
- // ToDo: better use binary tree (above/below x) to find near boxes?
1548
- if (box4==box2) continue;
1549
- if( box4->x0==box2->x0 && box4->x1==box2->x1
1550
- && box4->y0==box2->y0 && box4->y1==box2->y1)
1551
- num_same++; /* erroneous!? */
1552
- if ( box4->x0 >= box2->x0 && box4->x1 <= box2->x1
1553
- && box4->y0 >= box2->y0 && box4->y1 <= box2->y1
1554
- && box4->num_subboxes==0 ) /* box4 inside box2? */
1555
- {
1556
- box2->num_subboxes++; ii++;
1557
- if ((box4->x1 - box4->x0 + 1)
1558
- *(box4->y1 - box4->y0 + 1)<17) num_mini++;
1559
- }
1560
- } end_for_each(&(JOB->res.boxlist));
1561
- #if 0
1562
- if (cnt < 1000 && JOB->cfg.verbose)
1563
- fprintf(stderr," %4d box %4d %4d %+3d %+3d subboxes %4d\n# ...",
1564
- cnt, box2->x0, box2->y0, box2->x1-box2->x0,
1565
- box2->y1-box2->y0, box2->num_subboxes);
1566
- #endif
1567
- } end_for_each(&(JOB->res.boxlist));
1568
- close_progress(pc);
1569
- if (JOB->cfg.verbose)
1570
- fprintf(stderr," %3d subboxes counted (mini=%d, same=%d) nC= %d\n",
1571
- ii, num_mini, num_same/2 /* counted twice */, cnt);
1572
- return 0;
1573
- }
1574
-
1575
- /* ---- glue holes to chars ( before step1 ) v0.42 -----------------------
1576
- glue boxes lying inside another box (usually holes, ex: "aeobdg46890")
1577
- Dont add dust to a char!
1578
- lines are not detected yet
-
- Calls count_subboxes(pp) first. Then for every box2 of
- JOB->res.boxlist which is no PICTURE and has at most 7 subboxes, every
- other non-PICTURE box4 is tested: if box4 has exactly the same frame as
- box2, or lies inside the frame of box2 and has no subboxes, box4 is
- taken out of the list and freed, and JOB->res.numC is decremented.
- pp is only handed on to count_subboxes(). Always returns 0.
1579
- */
1580
- int glue_holes_inside_chars( pix *pp ){
1581
- int ii, cs, x0, y0, x1, y1, cnt=0,
1582
- glued_same=0, glued_holes=0;
1583
- struct box *box2, *box4;
1584
- progress_counter_t *pc = NULL;
1585
- cs=JOB->cfg.cs; /* cs is not read again in this function */
1586
- {
1587
- count_subboxes( pp ); /* move to pgm2asc() later */
1588
-
1589
- pc = open_progress(JOB->res.boxlist.n,"glue_holes_inside_chars");
1590
- if (JOB->cfg.verbose)
1591
- fprintf(stderr,"# glue holes to chars nC= %d\n# ...",JOB->res.numC);
1592
- ii=0;
1593
- for_each_data(&(JOB->res.boxlist)) {
1594
- // get the box (box2) which may absorb boxes lying inside its frame
1595
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
1596
- x0 = box2->x0; x1 = box2->x1;
1597
- y0 = box2->y0; y1 = box2->y1;
1598
-
1599
- progress(cnt++,pc);
1600
-
1601
- // would it better than moving vectors to build a sub-box-tree?
1602
-
1603
- // do not remove chars inside pictures (car plates on photos)
1604
- if( box2->c == PICTURE || box2->num_subboxes > 7) continue;
1605
-
1606
- // holes inside char, aoebdqg, 0.41
1607
- // dont merge boxes which have subboxes by itself!
1608
- // search boxes inside box2
1609
- // if (x1-x0+1>2 || y1-y0+1>2) /* skip tiny boxes, bad for 4x6 */
1610
- for_each_data(&(JOB->res.boxlist)) {
1611
- box4=(struct box *)list_get_current(&(JOB->res.boxlist));
1612
- if(box4!=box2 && box4->c != PICTURE )
1613
- {
1614
- // ToDo: dont glue, if size differs by big factors (>16?)
1615
- if ( ( box4->x0==x0 && box4->x1==x1
1616
- && box4->y0==y0 && box4->y1==y1 ) /* do not happen !? */
1617
- || ( box4->x0>=x0 && box4->x1<=x1
1618
- && box4->y0>=y0 && box4->y1<=y1
1619
- && box4->num_subboxes==0 ) ) /* no or very small subboxes? */
1620
- { // fkt melt(box2,box4)
1621
- // same box, if very small but hollow char (4x5 o)
1622
- if( box4->x0==x0 && box4->x1==x1
1623
- && box4->y0==y0 && box4->y1==y1) glued_same++; else glued_holes++;
1624
- // fprintf(stderr,"\n# DEBUG merge:");
1625
- // out_x(box2); // outer box
1626
- // out_x(box4); // box inside
1627
- if ((JOB->cfg.verbose & 7)==7) // LEV3
1628
- fprintf(stderr," glue hole (%4d %4d %+3d %+3d %+4d)"
1629
- " (%4d %4d %+3d %+3d %+4d) %d\n# ...",
1630
- x0, y0, x1-x0+1, y1-y0+1, box2->frame_vol[0],
1631
- box4->x0, box4->y0,
1632
- box4->x1-box4->x0+1, box4->y1-box4->y0+1,
1633
- box4->frame_vol[0], glued_same);
1634
- if ((box4->x1-box4->x0+1)< 8*(x1-x0+1)
1635
- || (box4->y1-box4->y0+1)<12*(y1-y0+1)) // skip dust; NOTE(review): this test guards only the merge_boxes() call on the next line, box4 is removed below in either case
1636
- merge_boxes( box2, box4 ); // add box4 to box2
1637
- // out_x(box2);
1638
- x0 = box2->x0; x1 = box2->x1; // re-read the frame of box2 after the merge
1639
- y0 = box2->y0; y1 = box2->y1;
1640
- JOB->res.numC--; // dont count fragments as chars
1641
- ii++; // count removed
1642
- list_del(&(JOB->res.boxlist), box4); // remove box4
1643
- free_box(box4);
1644
- // now search another hole inside box2
1645
- }
1646
- }
1647
- } end_for_each(&(JOB->res.boxlist));
1648
-
1649
- } end_for_each(&(JOB->res.boxlist));
1650
-
1651
- if (JOB->cfg.verbose)
1652
- fprintf(stderr," glued: %3d holes, %3d same, nC= %d\n",
1653
- glued_holes, glued_same, JOB->res.numC);
1654
- close_progress(pc);
1655
- }
1656
- return 0;
1657
- }
1658
-
1659
-
1660
- /* ---- glue broken chars ( before step1 ??? ) -----------------------
1661
- use this carefully, do not destroy previous detection ~fi, broken K=k' g
1662
- glue if boxes are near or diagonally connected
1663
- other strategy: mark boxes for deleting and delete in extra loop at end
1664
- faster: check only next two following boxes because list is sorted!
1665
- ToDo: store m4 of upper line to m4_of_prev_line, and check that "-points are below
1666
- done: glue boxes lying inside another box (usually holes, ex: "aeobdg")
1667
- Dont add dust to a char!
1668
- lines should be detected already (Test it for m1-m4 unknown)
1669
- ToDo: divide in glue_idots, glue_thin_chars etc. and optimize it
-
- Calls count_subboxes(pp) first. Then for every box2 of
- JOB->res.boxlist (no PICTURE, at most 7 subboxes, not outside its line)
- three kinds of repair are tried, each one merging another box4 of the
- same line into box2 with merge_boxes() and removing box4 from the list:
- 1. small fragments above or below the middle of the line (dots of
- umlauts, i, j, !, ?, parts of = ; :) are glued to the x-nearest box
- 2. horizontally broken w' K'
- 3. horizontally broken n h
- Kinds 2 and 3 also set pixels on pp with put() and decrement
- JOB->res.numC; kind 1 leaves numC unchanged.
- pp: image the boxes belong to. Always returns 0.
1670
- */
1671
- int glue_broken_chars( pix *pp ){
1672
- int ii, y, cs, x0, y0, x1, y1, cnt=0,
1673
- num_frags=0, glued_frags=0, glued_hor=0;
1674
- struct box *box2, *box4;
1675
- progress_counter_t *pc = NULL;
1676
- cs=JOB->cfg.cs;
1677
- {
1678
- count_subboxes( pp ); /* move to pgm2asc() later */
1679
-
1680
- pc = open_progress(JOB->res.boxlist.n,"glue_broken_chars");
1681
- if (JOB->cfg.verbose)
1682
- fprintf(stderr,"# glue broken chars nC= %d\n# ...",JOB->res.numC);
1683
- ii=0;
1684
- for_each_data(&(JOB->res.boxlist)) {
1685
- // get the box which may be extended by boxes around it
1686
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
1687
- x0 = box2->x0; x1 = box2->x1;
1688
- y0 = box2->y0; y1 = box2->y1;
1689
-
1690
- progress(cnt++,pc);
1691
-
1692
- // vertical broken (g965T umlauts etc.)
1693
- // not: f,
1694
-
1695
- // would it better than moving vectors to build a sub-box-tree?
1696
-
1697
- // do not remove chars inside pictures (car plates on photos)
1698
- if( box2->c == PICTURE || box2->num_subboxes > 7) continue;
1699
-
1700
- /* continue loop if box is below or above line */
1701
- if( box2->m4>0 && y0>box2->m4 ) continue; /* dust outside ? */
1702
- if( box2->m1>0 && y0<box2->m1-(box2->m3-box2->m2) ) continue;
1703
- /* ToDo:
1704
- * - check that y0 is greater as m3 of the char/line above
1705
- */
1706
-
1707
- // check small boxes (box2) whether they belong
1708
- // to near same size or bigger boxes (box4)
1709
- if( 2*(y1-y0) < box2->m4 - box2->m1 // care for dots etc.
1710
- && ( 2*y1<=(box2->m3+box2->m2) // upper fragments
1711
- || 2*y0>=(box2->m3+box2->m2)) ) { // lower fragments
1712
- struct box *box5=NULL, *box6=NULL; // nearest and next nearest box; box6 is only assigned below, never read
1713
- box4=NULL;
1714
- num_frags++; /* count for debugging */
1715
- // get the [2nd] next x-nearest box in the same line
1716
- for_each_data(&(JOB->res.boxlist)) {
1717
- box4=(struct box *)list_get_current(&(JOB->res.boxlist));
1718
- if (box4 == box2 || box4->c == PICTURE) continue;
1719
- /* 0.42 speed up for background pixel pattern, box4 too small */
1720
- if ( box4->x1 - box4->x0 + 1 < x1-x0+1
1721
- && box4->y1 - box4->y0 + 1 < y1-y0+1 ) continue;
1722
- // have in mind that line number may be wrong for dust
1723
- if (box4->line>=0 && box2->line>=0 && box4->line==box2->line)
1724
- {
1725
- if (!box5) box5=box4;
1726
- if ( abs(box4->x0 + box4->x1 - 2*box2->x0) // box4 center (times 2) closer to the left edge of box2 than that of box5?
1727
- <abs(box5->x0 + box5->x1 - 2*box2->x0))
1728
- { box6=box5; box5=box4; }
1729
- }
1730
- } end_for_each(&(JOB->res.boxlist));
1731
- box4=box5; // x-nearest candidate found in the same line, NULL if there was none
1732
- if (box4) {
1733
- #if 0 /* set this to 1 for debugging of melting bugs */
1734
- if (JOB->cfg.verbose & 7) {
1735
- fprintf(stderr,"\n# next two boxes are candidates for melting ");
1736
- out_x(box2);
1737
- out_x(box4); }
1738
- #endif
1739
- if( /* umlaut "a "o "u, ij; box2 is the small dot, box4 the body */
1740
- ( y1 <= box2->m2
1741
- && box4->y1 >= box2->m2 // dont melt dots together
1742
- && 2* y1 < box4->y1 + box4->y0 // box2 above box4
1743
- && box4->x1+JOB->res.avX/2>=x0
1744
- && box4->x0-JOB->res.avX/2<=x1
1745
- && (y1 < box4->y0 || x0 < box4->x1) // dont melt "d'"
1746
- && 3* ( y1 - box4->y0)
1747
- <= 2* (box4->y1 - box4->y0) // too far away? dust!
1748
- && 8* ( x1 - x0 + 1)
1749
- >= (box4->x1 - box4->x0 + 1) // dot must have minimum size
1750
- && 10* ( y1 - y0 + 1)
1751
- >= (box4->y1 - box4->y0 + 1) // dot must have minimum size
1752
- ) || ( 0 && /* broken T -- this alternative is switched off by the constant 0 */
1753
- 3*(box2->x1 - box2->x0) > 2*JOB->res.avX
1754
- && 4*box4->x0>3*box2->x0+box2->x1
1755
- && 4*box4->x1<box2->x0+3*box2->x1
1756
- )
1757
- || /* !?; box2 is the dot, box4 the body */
1758
- ( 2*box4->x1>=x0+x1 /* test if box4 is around box2 */
1759
- && 2*box4->x0<=2*x1 /* +x0+1 Jan00 */
1760
- && ( x1-x0 <= box4->x1-box4->x0+2 )
1761
- && 2*y0>=box2->m2+box2->m3
1762
- && 4*y1>=box2->m2+3*box2->m3
1763
- && 4*(y1-y0)<box2->m4-box2->m1
1764
- && (8*box4->y1 < box4->m2+7*box4->m3
1765
- || box4->m4-box4->m1<16) /* Jan00 */
1766
- )
1767
- || /* =;: box2 is the upper box, box4 the lower box */
1768
- ( 2*box4->x1>=x0+x1 /* test if box4 is around box2 */
1769
- && 2*box4->x0<=2*x1 /* +x0+1 */
1770
- && ( x1-x0 <= box4->x1-box4->x0+4 )
1771
- && ( 4*x0 <= 3*box4->x1+box4->x0 )
1772
- && (( box2->m2 && box4->m2
1773
- && y1< box2->m3
1774
- && 2*box4->y1 > box4->m3+box4->m2 // can be bigger than m3
1775
- && 4*box4->y0 >= 3*box4->m2+box4->m3
1776
- && 2*box2->y0 < box2->m3+box2->m2
1777
- )
1778
- || ( (!box2->m2) || (!box4->m2) )
1779
- )
1780
- )
1781
- )
1782
- { // fkt melt(box2,box4)
1783
- if (JOB->cfg.verbose & 7)
1784
- fprintf(stderr," glue objects (%3d %3d %+3d %+3d)"
1785
- " (%3d %3d %+3d %+3d)\n# ...",
1786
- x0, y0, x1-x0+1, y1-y0+1, box4->x0, box4->y0,
1787
- box4->x1-box4->x0+1, box4->y1-box4->y0+1);
1788
- // fprintf(stderr,"\n# DEBUG merge:"); // d=7x34 @ (109,51) ???
1789
- // out_x(box2);
1790
- // out_x(box4);
1791
- merge_boxes( box2, box4 ); // add box4 to box2
1792
- x0 = box2->x0; x1 = box2->x1; // re-read the frame of box2 after the merge
1793
- y0 = box2->y0; y1 = box2->y1;
1794
- // if (JOB->cfg.verbose & 4) out_x(box2);
1795
- // JOB->res.numC--; // dont count fragments as chars
1796
- ii++; glued_frags++; // remove
1797
- // output_list(JOB);
1798
- list_del(&(JOB->res.boxlist), box4); /* ret&1: error-message ??? */
1799
- // output_list(JOB);
1800
- free_box(box4);
1801
- }
1802
- }
1803
- }
1804
- // continue;
1805
-
1806
- // horizontally broken w' K'
1807
- if( 2*y1 < (box2->m3+box2->m2) )
1808
- if( 2*(y1-y0) < (box2->m3+box2->m2) ) // fragment
1809
- for_each_data(&(JOB->res.boxlist)) {
1810
- box4=(struct box *)list_get_current(&(JOB->res.boxlist));
1811
- if(box4!=box2 && box4->c != PICTURE )
1812
- {
1813
- if( box4->line>=0 && box4->line==box2->line
1814
- && box4->x1>=x0-1 && box4->x1<x0 // do not glue 6-
1815
- && box4->x0+3*box4->x1<4*x0)
1816
- if( get_bw(x0 ,x0 ,y1,y1 ,pp,cs,1) == 1)
1817
- if( get_bw(x0-2,x0-1,y1,y1+2,pp,cs,1) == 1)
1818
- { // fkt melt(box2,box4)
1819
- put(pp,x0,y1+1,~(128+64),0); // changes the image pixel at (x0,y1+1) before the boxes are merged
1820
- merge_boxes( box2, box4 );
1821
- x0 = box2->x0; x1 = box2->x1;
1822
- y0 = box2->y0; y1 = box2->y1;
1823
- JOB->res.numC--; ii++; // remove
1824
- glued_hor++;
1825
- list_del(&(JOB->res.boxlist), box4);
1826
- free_box(box4);
1827
- }
1828
- }
1829
- } end_for_each(&(JOB->res.boxlist));
1830
-
1831
- // horizontally broken n h (h=l_) v0.2.5 Jun00
1832
- if( abs(box2->m2-y0)<=(y1-y0)/8 )
1833
- if( abs(box2->m3-y1)<=(y1-y0)/8 )
1834
- if( num_cross(x0, x1,(y0+ y1)/2,(y0+ y1)/2,pp,cs) == 1)
1835
- if( num_cross(x0, x1,(y0+3*y1)/4,(y0+3*y1)/4,pp,cs) == 1)
1836
- if( get_bw((3*x0+x1)/4,(3*x0+x1)/4,(3*y0+y1)/4,y1,pp,cs,1) == 0)
1837
- if( get_bw(x0,(3*x0+x1)/4,(3*y0+y1)/4,(y0+3*y1)/4,pp,cs,1) == 0)
1838
- if( get_bw(x0, x0, y0,(3*y0+y1)/4,pp,cs,1) == 1)
1839
- for_each_data(&(JOB->res.boxlist)) {
1840
- box4=(struct box *)list_get_current(&(JOB->res.boxlist));
1841
- if(box4!=box2 && box4->c != PICTURE )
1842
- {
1843
- if( box4->line>=0 && box4->line==box2->line
1844
- && box4->x1>x0-3 && box4->x1-2<x0
1845
- && abs(box4->y1-box2->m3)<2)
1846
- { // fkt melt(box2,box4)
1847
- y=loop(pp,x0,y0,y1-y0,cs,0,DO);if(2*y>y1-y0) continue; // value of loop() beyond half the box height: leave this candidate alone
1848
- put(pp,x0-1,y0+y ,~(128+64),0);
1849
- put(pp,x0-1,y0+y+1,~(128+64),0);
1850
- merge_boxes( box2, box4 ); // add box4 to box2
1851
- x0 = box2->x0; x1 = box2->x1;
1852
- y0 = box2->y0; y1 = box2->y1;
1853
- JOB->res.numC--; ii++; // remove
1854
- glued_hor++;
1855
- list_del(&(JOB->res.boxlist), box4);
1856
- free_box(box4);
1857
- }
1858
- }
1859
- } end_for_each(&(JOB->res.boxlist));
1860
- } end_for_each(&(JOB->res.boxlist));
1861
- if (JOB->cfg.verbose)
1862
- fprintf(stderr," glued: %3d fragments (found %3d), %3d rest, nC= %d\n",
1863
- glued_frags, num_frags, glued_hor, JOB->res.numC);
1864
- close_progress(pc);
1865
- }
1866
- return 0;
1867
- }
1868
-
1869
- /*
1870
- ** this is a simple way to improve results on noisy images:
1871
- ** - find similar chars (build cluster of same chars)
1872
- ** - analyze clusters (could be used for generating unknown font-base)
1873
- ** - the quality of the result depends mainly on the distance function
1874
- **
- ** First pass: every box wider than 3 pixels is compared by distance()
- ** with all boxes following it in JOB->res.boxlist; where the distance
- ** is below 5 the cluster number (box->num) of the other box is replaced
- ** by the number of this box in the list.
- ** Second pass: for the first box of every cluster the members are
- ** counted and the largest distance to that first box is determined;
- ** the numbers are only printed (verbose bit 8).
- ** pp: image of the boxes. Always returns 0.
- */
1875
- // ---- analyse boxes, compare chars, compress picture ------------
1876
- // ToDo: - error-correction only on large chars!
1877
- int find_same_chars( pix *pp){
1878
- int i,k,d,cs,dist,n1,dx; struct box *box2,*box3,*box4,*box5;
1879
- pix p=(*pp); /* struct copy of *pp; distance() below is given &p */
1880
- cs=JOB->cfg.cs;
1881
- {
1882
- if(JOB->cfg.verbose)fprintf(stderr,"# packing");
1883
- i = list_total(&(JOB->res.boxlist)); /* start value from list_total(); decremented for each match below */
1884
- for_each_data(&(JOB->res.boxlist)) {
1885
- box4 = box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
1886
- dist=1000; // 100% maximum
1887
- dx = box2->x1 - box2->x0 + 1;
1888
-
1889
- if(JOB->cfg.verbose)fprintf(stderr,"\r# packing %5d",i);
1890
- if( dx>3 )
1891
- for(box3=(struct box *)list_next(&(JOB->res.boxlist),box2);box3;
1892
- box3=(struct box *)list_next(&(JOB->res.boxlist),box3)) {
1893
- if(box2->num!=box3->num){
1894
- int d=distance(&p,box2,&p,box3,cs); /* this d shadows the function-level d */
1895
- if ( d<dist ) { dist=d; box4=box3; } // best fit
1896
- if ( d<5 ){ // good limit = 5% ???
1897
- i--;n1=box3->num; // give every box numbered n1 (the old number of box3) the number of box2
1898
- for_each_data(&(JOB->res.boxlist)) {
1899
- box5=(struct box *)(struct box *)list_get_current(&(JOB->res.boxlist));
1900
- if(box5!=box2)
1901
- if( box5->num==n1 ) box5->num=box2->num;
1902
- } end_for_each(&(JOB->res.boxlist));
1903
- // out_x2(box2,box5);
1904
- // fprintf(stderr," dist=%d\n",d);
1905
- }
1906
- }
1907
- }
1908
- // nearest dist to box2 has box4
1909
- // out_b2(box2,box4);
1910
- // fprintf(stderr," dist=%d\n",dist);
1911
- } end_for_each(&(JOB->res.boxlist));
1912
- k=0;
1913
- if(JOB->cfg.verbose)fprintf(stderr," %d different chars",i);
1914
- for_each_data(&(JOB->res.boxlist)) {
1915
- struct box *box3,*box4; /* shadow the function-level box3 and box4 */
1916
- int j,dist; /* dist shadows the function-level dist */
1917
- box2=(struct box *)list_get_current(&(JOB->res.boxlist));
1918
- for(box3=(struct box *)list_get_header(&(JOB->res.boxlist));
1919
- box3!=box2 && box3!=NULL;
1920
- box3=(struct box *)list_next(&(JOB->res.boxlist), box3))
1921
- if(box3->num==box2->num)break;
1922
- if(box3!=box2 && box3!=NULL)continue; /* an earlier box has the same number: this cluster was handled already */
1923
- i++; /* NOTE(review): i still holds the count left by the first pass, so the "no %d" numbering below continues from there -- confirm this is intended */
1924
- // count number of same chars
1925
- dist=0;box4=box2;
1926
-
1927
- for(box3=box2,j=0;box3;
1928
- box3=(struct box *)list_next(&(JOB->res.boxlist), box3)) {
1929
- if(box3->num==box2->num){
1930
- j++;
1931
- d=distance(&p,box2,&p,box3,cs);
1932
- if ( d>dist ) { dist=d; box4=box3; } // worst fit
1933
- }
1934
- }
1935
- if(JOB->cfg.verbose&8){
1936
- out_x2(box2,box4);
1937
- fprintf(stderr," no %d char %4d %5d times maxdist=%d\n",i,box2->num,j,dist);
1938
- }
1939
- // calculate mean-char (error-correction)
1940
- // ToDo: calculate maxdist in group
1941
- k+=j; /* running sum of cluster sizes, only used in the message below */
1942
- // if(j>1)
1943
- // out_b(box1,NULL,0,0,0,0,cs);
1944
- if(JOB->cfg.verbose&8)
1945
- fprintf(stderr," no %d char %4d %5d times sum=%d\n",i,box2->num,j,k);
1946
- } end_for_each(&(JOB->res.boxlist));
1947
- if(JOB->cfg.verbose)fprintf(stderr," ok\n");
1948
- }
1949
- return 0;
1950
- }
1951
-
1952
- /*
1953
- ** call the first engine for all boxes (the direct box->c=result assignment is commented out below);
1954
- ** mo: mode bits - (mo&256)==0 runs whatletter(), (mo&2) additionally runs ocr_db(); always returns 0
1955
- */
1956
- int char_recognition( pix *pp, int mo){
1957
- int i,ii,ni,cs,x0,y0,x1,y1;
1958
- struct box *box2;
1959
- progress_counter_t *pc;
1960
- wchar_t cc;
1961
- cs=JOB->cfg.cs;
1962
- // ---- analyse boxes, find chars ---------------------------------
1963
- if (JOB->cfg.verbose)
1964
- fprintf(stderr,"# char recognition");
1965
- i=ii=ni=0; // first pass: i = UNKNOWN boxes, ii = PICTURE boxes, ni = all boxes
1966
- for_each_data(&(JOB->res.boxlist)) { /* count boxes */
1967
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
1968
- /* wew: isn't this just JOB->res.numC? */
1969
- /* js: The program is very complex. I am not sure anymore
1970
- whether numC is the number of boxes or the number of valid
1971
- characters.
1972
- Because it's not time consuming I count the boxes here. */
1973
- if (box2->c==UNKNOWN) i++;
1974
- if (box2->c==PICTURE) ii++;
1975
- ni++;
1976
- } end_for_each(&(JOB->res.boxlist));
1977
- if(JOB->cfg.verbose)
1978
- fprintf(stderr," unknown= %d picts= %d boxes= %d\n# ",i,ii,ni);
1979
- if (!ni) return 0; // empty box list: nothing to recognize
1980
- i=ii=0; // second pass: i = still unidentified, ii = boxes processed
1981
- pc = open_progress(ni,"char_recognition");
1982
- for_each_data(&(JOB->res.boxlist)) {
1983
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
1984
- x0=box2->x0;x1=box2->x1;
1985
- y0=box2->y0;y1=box2->y1; // box
1986
- cc=box2->c;
1987
- if (cc==PICTURE) continue; // pictures are skipped and not counted in ii
1988
-
1989
- if ((mo&256)==0) { /* this case should be default (main engine) */
1990
- if(cc==UNKNOWN || box2->num_ac==0 || box2->wac[0]<JOB->cfg.certainty)
1991
- cc=whatletter(box2,cs ,0);
1992
- }
1993
-
1994
- if(mo&2) // database engine, tried only if the box is still unknown or below cfg.certainty
1995
- if(cc==UNKNOWN || box2->num_ac==0 || box2->wac[0]<JOB->cfg.certainty)
1996
- cc=ocr_db(box2);
1997
-
1998
-
1999
- // box2->c=cc; bad idea (May03 removed)
2000
- // set(box2,cc,95); ToDo: is that better?
2001
-
2002
- if(cc==UNKNOWN)
2003
- i++;
2004
- ii++;
2005
-
2006
- if(JOB->cfg.verbose&8) {
2007
- fprintf(stderr,"\n# code= %04lx %c",(long)cc,(char)((cc<255)?cc:'_'));
2008
- out_b(box2,pp,x0,y0,x1-x0+1,y1-y0+1,cs);
2009
- }
2010
- progress(ii,pc); /* ii = 0..ni */
2011
-
2012
- } end_for_each(&(JOB->res.boxlist));
2013
- close_progress(pc);
2014
- if(JOB->cfg.verbose)fprintf(stderr," %d of %d chars unidentified\n",i,ii);
2015
- return 0;
2016
- }
2017
-
2018
-
2019
- /*
2020
- ** compare unknown (or weakly recognized, wac[0]<97) boxes with known chars,
2021
- ** very similar to the find_similar_char_function but here only to
2022
- ** improve the result; the search is skipped when (mo & 8) is set; always returns 0
2023
- */
2024
- int compare_unknown_with_known_chars(pix * pp, int mo) {
2025
- int i, cs = JOB->cfg.cs, dist, d, ad, wac, ni, ii;
2026
- struct box *box2, *box3, *box4;
2027
- progress_counter_t *pc=NULL;
2028
- wchar_t bc;
2029
- i = ii = 0; // ---- i: boxes that got a new alternative, ii: boxes visited ----
2030
- if (JOB->cfg.verbose)
2031
- fprintf(stderr, "# try to compare unknown with known chars !(mode&8)");
2032
- if (!(mo & 8))
2033
- {
2034
- ii=ni=0;
2035
- for_each_data(&(JOB->res.boxlist)) { ni++; } end_for_each(&(JOB->res.boxlist)); // count boxes for the progress counter
2036
- pc = open_progress(ni,"compare_chars");
2037
- for_each_data(&(JOB->res.boxlist)) {
2038
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist)); ii++;
2039
- if (box2->c == UNKNOWN || (box2->num_ac>0 && box2->wac[0]<97))
2040
- if (box2->y1 - box2->y0 > 4 && box2->x1 - box2->x0 > 1) { // no dots!
2041
- box4 = (struct box *)list_get_header(&(JOB->res.boxlist));;
2042
- dist = 1000; /* 100% maximum */
2043
- bc = UNKNOWN; /* best fit char */
2044
- for_each_data(&(JOB->res.boxlist)) {
2045
- box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
2046
- wac=((box3->num_ac>0)?box3->wac[0]:100); // boxes without an alternative list count as weight 100
2047
- if (box3 == box2 || box3->c == UNKNOWN
2048
- || wac<JOB->cfg.certainty) continue;
2049
- if (box2->y1 - box2->y0 < 5 || box2->x1 - box2->x0 < 3) continue; // NOTE(review): tests box2 only, so it is invariant inside this inner loop
2050
- d = distance(pp, box2, pp, box3, cs);
2051
- if (d < dist) {
2052
- dist = d; bc = box3->c; box4 = box3;
2053
- }
2054
- } end_for_each(&(JOB->res.boxlist));
2055
- if (dist < 10) {
2056
- /* sureness can be maximal of box3 */
2057
- if (box4->num_ac>0) ad = box4->wac[0];
2058
- else ad = 97;
2059
- ad-=dist; if(ad<1) ad=1; // reduce the weight by the distance but keep it at least 1
2060
- /* ToDo: ad should depend on ad of bestfit */
2061
- setac(box2,(wchar_t)bc,ad);
2062
- i++;
2063
- } // limit as option???
2064
- // => better max distance('e','e') ???
2065
- if (dist < 50 && (JOB->cfg.verbose & 7)) { // only for debugging
2066
- fprintf(stderr,"\n# L%02d best fit was %04x=%c dist=%3d%% i=%d",
2067
- box2->line, (int)bc, (char)((bc<128)?bc:'_'), dist, i);
2068
- if(box4->num_ac>0)fprintf(stderr," w= %3d%%",box4->wac[0]);
2069
- if ((JOB->cfg.verbose & 4) && dist < 10)
2070
- out_x2(box2, box4);
2071
- }
2072
- progress(ii,pc);
2073
- }
2074
- } end_for_each(&(JOB->res.boxlist));
2075
- close_progress(pc);
2076
- }
2077
- if (JOB->cfg.verbose)
2078
- fprintf(stderr, " - found %d (nC=%d)\n", i, ii); // NOTE(review): the value printed as nC is ii, the number of boxes visited
2079
- return 0;
2080
- }
2081
-
2082
- /*
2083
- // ---- divide overlapping chars which !strchr("_,.:;",c);
2084
- // block-splitting (two or three glued chars)
2085
- // division if dots>0 does not work properly! ???
2086
- // skipped when (mo&16) is set; split results are stored via setas(); always returns 0
2087
- // what about glued "be"?
2088
- // what about recursive division?
2089
- // ToDo: mark divided boxes to give the engine a chance to
2090
- // handle wrong divisions
2091
- */
2092
- int try_to_divide_boxes( pix *pp, int mo){
2093
- struct box *box2, boxa, boxb;
2094
- int cs=JOB->cfg.cs, ad=100,
2095
- a2[8], ar, // certainty of each part, ar = product of all certainties
2096
- cbest; // best certainty, skip search of certainty<cbest-1 for speed
2097
- wchar_t ci[8], // split max. 8 chars
2098
- s1[]={ UNKNOWN, '_', '.', ',', '\'', '!', ';', '?', ':', '-',
2099
- '=', '(', ')', '/', '\\', '\0' }; // not accepted chars, \0-terminated!
2100
- int x0, x1, y0, y1,
2101
- xi[8+1]; // cutting positions
2102
- int i, ii, n1, dy, dx;
2103
- // pix p=(*pp); // remove!
2104
- if (JOB->cfg.verbose)
2105
- fprintf(stderr,"# try to divide unknown chars !(mode&16)");
2106
- if(!(mo&16)) // put this to the caller
2107
- for_each_data(&(JOB->res.boxlist)) {
2108
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
2109
- // don't try to split simple structures (ex: 400x30 square)
2110
- if ((!box2->num_frames)
2111
- || box2->num_frame_vectors[ box2->num_frames-1 ]<9) continue;
2112
- if((box2->c==UNKNOWN || (box2->num_ac && box2->wac[0]<JOB->cfg.certainty))
2113
- && box2->x1-box2->x0>5 && box2->y1-box2->y0>4){
2114
- x0=box2->x0; x1=box2->x1;
2115
- y0=box2->y0; y1=box2->y1;
2116
- ad=100;
2117
- cbest=0;
2118
-
2119
- /* get minimum vertical lines */
2120
- n1 = num_cross(x0,x1,( y1+y0)/2,( y1+y0)/2,pp,cs);
2121
- ii = num_cross(x0,x1,(3*y1+y0)/4,(3*y1+y0)/4,pp,cs); if (ii<n1) n1=ii;
2122
- if (box2->m2 && box2->m3 > box2->m2+2)
2123
- for (i=box2->m2+1;i<=box2->m3-1;i++) {
2124
- if (loop(pp,x0+1,i,x1-x0,cs,1,RI) > (x1-x0-2)) continue; // ll
2125
- ii = num_cross(x0,x1,i,i,pp,cs); if (ii<n1) n1=ii;
2126
- } if (n1<2) continue; // seems to make no sense to divide
2127
- if (n1<4) ad=99*ad/100; // not to strong because m2+m3 could be wrong
2128
- if (n1<3) ad=99*ad/100;
2129
-
2130
- if( 2*y1 < box2->m3+box2->m4 /* baseline char ? */
2131
- && num_cross(x0,x1,y1-1,y1-1,pp,cs)==1 // -1 for slopes
2132
- && num_cross((x0+2*x1)/3,(x0+3*x1)/4,y0,y1,pp,cs)<3 // not exclude tz
2133
- && num_cross((3*x0+x1)/4,(2*x0+x1)/3,y0,y1,pp,cs)<3 // not exclude zl
2134
- && loop(pp,x0,y1-(y1-y0)/32,x1-x0,cs,0,RI)
2135
- +loop(pp,x1,y1-(y1-y0)/32,x1-x0,cs,0,LE) > (x1-x0+1)/2
2136
- ) continue; /* do not try on bvdo"o etc. */
2137
-
2138
- // one vertical line can not be two glued chars, lc?
2139
- if ( num_cross(x0,x1,(y1+y0)/2,(y1+y0)/2,pp,cs)<=1 ) continue;
2140
- { // doublet = 2 letters
2141
- // char buf[4]="\0\0\0"; // 4th byte is string end == \0
2142
- // buf[0]=c1; // c1 is wchar_t! (0xbf00 to 0) failes
2143
- // buf[1]=c2;
2144
- char buf[64]=""; // end == \0
2145
- if (JOB->cfg.verbose&2){
2146
- fprintf(stderr, "\n#\n# divide box: %4d %4d %3d %3d\n",
2147
- x0, y0, x1-x0+1, y1-y0+1);
2148
- if (JOB->cfg.verbose&4) out_x(box2);
2149
- }
2150
- // it would be better if testing is only if most right and left char
2151
- // is has no horizontal gap (below m2) ex: be
2152
- i=0; // num splittet chars
2153
- xi[0]=x0; xi[1]=x0+3; xi[2]=x1;
2154
- for ( ; ; xi[i+1]++) { // x[i] .. x[i+1], slower? but better v0.42
2155
- /* break if x is to near to the right border */
2156
- if (xi[i+1]>x1-3) { if (i==0) break; i--; xi[i+2]=x1; continue; }
2157
- // ToDo: skip if not a local dy-min for speedup
2158
- { int ymin=y1, ymax=y0, bow=0, // min max at cutting point
2159
- max0=y0, max1=y0, // max y on left and right side
2160
- min0=y1, min1=y1; // min y on left and right side
2161
- for (dy=0,ii=0;ii<box2->num_frame_vectors[ 0 ];ii++) {
2162
- int pre=ii-1, next=(ii+1)%box2->num_frame_vectors[ 0 ];
2163
- if (pre<0) pre=box2->num_frame_vectors[ 0 ]-1;
2164
- // check if vector is inside box to cut
2165
- if ( box2->frame_vector[ii ][0]<=xi[i ]) continue;
2166
- if ( box2->frame_vector[ii ][0]> xi[i+2]) continue;
2167
- // 2nd derivation of y(x)
2168
- if (abs(box2->frame_vector[ii ][0]-xi[i+1])<2) {
2169
- dy= 2*box2->frame_vector[ii ][1]
2170
- -box2->frame_vector[next][1]
2171
- -box2->frame_vector[pre ][1];
2172
- dx= box2->frame_vector[next][0]
2173
- -box2->frame_vector[pre ][0];
2174
- // rotate 180 degree if dx<0
2175
- if (((dx>0)?dy:-dy)<-abs(dx)/2) { bow=1; }
2176
- }
2177
- // its not the best if we think on glued fi fo etc.
2178
- if (( box2->frame_vector[pre ][0]<=xi[i+1]
2179
- && box2->frame_vector[next][0]>=xi[i+1])
2180
- || ( box2->frame_vector[pre ][0]>=xi[i+1]
2181
- && box2->frame_vector[next][0]<=xi[i+1])) {
2182
- if ( box2->frame_vector[ii ][1]>ymax)
2183
- ymax= box2->frame_vector[ii ][1];
2184
- if ( box2->frame_vector[ii ][1]<ymin)
2185
- ymin= box2->frame_vector[ii ][1];
2186
- }
2187
- // min and max of left and right side
2188
- if ( box2->frame_vector[ii ][1]>max0
2189
- && box2->frame_vector[ii ][0]<=xi[i+1])
2190
- max0=box2->frame_vector[ii ][1];
2191
- if ( box2->frame_vector[ii ][1]>max1
2192
- && box2->frame_vector[ii ][0]> xi[i+1])
2193
- max1=box2->frame_vector[ii ][1];
2194
- if ( box2->frame_vector[ii ][1]<min0
2195
- && box2->frame_vector[ii ][0]<=xi[i+1])
2196
- min0=box2->frame_vector[ii ][1];
2197
- if ( box2->frame_vector[ii ][1]<min1
2198
- && box2->frame_vector[ii ][0]> xi[i+1])
2199
- min1=box2->frame_vector[ii ][1];
2200
- }
2201
- if(JOB->cfg.verbose&2)
2202
- fprintf(stderr,"\n# test if to split at x%d= %2d %2d %2d"
2203
- " bow,(max-min)[i,0,1] %d %3d %3d %3d"
2204
- , i, xi[i]-x0, xi[i+1]-x0, xi[i+2]-x0, bow, ymax-ymin, max0-min0, max1-min1);
2205
- /* skip if no local minimum at xi[i+1] or if its not thin enough */
2206
- if (bow==0 || 4*(ymax-ymin)>2*(y1-y0)) continue;
2207
- // cuttet parts should have about the same height (max-min)
2208
- // we dont want to cut an 'n' in three parts!
2209
- if (2*(max0-min0+1)<(y1-y0+1)) continue; // left height
2210
- if (2*(max1-min1+1)<(y1-y0+1)) continue; // right height
2211
- // ToDo: thickness on xi[i+1]?
2212
- }
2213
- // try to split successive right box if left box is recognised,
2214
- // else shift the splitting point further to the right border
2215
- // removing ->dots if dot only above one char !!! ??? not implemented
2216
- if(JOB->cfg.verbose&2)
2217
- fprintf(stderr,"\n# try to split, newbox[%d].x= %2d ... %2d "
2218
- "dy= %d ", i, xi[i]-x0, xi[i+1]-x0, dy);
2219
- boxa=*box2; // copy contents, ToDo: reset ac-list (in cut_box?)
2220
- boxa.x=xi[i]; boxa.y=y0; // obsolete? mark pixel, overlap?
2221
- boxa.x0=xi[i];boxa.x1=xi[i+1]; // new horizontal box range
2222
- cut_box(&boxa); boxa.num_ac=0;
2223
- // out_x(&boxa);
2224
- // get wchar + certainty
2225
- ci[i]=whatletter(&boxa,cs,0); a2[i]=testac(&boxa,ci[i]);
2226
- if(JOB->cfg.verbose&2)
2227
- fprintf(stderr,"\n# certainty %d limit= %d cbest= %d ",
2228
- a2[i], JOB->cfg.certainty, cbest);
2229
- if (a2[i]<JOB->cfg.certainty || a2[i]<cbest-1
2230
- || wcschr(s1,ci[i]) ) { continue; } // dont split here
2231
-
2232
- for (ar=ad,ii=0;ii<=i;ii++) {
2233
- ar=a2[ii]*ar/100; } // multiply all probabilities
2234
- if (ar<98*JOB->cfg.certainty/100 || ar<cbest) {
2235
- continue; } // dont go deeper, no longer string
2236
-
2237
- i++; if (i==8) break; // maximum splits
2238
- if (i==4) break; // at the moment its to slow to go further
2239
- if (i+1<8) xi[i+1]=x1; // right border of next box
2240
- if (i+2<8) xi[i+2]=x1;
2241
-
2242
- if(JOB->cfg.verbose&2)
2243
- fprintf(stderr,"\n try end split [%d]=%d [%d]=%d ",
2244
- i, xi[i]-x0, i+1, xi[i+1]-x0);
2245
- boxb=*box2; // try rest if it has to be split again
2246
- boxb.x=xi[i]+1; boxb.y=y0;
2247
- boxb.x0=xi[i]+1;boxb.x1=xi[i+1];
2248
- cut_box(&boxb); boxb.num_ac=0;
2249
- ci[i]=whatletter(&boxb,cs,0); a2[i]=testac(&boxb,ci[i]);
2250
- if (a2[i]<JOB->cfg.certainty || a2[i]<cbest-1
2251
- || wcschr(s1,ci[i]) ) { xi[i+1]=xi[i]+2; continue; } // split rest
2252
- // now we have everything splittet
2253
-
2254
- if(JOB->cfg.verbose&2) {
2255
- fprintf(stderr,"\n split at/to: ");
2256
- for (ii=0;ii<=i;ii++)
2257
- fprintf(stderr," %2d %s (%3d)", xi[ii+1]-x0,
2258
- decode(ci[ii],ASCII), a2[ii]);
2259
- fprintf(stderr,"\n");
2260
- }
2261
- // boxa..c changed!!! dots should be modified!!!
2262
- // Question: cut it into boxes v0.40 or set a string v0.41?
2263
- // new way of building a string v0.41 (can call setas multiple)
2264
- // usefull if compare unknown with known strings (except barcode?)
2265
- // ToDo: also create alternate variants? ex: I <-> l
2266
- for (buf[0]=0,ar=ad,ii=0;ii<=i;ii++) {
2267
- ar=a2[ii]*ar/100; // multiply all probabilities
2268
- if (i>0 && ci[ii]=='n' && ci[ii-1]=='r') ar--; // m == rn; NOTE(review): for ii==0 this reads ci[-1] when ci[0]=='n'
2269
- strncat(buf,decode(ci[ii],JOB->cfg.out_format),20); // NOTE(review): up to 20 bytes appended per part into buf[64] - confirm decode() output stays short enough
2270
- }
2271
-
2272
- if (ar>cbest) cbest=ar; // best (highest) certainty found
2273
- // reduce, but not if we cross certainty border
2274
- if (99*ar/100 > JOB->cfg.certainty) ar=99*ar/100;
2275
- if (JOB->cfg.verbose&2)
2276
- fprintf(stderr,"\n split result= %s (%3d) ",buf, ar);
2277
- setas(box2,buf,ar); // char *, does it disturb further splitting?
2278
- buf[0]=0;
2279
- i--; xi[i+2]=x1;
2280
- }
2281
- }
2282
- }
2283
- } end_for_each(&(JOB->res.boxlist));
2284
- if (JOB->cfg.verbose) fprintf(stderr,", numC %d\n",JOB->res.numC);
2285
- return 0;
2286
- }
2287
-
2288
- /*
2289
- // ---- divide vertically glued boxes (ex: g above T); pp and mo are not referenced in the body; always returns 0
2290
- */
2291
- int divide_vert_glued_boxes( pix *pp, int mo){
2292
- struct box *box2,*box3,*box4;
2293
- int y0,y1,y,dy,flag_found,dx;
2294
- if(JOB->cfg.verbose)fprintf(stderr,"# divide vertical glued boxes");
2295
- for_each_data(&(JOB->res.boxlist)) {
2296
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
2297
- if (box2->c != UNKNOWN) continue; /* dont try on pictures */
2298
- y0=box2->y0; y1=box2->y1; dy=y1-y0+1;
2299
- dx=4*(JOB->res.avX+box2->x1-box2->x0+1); // we want to be sure to look at 4ex distance
2300
- if ( dy>2*JOB->res.avY && dy<6*JOB->res.avY && box2->m1
2301
- && y0<=box2->m2+2 && y0>=box2->m1-2
2302
- && y1>=box2->m4+JOB->res.avY-2)
2303
- { // test if lower end fits one of the other lines?
2304
- box4=box2; flag_found=0; // flag_found bit 1: neighbour on the same line, bit 2: neighbour on the next line
2305
- for_each_data(&(JOB->res.boxlist)) {
2306
- box4 = (struct box *)list_get_current(&(JOB->res.boxlist));
2307
- if (box4->c != UNKNOWN) continue; /* dont try on pictures */
2308
- if (box4->x1<box2->x0-dx || box4->x0>box2->x1+dx) continue; // ignore far boxes
2309
- if (box4->line==box2->line ) flag_found|=1; // near char on same line
2310
- if (box4->line==box2->line+1) flag_found|=2; // near char on next line
2311
- if (flag_found==3) break; // we have two vertical glued chars
2312
- } end_for_each(&(JOB->res.boxlist));
2313
- if (flag_found!=3) continue; // do not divide big chars or special symbols
2314
- y=box2->m4; // lower end of the next line
2315
- if(JOB->cfg.verbose&2){
2316
- fprintf(stderr,"\n# divide box below y=%4d",y-y0);
2317
- if(JOB->cfg.verbose&6)out_x(box2);
2318
- }
2319
- // --- insert box3 before box2
2320
- box3= (struct box *) malloc_box(box2); // NOTE(review): result is used without a NULL check - confirm malloc_box() cannot fail
2321
- box3->y1=y;
2322
- box2->y0=y+1; box2->line++; // m1..m4 should be corrected!
2323
- if (box4->line == box2->line){ // box4 is the last box visited by the search loop above
2324
- box2->m1=box4->m1; box2->m2=box4->m2;
2325
- box2->m3=box4->m3; box2->m4=box4->m4;
2326
- }
2327
- box3->num=JOB->res.numC;
2328
- if (list_ins(&(JOB->res.boxlist), box2, box3)) {
2329
- fprintf(stderr,"ERROR list_ins\n"); };
2330
- JOB->res.numC++;
2331
- }
2332
- } end_for_each(&(JOB->res.boxlist));
2333
- if(JOB->cfg.verbose)fprintf(stderr,", numC %d\n",JOB->res.numC);
2334
- return 0;
2335
- }
2336
-
2337
-
2338
- /*
2339
- on some systems isupper(>255) causes a segmentation fault (SIGSEGV),
2340
- therefore these wrappers return 0 for every cc >= 128 and call the <ctype.h> function only for cc < 128
2341
- ToDo: should be replaced (?) by wctype if available on every system; NOTE(review): if wchar_t is signed, a negative cc still reaches the <ctype.h> function
2342
- */
2343
- int wisupper(wchar_t cc){ return ((cc<128)?isupper(cc):0); }
2344
- int wislower(wchar_t cc){ return ((cc<128)?islower(cc):0); }
2345
- int wisalpha(wchar_t cc){ return ((cc<128)?isalpha(cc):0); }
2346
- int wisdigit(wchar_t cc){ return ((cc<128)?isdigit(cc):0); }
2347
- int wisspace(wchar_t cc){ return ((cc<128)?isspace(cc):0); }
2348
-
2349
- /* set box2->c to cc (via setac) if cc is in the ac-list of box2, return 1 on success; returns 0 if testac() gives 0 or box2->c already equals cc */
2350
- int setc(struct box *box2, wchar_t cc){
2351
- int ret=0, w1, w2;
2352
- w1=((box2->num_ac) ? box2->wac[0] : 0); // weight of replaced char
2353
- w2=testac(box2,cc); // weight of cc in the list of box2; treated as "not present" when 0
2354
- if (JOB->cfg.verbose) // the message is printed even if no change is made below
2355
- fprintf(stderr, "\n# change %s (%d) to %s (%d to %d) at (%d,%d)",
2356
- decode(box2->c,ASCII), w1, decode(cc,ASCII), w2, (100+w2+1)/2,
2357
- box2->x0, box2->y0);
2358
- if (w2) { if (box2->c!=cc) { ret=1; setac(box2,cc,(100+w2+1)/2); } } // new weight: rounded-up mean of w2 and 100
2359
- // if(JOB->cfg.verbose & 4) out_x(box2);
2360
- // ToDo: modify per setac (shift ac)
2361
- return ret;
2362
- }
2363
-
2364
-
2365
- /* ---- proof difficult chars Il1 by context view ----
2366
- context: separator, number, vowel, nonvowel, upper case ????
2367
- could be also used to find unknown chars if the environment (nonumbers)
2368
- can be found in other places!
2369
- ToDo:
2370
- - box->tac[] as set of possible chars, ac set by engine, example:
2371
- ac="l/" (not "Il|/\" because serifs detected and slant>0)
2372
- correction only to one of the ac-set (alternative chars)!
2373
- - should be language-settable; Unicode compatible
2374
- - box2->ad and wac should be changed? (not proper yet)
2375
- * Also handles O/0 and 5/S, inserts a missing space, removes a space before punctuation and merges two quotes; always returns 0 ------------- */
2376
- int context_correction( job_t *job ) {
2377
- // const static char
2378
- char *l_vowel="aeiouy";
2379
- // *l_Vowel="AEIOU",chars if the environment (nonumbers)
2380
- char *l_nonvo = "bcdfghjklmnpqrstvwxz";
2381
- struct box *box4, *box3, *box2, *prev, *next;
2382
- // pix *pp = &(job->src.p);
2383
- int nc=0, ns=0; // num corrections
2384
-
2385
- if (job->cfg.verbose)
2386
- fprintf(stderr, "# context correction Il1 0O");
2387
-
2388
- for_each_data(&(job->res.boxlist)) {
2389
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
2390
- if (box2->c > 0xFF) continue; // temporary UNICODE fix
2391
- prev = (struct box *)list_get_cur_prev(&(job->res.boxlist));
2392
- next = (struct box *)list_get_cur_next(&(job->res.boxlist));
2393
- if( (prev) && (prev->c > 0xFF)) continue; // temporary UNICODE fix 2
2394
- if( (next) && (next->c > 0xFF)) continue; // temporary UNICODE fix 3
2395
- if (box2->num_ac<2) continue; // no alternatives
2396
- if (box2->wac[0]==100 && box2->wac[1]<100) continue; // first alternative is certain and the second is not
2397
- if (box2->num_ac && box2->tas[0]) continue; // buggy space_remove 0.42
2398
-
2399
- /* check for Il1| which are general difficult to distinguish */
2400
- /* bbg: not very good. Should add some tests to check if is preceded by '.',
2401
- spelling, etc */
2402
- /* ToDo: only correct if not 100% sure (wac[i]<100)
2403
- and new char is in wat[] */
2404
- if (strchr("Il1|", box2->c) && next && prev) { // NOTE(review): strchr() also matches a c of 0 (the string terminator) - confirm c is never 0 here
2405
- // if( strchr(" \n",prev->c) // SPC
2406
- // && strchr(" \n",next->c) ) box2->c='I'; else // bad idea! I have ...
2407
- if (wisalpha(next->c) && next->c!='i' &&
2408
- ( prev->c == '\n' ||
2409
- ( prev->c == ' ' &&
2410
- ( box4=(struct box *)list_prev(&(job->res.boxlist), prev)) &&
2411
- box4->c == '.' ) ) ) { nc+=setc(box2,(wchar_t)'I'); }
2412
- else if (box2->c!='1' && strchr(l_nonvo,next->c) &&
2413
- strchr("\" \n",prev->c)) /* lnt => Int, but 1st */
2414
- /* do not change he'll to he'Il! */
2415
- { nc+=setc(box2,(wchar_t)'I'); } // set box2->c to 'I' if 'I' is in the ac-list
2416
- else if (strchr(l_vowel,next->c)) /* unusual? Ii Ie Ia Iy Iu */
2417
- /* && strchr("KkBbFfgGpP",prev->c)) */ /* kle Kla Kli */
2418
- { nc+=setc(box2,(wchar_t)'l'); }
2419
- else if (wisupper(next->c)
2420
- && !strchr("O0I123456789",next->c)
2421
- && !strchr("O0I123456789",prev->c)) /* avoid lO => IO (10) */
2422
- { nc+=setc(box2,(wchar_t)'I'); }
2423
- else if (wislower(prev->c))
2424
- { nc+=setc(box2,(wchar_t)'l'); }
2425
- else if (wisdigit(prev->c) || wisdigit(next->c)
2426
- || (next->c=='O' && !wisalpha(prev->c))) /* lO => 10 */
2427
- { nc+=setc(box2,(wchar_t)'1'); }
2428
- }
2429
-
2430
- /* check for O0 */
2431
- else if (strchr("O0", box2->c) && next && prev) {
2432
- if (wisspace(prev->c) && wisalpha(next->c)) /* initial letter */
2433
- { nc+=setc(box2,(wchar_t)'O'); }
2434
- else if (wisalpha(prev->c) && wisalpha(next->c)
2435
- && wisupper(next->c)) /* word in upper case */
2436
- { nc+=setc(box2,(wchar_t)'O'); }
2437
- else if (wisdigit(prev->c) || wisdigit(next->c))
2438
- { nc+=setc(box2,(wchar_t)'0'); }
2439
- }
2440
-
2441
- /* check for 5S */
2442
- else if (strchr("5S", box2->c) && next && prev) {
2443
- if (wisspace(prev->c) && wisalpha(next->c)) /* initial letter */
2444
- { nc+=setc(box2,(wchar_t)'S'); }
2445
- else if (wisalpha(prev->c) && wisalpha(next->c)
2446
- && wisupper(next->c)) /* word in upper case */
2447
- { nc+=setc(box2,(wchar_t)'S'); }
2448
- else if (wisdigit(prev->c) || wisdigit(next->c))
2449
- { nc+=setc(box2,(wchar_t)'5'); }
2450
- }
2451
-
2452
- /* was a space not found? xXx => x Xx ??? */
2453
- if (wisupper(box2->c) && next && prev) {
2454
- if (wislower(prev->c) && wislower(next->c)
2455
- && 2 * (box2->x0 - prev->x1) > 3 * (next->x0 - box2->x1)) { // gap before box2 is more than 1.5 times the gap after it
2456
- struct box *box3 = malloc_box((struct box *) NULL); // NOTE(review): shadows the outer box3; result is used without a NULL check
2457
- box3->x0 = prev->x1 + 2;
2458
- box3->x1 = box2->x0 - 2;
2459
- box3->y0 = box2->y0;
2460
- box3->y1 = box2->y1;
2461
- box3->x = box2->x0 - 1;
2462
- box3->y = box2->y0;
2463
- box3->dots = 0;
2464
- box3->num_boxes = 0;
2465
- box3->num_subboxes = 0;
2466
- box3->c = ' ';
2467
- box3->modifier = 0;
2468
- setac(box3,' ',99); /* ToDo: weight depends from distance */
2469
- box3->num = -1;
2470
- box3->line = prev->line;
2471
- box3->m1 = box3->m2 = box3->m3 = box3->m4 = 0;
2472
- box3->p = &(job->src.p);
2473
- list_ins(&(job->res.boxlist), box2, box3);
2474
- }
2475
- }
2476
-
2477
- /* a space before punctuation? but not " ./file" */
2478
- if ( prev && next)
2479
- if (prev->c == ' ' && strchr(" \n" , next->c)
2480
- && strchr(".,;:!?)", box2->c))
2481
- if (prev->x1 - prev->x0 < 2 * job->res.avX) { // carefully on tables
2482
- box3 = prev;
2483
- if ( !list_del(&(job->res.boxlist), box3) ) free_box(box3); // free only if list_del() returned 0
2484
- prev = (struct box *)list_get_cur_prev(&(job->res.boxlist));
2485
- ns++;
2486
- }
2487
-
2488
- /* \'\' to \" */
2489
- if ( prev )
2490
- if ( (prev->c == '`' || prev->c == '\'')
2491
- && (box2->c == '`' || box2->c == '\'') )
2492
- if (prev->x1 - box2->x0 < job->res.avX) { // carefully on tables
2493
- box2->c='\"';
2494
- box3 = prev;
2495
- list_del(&(job->res.boxlist), box3); // NOTE(review): unlike the space removal above, the list_del() result is not checked before free_box()
2496
- free_box(box3);
2497
- }
2498
- } end_for_each(&(job->res.boxlist));
2499
- if (job->cfg.verbose)
2500
- fprintf(stderr, " num_corrected= %d removed_spaces= %d\n", nc, ns);
2501
- return 0;
2502
- }
2503
-
2504
-
2505
- /* ---- insert spaces ----
2506
- * depends strongly on the outcome of measure_pitch() (reads job->res.lines.pitch[] and mono[])
2507
- * inserts a ' ' or '\n' box in front of box2 where needed; always returns 0 ------------------------ */
2508
- int list_insert_spaces( pix *pp, job_t *job ) {
2509
- int i=0, j1, j2, i1, maxline=-1, dy=0; char cc;
2510
- struct box *box2, *box3=NULL, *box4=NULL;
2511
-
2512
- // measure mean line height
2513
- for(i1=1;i1<job->res.lines.num;i1++) {
2514
- dy+=job->res.lines.m4[i1]-job->res.lines.m1[i1]+1;
2515
- } if (job->res.lines.num>1) dy/=(job->res.lines.num-1);
2516
- i=0; j2=0;
2517
- for(i1=1;i1<job->res.lines.num;i1++) {
2518
- j1=job->res.lines.m4[i1]-job->res.lines.m1[i1]+1;
2519
- if (j1>dy*120/100 || j1<dy*80/100) continue; // only most frequently
2520
- j2+=j1; i++;
2521
- } if (i>0 && j2/i>7) dy=j2/i; // second mean over lines within 80..120% of the first mean
2522
- if( job->cfg.verbose&1 )
2523
- fprintf(stderr,"# insert space between words (dy=%d) ...",dy);
2524
- if (!dy) dy=(job->res.avY)*110/100+1; // fallback derived from res.avY when dy is still 0
2525
-
2526
- i=0; // from here on i counts inserted boxes
2527
- for_each_data(&(job->res.boxlist)) {
2528
- box2 =(struct box *)list_get_current(&(job->res.boxlist));
2529
- cc=0; // 0 = insert nothing in front of box2
2530
- if (box2->line>maxline) { // lines and chars must be sorted!
2531
- if (maxline>=0) cc='\n'; // NL
2532
- maxline=box2->line;
2533
- }
2534
- if((box3 = (struct box *)list_prev(&(job->res.boxlist), box2))){
2535
- if (maxline && !box2->line && cc==0) cc=' ';
2536
- if (box2->line<=maxline && cc==0) { // lines and chars must be sorted!
2537
- int thispitch = job->res.lines.pitch[box2->line];
2538
- int thismono = job->res.lines.mono[box2->line];
2539
- int mdist = (box2->x1 + box2->x0 - (box3->x1 + box3->x0) + 1)/2; // distance between the box centres
2540
- int pdist = box2->x0 - box3->x1 + 1; // gap between the two boxes
2541
- if (box2->x1 - box2->x0 < thispitch) pdist=pdist*4/3;
2542
- /* allow extra pixels around small characters .,'!: etc */
2543
- // fprintf(stderr,"#\n ... mono= %2d pitch= %2d mdist= %2d pdist= %2d",
2544
- // thismono, thispitch, mdist, pdist);
2545
- if ((thismono!=0 && mdist >= thispitch)
2546
- || (thismono==0 && pdist >= thispitch))
2547
- cc=' '; // insert SPACE
2548
- }
2549
- }
2550
- if(cc){
2551
- box4=(struct box *)list_prev(&(job->res.boxlist), box2);
2552
- box3=(struct box *)malloc_box(NULL); // NOTE(review): result is used without a NULL check - confirm malloc_box() cannot fail
2553
- box3->x0=box2->x0-2; box3->x1=box2->x0-2;
2554
- box3->y0=box2->y0; box3->y1=box2->y1;
2555
- if(cc!='\n' && box4)
2556
- box3->x0=box4->x1+2;
2557
- if(cc=='\n' || !box4)
2558
- box3->x0=job->res.lines.x0[box2->line];
2559
- if(cc=='\n' && box4){
2560
- box3->y0=box4->y1; // better use lines.y1[box2->pre] ???
2561
- box3->y1=box2->y0;
2562
- }
2563
- box3->x =box2->x0-1; box3->y=box2->y0;
2564
- box3->dots=0; box3->c=cc;
2565
- box3->num_boxes = 0;
2566
- box3->num_subboxes = 0;
2567
- box3->modifier='\0';
2568
- box3->num=-1; box3->line=box2->line;
2569
- box3->m1=box2->m1; box3->m2=box2->m2;
2570
- box3->m3=box2->m3; box3->m4=box2->m4;
2571
- box3->p=pp;
2572
- setac(box3,cc,100); /* ToDo: weight depends from distance */
2573
- list_ins(&(job->res.boxlist),box2,box3);
2574
- if( job->cfg.verbose&1 ) {
2575
- fprintf(stderr,"\n# insert space &%d; at x= %4d %4d box= %p",
2576
- (int)cc, box3->x0, box3->y0, (void*)box3);
2577
- /* out_x(box3); */
2578
- }
2579
- i++;
2580
- }
2581
- } end_for_each(&(job->res.boxlist));
2582
- if( job->cfg.verbose&1 ) fprintf(stderr," found %d\n",i);
2583
- return 0;
2584
- }
2585
-
2586
-
2587
- /*
2588
- add line information to every box: m1..m4 and the index (box->line) of the nearest text line,
2589
- or 0 for all five fields when the box is too far away; this is useful for better recognition; always returns 0
2590
- */
2591
- int add_line_info(/* List *boxlist2 */){
2592
- // pix *pp=&JOB->src.p;
2593
- struct tlines *lines = &JOB->res.lines;
2594
- struct box *box2;
2595
- int i,xx,m1,m2,m3,m4,num_line_members=0,num_rest=0;
2596
- if( JOB->cfg.verbose&1 ) fprintf(stderr,"# add line infos to boxes ...");
2597
- for_each_data(&(JOB->res.boxlist)) {
2598
- box2 =(struct box *)list_get_current(&(JOB->res.boxlist));
2599
- for(i=1;i<JOB->res.lines.num;i++) /* line 0 is a place holder */
2600
- {
2601
- if (lines->dx) xx=lines->dy*((box2->x1+box2->x0)/2)/lines->dx; else xx=0; // y offset at the box centre from lines->dy/lines->dx; does not depend on i
2602
- m1=lines->m1[i]+xx;
2603
- m2=lines->m2[i]+xx;
2604
- m3=lines->m3[i]+xx;
2605
- m4=lines->m4[i]+xx;
2606
- // fprintf(stderr," test line %d m1=%d %d %d %d\n",i,m1,m2,m3,m4);
2607
- if (m4-m1==0) continue; /* no text line (line==0) */
2608
- #if 0
2609
- if( box2->y1+2*JOB->res.avY >= m1
2610
- && box2->y0-2*JOB->res.avY <= m4 ) /* not to far away */
2611
- #endif
2612
- /* give also a comma behind the line a chance */
2613
- if( box2->x0 >= lines->x0[i] && box2->x1 <= lines->x1[i]+JOB->res.avX )
2614
- if( box2->m2==0 || abs(box2->y0-box2->m2) > abs(box2->y0-m2) ) // no line assigned yet, or this m2 is closer to y0
2615
- { /* found nearest line */
2616
- box2->m1=m1;
2617
- box2->m2=m2;
2618
- box2->m3=m3;
2619
- box2->m4=m4;
2620
- box2->line=i;
2621
- }
2622
- }
2623
- if( box2->y1+2 < box2->m1
2624
- || box2->y0 < box2->m1 - (box2->m3-box2->m1)/2
2625
- || box2->y0-2 > box2->m4
2626
- || box2->y1 > box2->m3 + (box2->m3-box2->m1)
2627
- ) /* to far away */
2628
- { /* reset */
2629
- box2->m1=0;
2630
- box2->m2=0;
2631
- box2->m3=0;
2632
- box2->m4=0;
2633
- box2->line=0;
2634
- num_rest++;
2635
- } else num_line_members++;
2636
- } end_for_each(&(JOB->res.boxlist));
2637
- if( JOB->cfg.verbose&1 )
2638
- fprintf(stderr," done, num_line_chars=%d rest=%d\n",
2639
- num_line_members, num_rest);
2640
- return 0;
2641
- }
2642
-
2643
-
2644
- /*
2645
- * comparison callback to bring the boxes into the right order: by line first, then by x0
2646
- * add_line_info must be executed first! NOTE(review): never returns 0, so boxes with equal line and x0 compare as -1 in both directions
2647
- */
2648
- int sort_box_func (const void *a, const void *b) {
2649
- struct box *boxa, *boxb;
2650
-
2651
- boxa = (struct box *)a; // the casts drop the const qualifier; the boxes are only read below
2652
- boxb = (struct box *)b;
2653
-
2654
- if ( ( boxb->line < boxa->line ) ||
2655
- ( boxb->line == boxa->line && boxb->x0 < boxa->x0 ) )
2656
- return 1; // a sorts after b
2657
- return -1; // every other case, including equal keys
2658
- }
2659
-
2660
- // -------------------------------------------------------------
2661
- // ------ use this for entry from other programs
2662
- // include pnm.h pgm2asc.h
2663
- // -------------------------------------------------------------
2664
- // entry point for gocr.c or if it is used as lib
2665
- // better name is call_ocr ???
2666
- // jb: OLD COMMENT: not removed due to set_options_* ()
2667
- // args after pix *pp should be removed and new functions
2668
- // set_option_mode(int mode), set_option_spacewidth() .... etc.
2669
- // should be used instead, before calling pgm2asc(pix *pp)
2670
- // ! change if you can ! - used by X11 frontend
2671
- int pgm2asc(job_t *job)
2672
- {
2673
- pix *pp;
2674
- progress_counter_t *pc;
2675
-
2676
- assert(job);
2677
- /* FIXME jb: remove pp */
2678
- pp = &(job->src.p);
2679
-
2680
- if( job->cfg.verbose )
2681
- fprintf(stderr, "# db_path= %s\n", job->cfg.db_path);
2682
-
2683
- pc = open_progress(100,"pgm2asc_main");
2684
- progress(0,pc); /* start progress output 0% 0% */
2685
-
2686
- /* ----- count colors ------ create histogram -------
2687
- - this should be used to create a upper and lower limit for cs
2688
- - cs is the optimum gray value between cs_min and cs_max
2689
- - also inverse scans could be detected here later */
2690
- if (job->cfg.cs==0)
2691
- job->cfg.cs=otsu( pp->p,pp->y,pp->x,0,0,pp->x,pp->y, job->cfg.verbose & 1 );
2692
- /* renormalize the image and set the normalized threshold value */
2693
- job->cfg.cs=thresholding( pp->p,pp->y,pp->x,0,0,pp->x,pp->y, job->cfg.cs );
2694
- if( job->cfg.verbose )
2695
- fprintf(stderr, "# thresholding new_threshold= %d\n", job->cfg.cs);
2696
-
2697
- progress(5,pc); /* progress is only estimated */
2698
-
2699
- #if 0 /* dont vast memory */
2700
- /* FIXME jb: malloc */
2701
- if ( job->cfg.verbose & 32 ) {
2702
- // generate 2nd imagebuffer for debugging output
2703
- job->tmp.ppo.p = (unsigned char *)malloc(job->src.p.y * job->src.p.x);
2704
- // buffer
2705
- assert(job->tmp.ppo.p);
2706
- copybox(&job->src.p,
2707
- 0, 0, job->src.p.x, job->src.p.y,
2708
- &job->tmp.ppo,
2709
- job->src.p.x * job->src.p.y);
2710
- }
2711
- #else
2712
- job->tmp.ppo=job->src.p; /* temporarely, removed later */
2713
- #endif
2714
-
2715
- /* load character data base */
2716
- if ( job->cfg.mode&2 )
2717
- load_db();
2718
-
2719
- /* this is first step for reorganize the PG
2720
- ---- look for letters, put rectangular frames around letters
2721
- letter = connected points near color F
2722
- should be used by dust removing (faster) and line detection!
2723
- ---- 0..cs = black letters, last change = Mai99 */
2724
-
2725
- progress(8,pc); /* progress is only estimated */
2726
-
2727
- scan_boxes( pp );
2728
- if ( !job->res.numC ){
2729
- fprintf( stderr,"# no boxes found - stopped\n" );
2730
- if(job->cfg.verbose&32) debug_img("out01",job,8);
2731
- /***** should free stuff, etc) */
2732
- return(1);
2733
- }
2734
- // if (job->cfg.verbose&32) debug_img("out00",job,4+8);
2735
-
2736
- progress(10,pc); /* progress is only estimated */
2737
- // if(job->cfg.verbose&32) debug_img("out01",job,4+8);
2738
- // output_list(job); // for debugging
2739
- // ToDo: matrix printer preprocessing
2740
-
2741
- remove_dust( job ); /* from the &(job->res.boxlist)! */
2742
- // if(job->cfg.verbose&32) debug_img("out02",job,4+8);
2743
- // output_list(job); // for debugging
2744
- smooth_borders( job ); /* only for big chars */
2745
- progress(12,pc); /* progress is only estimated */
2746
- // if(job->cfg.verbose&32) debug_img("out03",job,4+8);
2747
- // output_list(job); // for debugging
2748
-
2749
- detect_barcode( job ); /* mark barcode */
2750
- // if(job->cfg.verbose&32) debug_img("out04",job,4+8);
2751
- // output_list(job); // for debugging
2752
-
2753
- detect_pictures( job ); /* mark pictures */
2754
- // if(job->cfg.verbose&32) debug_img("out05",job,4+8);
2755
- // output_list(job); // for debugging
2756
-
2757
- remove_pictures( job ); /* do this as early as possible, before layout */
2758
- // if(job->cfg.verbose&32) debug_img("out06",job,4+8);
2759
- // output_list(job); // for debugging
2760
-
2761
- glue_holes_inside_chars( pp ); /* including count subboxes (holes) */
2762
-
2763
- detect_rotation_angle( job );
2764
-
2765
- #if 1 /* Rotate the whole picture! move boxes */
2766
- if( job->res.lines.dy!=0 ){ // move down lowest first, move up highest first
2767
- // in work! ??? (at end set dy=0) think on ppo!
2768
- }
2769
- #endif
2770
- detect_text_lines( pp, job->cfg.mode ); /* detect and mark JOB->tmp.ppo */
2771
- // if(job->cfg.verbose&32) debug_img("out07",job,4+8);
2772
- progress(20,pc); /* progress is only estimated */
2773
-
2774
- add_line_info(/* &(job->res.boxlist) */);
2775
- if (job->cfg.verbose&32) debug_img("out10",job,4+8);
2776
-
2777
- divide_vert_glued_boxes( pp, job->cfg.mode); /* after add_line_info, before list_sort! */
2778
- // if(job->cfg.verbose&32) debug_img("out11",job,0);
2779
-
2780
- remove_melted_serifs( pp ); /* make some corrections on pixmap */
2781
- /* list_ins seems to sort in the boxes on the wrong place ??? */
2782
- // if(job->cfg.verbose&32) debug_img("out12",job,4+8);
2783
-
2784
- glue_broken_chars( pp ); /* 2nd glue */
2785
- // if(job->cfg.verbose&32) debug_img("out14",job,4+8);
2786
-
2787
- remove_rest_of_dust( );
2788
- // if(job->cfg.verbose&32) debug_img("out15",job,4+8);
2789
-
2790
- /* better sort after dust is removed (slow for lot of pixels) */
2791
- list_sort(&(job->res.boxlist), sort_box_func);
2792
-
2793
- measure_pitch( job );
2794
-
2795
- if(job->cfg.mode&64) find_same_chars( pp );
2796
- progress(30,pc); /* progress is only estimated */
2797
- // if(job->cfg.verbose&32) debug_img("out16",job,4+8);
2798
-
2799
- char_recognition( pp, job->cfg.mode);
2800
- progress(60,pc); /* progress is only estimated */
2801
- // if(job->cfg.verbose&32) debug_img("out17",job,4+8);
2802
-
2803
- if ( adjust_text_lines( pp, job->cfg.mode ) ) { /* correct using chars */
2804
- /* may be, characters/pictures have changed line number */
2805
- list_sort(&(job->res.boxlist), sort_box_func);
2806
- // 2nd recognition call if lines are adjusted
2807
- char_recognition( pp, job->cfg.mode);
2808
- }
2809
-
2810
- #define BlownUpDrawing 1 /* german: Explosionszeichnung, temporarly */
2811
- #if BlownUpDrawing == 1 /* german: Explosionszeichnung */
2812
- { /* just for debugging */
2813
- int i,ii,ni; struct box *box2;
2814
- i=ii=ni=0;
2815
- for_each_data(&(JOB->res.boxlist)) { /* count boxes */
2816
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
2817
- if (box2->c==UNKNOWN) i++;
2818
- if (box2->c==PICTURE) ii++;
2819
- ni++;
2820
- } end_for_each(&(JOB->res.boxlist));
2821
- if (JOB->cfg.verbose)
2822
- fprintf(stderr,"# debug: unknown= %d picts= %d boxes= %d\n",i,ii,ni);
2823
- }
2824
- #endif
2825
- // ----------- write out20.pgm ----------- mark lines + boxes
2826
- if (job->cfg.verbose&32) debug_img("out20",job,1+4+8);
2827
-
2828
- compare_unknown_with_known_chars( pp, job->cfg.mode);
2829
- progress(70,pc); /* progress is only estimated */
2830
-
2831
- try_to_divide_boxes( pp, job->cfg.mode);
2832
- progress(80,pc); /* progress is only estimated */
2833
-
2834
- /* --- list output ---- for debugging --- */
2835
- if (job->cfg.verbose&6) output_list(job);
2836
-
2837
- /* ---- insert spaces ---- */
2838
- list_insert_spaces( pp , job );
2839
-
2840
- // ---- proof difficult chars Il1 by context view ----
2841
- if (JOB->cfg.verbose)
2842
- fprintf(stderr,"# context correction if !(mode&32)\n");
2843
- if (!(job->cfg.mode&32)) context_correction( job );
2844
-
2845
- store_boxtree_lines( job->cfg.mode );
2846
- progress(90,pc); /* progress is only estimated */
2847
-
2848
- /* 0050002.pgm.gz ca. 109 digits, only 50 recognized (only in lines?)
2849
- * ./gocr -v 39 -m 56 -e - -m 4 -C 0-9 -f XML tmp0406/0050002.pbm.gz
2850
- * awk 'BEGIN{num=0}/1<\/box>/{num++;}END{print num}' o
2851
- * 15*0 24*1 18*2 19*3 15*4 6*5 6*6 6*7 4*8 8*9 sum=125digits counted boxes
2852
- * 9*0 19*1 14*2 15*3 11*4 6*5 5*6 6*7 4*8 8*9 sum=97digits recognized
2853
- * 1*1 1*7 not recognized (Oct04)
2854
- * 33*SPC 76*NL = 109 spaces + 36*unknown sum=241 * 16 missed
2855
- */
2856
- #if BlownUpDrawing == 1 /* german: Explosionszeichnung */
2857
- { /* just for debugging */
2858
- int i,ii,ni; struct box *box2; const char *testc="0123456789ABCDEFGHIJK";
2859
- i=ii=ni=0;
2860
- for_each_data(&(JOB->res.boxlist)) { /* count boxes */
2861
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
2862
- if (box2->c==UNKNOWN) i++;
2863
- if (box2->c==PICTURE) ii++;
2864
- if (box2->c>' ' && box2->c<='z') ni++;
2865
- } end_for_each(&(JOB->res.boxlist));
2866
- if(JOB->cfg.verbose)
2867
- fprintf(stderr,"# debug: (_)= %d picts= %d chars= %d",i,ii,ni);
2868
- for (i=0;i<20;i++) {
2869
- ni=0;
2870
- for_each_data(&(JOB->res.boxlist)) { /* count boxes */
2871
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
2872
- if (box2->c==testc[i]) ni++;
2873
- } end_for_each(&(JOB->res.boxlist));
2874
- if(JOB->cfg.verbose && ni>0)
2875
- fprintf(stderr," (%c)=%d",testc[i],ni);
2876
- }
2877
- if(JOB->cfg.verbose)
2878
- fprintf(stderr,"\n");
2879
- }
2880
- #endif
2881
-
2882
- // ---- frame-size-histogram
2883
- // ---- (my own defined) distance between letters
2884
- // ---- write internal picture of textsite
2885
- // ----------- write out30.pgm -----------
2886
- if( job->cfg.verbose&32 ) debug_img("out30",job,2+4);
2887
-
2888
- progress(100,pc); /* progress is only estimated */
2889
-
2890
- close_progress(pc);
2891
-
2892
- return 0; /* what should I return? error-state? num-of-chars? */
2893
- }