gocr-ruby 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +21 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +49 -0
  8. data/ext/gocr/Makefile +141 -0
  9. data/ext/gocr/Makefile.in +140 -0
  10. data/ext/gocr/amiga.h +31 -0
  11. data/ext/gocr/barcode.c +2108 -0
  12. data/ext/gocr/barcode.h +11 -0
  13. data/ext/gocr/box.c +496 -0
  14. data/ext/gocr/config.h +37 -0
  15. data/ext/gocr/config.h.in +36 -0
  16. data/ext/gocr/database.c +468 -0
  17. data/ext/gocr/detect.c +1003 -0
  18. data/ext/gocr/extconf.rb +6 -0
  19. data/ext/gocr/gocr.c +436 -0
  20. data/ext/gocr/gocr.h +290 -0
  21. data/ext/gocr/jconv.c +168 -0
  22. data/ext/gocr/job.c +92 -0
  23. data/ext/gocr/lines.c +364 -0
  24. data/ext/gocr/list.c +334 -0
  25. data/ext/gocr/list.h +91 -0
  26. data/ext/gocr/ocr0.c +7312 -0
  27. data/ext/gocr/ocr0.h +63 -0
  28. data/ext/gocr/ocr0n.c +1527 -0
  29. data/ext/gocr/ocr1.c +85 -0
  30. data/ext/gocr/ocr1.h +3 -0
  31. data/ext/gocr/otsu.c +310 -0
  32. data/ext/gocr/otsu.h +23 -0
  33. data/ext/gocr/output.c +291 -0
  34. data/ext/gocr/output.h +37 -0
  35. data/ext/gocr/pcx.c +153 -0
  36. data/ext/gocr/pcx.h +9 -0
  37. data/ext/gocr/pgm2asc.c +3259 -0
  38. data/ext/gocr/pgm2asc.h +105 -0
  39. data/ext/gocr/pixel.c +538 -0
  40. data/ext/gocr/pnm.c +538 -0
  41. data/ext/gocr/pnm.h +35 -0
  42. data/ext/gocr/progress.c +87 -0
  43. data/ext/gocr/progress.h +42 -0
  44. data/ext/gocr/remove.c +715 -0
  45. data/ext/gocr/tga.c +87 -0
  46. data/ext/gocr/tga.h +6 -0
  47. data/ext/gocr/unicode.c +1318 -0
  48. data/ext/gocr/unicode.h +62 -0
  49. data/ext/gocr/unicode_defs.h +1245 -0
  50. data/ext/gocr/version.h +2 -0
  51. data/gocr-ruby.gemspec +28 -0
  52. data/image.png +0 -0
  53. data/lib/gocr.rb +6 -0
  54. data/lib/gocr/image.rb +8 -0
  55. data/lib/gocr/version.rb +3 -0
  56. metadata +156 -0
@@ -0,0 +1,85 @@
1
+ // test routines - faster to compile
2
+ #include <stdlib.h>
3
+ #include <stdio.h>
4
+ #include "pgm2asc.h"
5
+ #include "unicode.h"
6
+ #include "amiga.h"
7
+ #include "gocr.h"
8
+
9
+ // for learn_mode/analyze_mode high, with, yoffset, num of pattern_i,
10
+ // - holes (center,radius in relative coordinates) etc. => cluster analyze
11
+ // num_hole => min-volume, tolerance border
12
+ // pattern: @@ @. @@
13
+ // .@ @. ..
14
+ // regular filter for large resolutions to make edges more smooth (on boxes)
15
+ // extra-filter (only if not recognized?)
16
+ // map + same color to (#==change)
17
+ // - anti color
18
+ // . not used
19
+ // strongest neighbour pixels (3x3) => directions
20
+ // second/third run with more and more tolerance!?
21
+
22
+ /* FIXME jb: following is unused */
23
+ #if 0
24
+ struct lobj { // line-object (for fitting to near lines)
25
+ int x0,y0; // starting point (left up)
26
+ int x1,y1; // end point (right down)
27
+ int mt; // minimum thickness
28
+ int q; // quality, overlapp
29
+ };
30
+
31
+ /* FIXME jb global */
32
+ struct lobj obj1;
33
+ #endif
34
+
35
+ // that is the first draft of feature extraction
36
+ // detect main lines and bows
37
+ // seems bad implemented, looking for better algorithms (ToDo: use autotrace)
38
+ #define MAXL 10
39
+ void ocr2(pix *b,int cs){
40
+ int x1,y1,x2,y2,l,i,j,xa[MAXL],ya[MAXL],xb[MAXL],yb[MAXL],ll[MAXL];
41
+ for(i=0;i<MAXL;i++)xa[i]=ya[i]=xb[i]=yb[i]=ll[i]=0;
42
+ for(x1=0;x1<b->x;x1++) // very slowly, but simple to program
43
+ for(y1=0;y1<b->y;y1++) // brute force
44
+ for(x2=0;x2<b->x;x2++)
45
+ for(y2=y1+1;y2<b->y;y2++)
46
+ {
47
+ if( get_line2(x1,y1,x2,y2,b,cs,100)>99 )
48
+ { // line ???
49
+ l=(x2-x1)*(x2-x1)+(y2-y1)*(y2-y1); // len
50
+ for(i=0;i<MAXL;i++)
51
+ { // remove similar lines (same middle point) IMPROVE IT !!!!!! ???
52
+ if(
53
+ abs(x1+x2-xa[i]-xb[i])<1+b->x/2
54
+ && abs(y1+y2-ya[i]-yb[i])<1+b->y/2
55
+ && abs(y1-ya[i])<1+b->y/4
56
+ && abs(x1-xa[i])<1+b->x/4
57
+ )
58
+ {
59
+ if( l>ll[i] )
60
+ {
61
+ for(j=i;j<MAXL-1;j++)
62
+ { // shift table
63
+ xa[j]=xa[j+1];ya[j]=ya[j+1];
64
+ xb[j]=xb[j+1];yb[j]=yb[j+1];ll[j]=ll[j+1];
65
+ }
66
+ ll[MAXL-1]=0;
67
+ }
68
+ else break; // forget it if shorter
69
+ }
70
+ if( l>ll[i] ){ // insert if larger
71
+ for(j=MAXL-1;j>i;j--){ // shift table
72
+ xa[j]=xa[j-1];ya[j]=ya[j-1];
73
+ xb[j]=xb[j-1];yb[j]=yb[j-1];ll[j]=ll[j-1];
74
+ }
75
+ xa[i]=x1;ya[i]=y1;xb[i]=x2;yb[i]=y2;ll[i]=l;
76
+ break;
77
+ }
78
+ }
79
+ }
80
+ }
81
+ for(i=0;i<MAXL;i++){
82
+ printf(" %2d %2d %2d %2d %3d\n",xa[i],ya[i],xb[i],yb[i],ll[i]);
83
+ }
84
+ }
85
+
@@ -0,0 +1,3 @@
1
+ /* #include "pgm2asc.h" */
2
+ #include "pnm.h"
3
+ /* wchar_t ocr1(struct box *box1, pix *b, int cs); */
@@ -0,0 +1,310 @@
1
+ /*
2
+ This is a Optical-Character-Recognition program
3
+ Copyright (C) 2000-2010 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+
21
+ the following code was sent by Ryan Dibble <dibbler@umich.edu>
22
+
23
+ The algorithm is very simple but works good hopefully.
24
+
25
+ Compare the grayscale histogram with a mass density diagram:
26
+ I think the algorithm is a kind of
27
+ divide a body into two parts in a way that the mass
28
+ centers have the largest distance from each other,
29
+ the function is weighted in a way that same masses have a advantage
30
+
31
+ - the otsu algorithm fails on discrete multi-color images
32
+
33
+ TODO:
34
+ RGB: do the same with all colors (CMYG?) separately
35
+
36
+ test: hardest case = two colors
37
+ bbg: test done, using a two color gray file. Output:
38
+ # threshold: Value = 43 gmin=43 gmax=188
39
+
40
+ my changes:
41
+ - float -> double
42
+ - debug option added (vvv & 1..2)
43
+ - **image => *image, &image[i][1] => &image[i*cols+1]
44
+ - do only count pixels near contrast regions
45
+ this makes otsu much better for shadowed fonts or multi colored text
46
+ on white background
47
+
48
+ (m) Joerg Schulenburg (see README for email address)
49
+
50
+ ToDo:
51
+ - measure contrast
52
+ - detect low-contrast regions
53
+
54
+ */
55
+
56
+ #include <stdio.h>
57
+ #include <string.h>
58
+
59
+ #define Abs(x) ((x<0)?-(x):x)
60
+
61
+ /*======================================================================*
62
+ * global thresholding routine *
63
+ * takes a 2D unsigned char array pointer, number of rows, and *
64
+ * number of cols in the array. returns the value of the threshold *
65
+ * x0,y0,x0+dx,y0+dy are the edgepoints of the interesting region *
66
+ * vvv is the verbosity for debugging purpose *
67
+ *======================================================================*/
68
/* Absolute difference of two gray values (helper for otsu). */
static int otsu_absdiff(int a, int b) {
  return (a < b) ? (b - a) : (a - b);
}

/*
 * Global thresholding routine (modified Otsu).
 *
 * image  - gray image, one byte per pixel, row stride is cols
 * rows   - number of image rows (not used by the computation)
 * cols   - number of image columns (row stride)
 * x0,y0  - upper left corner of the region of interest
 * dx,dy  - width and height of the region of interest
 * vvv    - verbosity, bit 0 enables debug output on stderr
 *
 * Returns the threshold in the range 1..255: values 0..threshold-1 are
 * foreground, threshold..255 are background.  Returns 160 when no usable
 * pixel is available (NULL image, empty region, empty contrast histogram).
 *
 * Side effect: if dark pixels clearly dominate (2*black > 7*white) the
 * region is inverted in place and the threshold is mirrored accordingly.
 *
 * Only every (dy/512+1)-th row is sampled for the histograms.  The contrast
 * histogram counts only pixels which differ from one of their four
 * predecessors by at least a quarter of the maximum contrast found.
 */
int
otsu (unsigned char *image, int rows, int cols,
      int x0, int y0, int dx, int dy, int vvv) {

  unsigned char *np;                /* current position in the image */
  unsigned char op1, op2, op3, op4; /* the four predecessors of *np */
  int maxc = 0;                     /* maximum contrast */
  int thresholdValue = 1;           /* value we will threshold at */
  int ihist[256];                   /* image histogram */
  int chist[256];                   /* contrast histogram */

  int i, j, k, step, limit;
  int is, i1, i2, ns, n1, n2, gmin, gmax;
  double m1, m2, sum, csum, fmax, sb, area;

  (void) rows;

  /* Nothing to analyse: bail out before touching image[y0*cols+x0],
     which would be read out of bounds for an empty region. */
  if (!image || dx <= 0 || dy <= 0) {
    fprintf (stderr, "NOT NORMAL, thresholdValue = 160\n");
    return (160);
  }

  memset(ihist, 0, sizeof(ihist));
  memset(chist, 0, sizeof(chist));

  gmin = 255;
  gmax = 0;
  step = dy/512 + 1;                /* row sampling step */
  area = (double) dx * (double) dy; /* as double: dx*dy may overflow int */

  /* pass 1: image histogram, gray range and maximum contrast.
     This is not merged with pass 2 because the contrast limit of pass 2
     depends on the final maxc. */
  op4 = op3 = op2 = op1 = image[y0*cols+x0];
  for (i = 0; i < dy; i += step) {
    np = &image[(y0+i)*cols+x0];
    for (j = 0; j < dx; j++, np++) {
      int v = *np;
      int d;
      ihist[v]++;
      if (v > gmax) gmax = v;
      if (v < gmin) gmin = v;
      d = otsu_absdiff(v, op1); if (d > maxc) maxc = d;
      d = otsu_absdiff(v, op2); if (d > maxc) maxc = d;
      d = otsu_absdiff(v, op3); if (d > maxc) maxc = d;
      d = otsu_absdiff(v, op4); if (d > maxc) maxc = d;
      op4 = op3; op3 = op2; op2 = op1;  /* shift predecessors */
      op1 = *np;
    }
  }
  if ((vvv&1))
    fprintf(stderr,"# threshold: max_contrast= %d\n", maxc);

  /* pass 2: contrast histogram, count only pixels on contrast edges so
     that large homogeneous areas do not dominate the result */
  limit = 1*maxc/4;
  op4 = op3 = op2 = op1 = image[y0*cols+x0];
  for (i = 0; i < dy; i += step) {
    np = &image[(y0+i)*cols+x0];
    for (j = 0; j < dx; j++, np++) {
      int v = *np;
      if (   otsu_absdiff(v, op1) >= limit
          || otsu_absdiff(v, op2) >= limit
          || otsu_absdiff(v, op3) >= limit
          || otsu_absdiff(v, op4) >= limit)
        chist[v]++;
      op4 = op3; op3 = op2; op2 = op1;
      op1 = *np;
    }
  }

  /* total mass and first moment */
  sum = csum = 0.0;
  ns = 0;
  is = 0;
  for (k = 0; k <= 255; k++) {
    sum += (double) k * (double) chist[k];  /* contrast mass moment */
    ns  += chist[k];                        /* contrast mass */
    is  += ihist[k];                        /* image mass */
  }

  if (!ns) {
    fprintf (stderr, "NOT NORMAL, thresholdValue = 160\n");
    return (160);
  }

  /* search the split which maximises n1*n2*(m2-m1) */
  if ((vvv&1))
    fprintf(stderr,"# threshold: value ihist chist mass_dipol_moment trigg\n");
  fmax = -1.0;
  n1 = 0;
  for (k = 0; k < 255; k++) {
    n1 += chist[k];                 /* left mass */
    if (!n1) continue;              /* need at least one foreground pixel */
    n2 = ns - n1;                   /* right mass */
    if (n2 == 0) break;             /* need at least one background pixel */
    csum += (double) k * chist[k];  /* left mass moment */
    m1 = csum / n1;                 /* left mass center */
    m2 = (sum - csum) / n2;         /* right mass center */
    sb = (double) n1 * (double) n2 * (m2 - m1);
    if (sb > fmax) {
      fmax = sb;
      thresholdValue = k + 1;
    }
    if ((vvv&1) && ihist[k])
      fprintf(stderr,"# threshold: %3d %6d %6d %8.3f %4d\n",
              k, ihist[k], chist[k],
              sb*256/area/area,     /* normalized dipol moment */
              thresholdValue);
  }

  /* count all sampled pixels below the threshold for background detection */
  i1 = 0;
  for (k = 0; k < thresholdValue; k++) {
    i1 += ihist[k];
  }
  i2 = is - i1;

  /* keep the threshold inside gmin+1..gmax */
  if (thresholdValue > gmax) {
    fprintf(stderr,"# threshold: Value >gmax\n");
    thresholdValue = gmax;
  }
  if (thresholdValue <= gmin) {
    fprintf(stderr,"# threshold: Value<=gmin\n");
    thresholdValue = gmin+1;
  }
  /* gmin+1 is 256 for a region of pure 255; honour the 1..255 range */
  if (thresholdValue > 255) {
    thresholdValue = 255;
  }

  if ( vvv & 1 )
    fprintf(stderr,"# threshold: Value = %d gmin=%d gmax=%d cmax=%d"
                   " b/w= %d %d\n",
            thresholdValue, gmin, gmax, maxc, i1, i2);

  /* primitive inversion criterion: far more dark than bright pixels */
  if (2*i1 > 7*i2) {
    if ( vvv & 1 )
      fprintf(stderr,"# threshold: invert the image\n");
    for (i = 0; i < dy; i++) {
      np = &image[(y0+i)*cols+x0];
      for (j = 0; j < dx; j++, np++) {
        *np = 255 - *np;
      }
    }
    thresholdValue = 255 - thresholdValue + 1;
  }

  return (thresholdValue);
}
250
+
251
+ /*======================================================================*/
252
+ /* thresholding the image (set threshold to 128+32=160=0xA0) */
253
+ /* now we have a fixed thresholdValue good to recognize on gray image */
254
+ /* - so lower bits can used for other things (bad design?) */
255
+ /* ToDo: different foreground colors, gray on black/white background */
256
+ /*======================================================================*/
257
/*
 * Rescale the gray values of a region in place so that the given threshold
 * is mapped to the fixed value 160.
 *
 * image          - gray image, one byte per pixel, row stride is cols
 * rows           - number of image rows (not used by the computation)
 * cols           - number of image columns (row stride)
 * x0,y0          - upper left corner of the region
 * dx,dy          - width and height of the region
 * thresholdValue - threshold to normalise; if it is outside gmin+1..gmax
 *                  of the region it is reset to (gmin+gmax+1)/2 and a
 *                  message is written to stderr
 *
 * Pixels at or above the threshold are mapped into 175..255, pixels below
 * it into 0..149.  Returns the new normalized threshold 160.
 */
int
thresholding (unsigned char *image, int rows, int cols,
              int x0, int y0, int dx, int dy, int thresholdValue) {

  unsigned char *np;  /* current position in the image */
  int i, j;
  int gmin = 255, gmax = 0;

  (void) rows;

  /* Gray range of exactly the region which is rescaled below.  The scan
     has to start at column x0: starting at x0+1 skips the first column and
     reads one pixel beyond the region in every row. */
  for (i = y0; i < y0 + dy; i++) {
    np = &image[i*cols+x0];
    for (j = x0; j < x0 + dx; j++, np++) {
      if (*np > gmax) gmax = *np;
      if (*np < gmin) gmin = *np;
    }
  }

  /* allowed threshold is gmin+1..gmax */
  if (thresholdValue <= gmin || thresholdValue > gmax) {
    thresholdValue = (gmin+gmax+1)/2;   /* range=0..1 -> threshold=1 */
    fprintf(stderr,"# thresholdValue out of range %d..%d, reset to %d\n",
            gmin, gmax, thresholdValue);
  }

  /* rescale; thresholdValue == gmin happens only if the region has no
     contrast (gmin == gmax), then every pixel takes the upper branch */
  for (i = y0; i < y0 + dy; i++) {
    np = &image[i*cols+x0];
    for (j = x0; j < x0 + dx; j++, np++) {
      if (*np > gmax || *np < gmin)   /* should never happen */
        fprintf(stderr,"ERROR th yx= %2d %2d t= %d v= %3d min/max= %d %d\n",
                i, j, thresholdValue, *np, gmin, gmax);
      if (*np >= thresholdValue || thresholdValue == gmin) {
        *np = (unsigned char)
              (255 - (gmax - *np) * 80 / (gmax - thresholdValue + 1));
      } else {
        *np = (unsigned char)
              (0 + (*np - gmin) * 150 / (thresholdValue - gmin));
      }
    }
  }

  return (128+32);  /* 0..159 is foreground, 160..255 is background */
}
310
+
@@ -0,0 +1,23 @@
1
+ /*
2
+
3
+ see README for EMAIL-address
4
+
5
+ */
6
+
7
+
8
+ /*======================================================================*/
9
+ /* OTSU global thresholding routine */
10
+ /* takes a 2D unsigned char array pointer, number of rows, and */
11
+ /* number of cols in the array. returns the value of the threshold */
12
+ /*======================================================================*/
13
+ int
14
+ otsu (unsigned char *image, int rows, int cols, int x0, int y0, int dx, int dy, int vvv);
15
+
16
+
17
+ /*======================================================================*/
18
+ /* thresholding the image (set threshold to 128+32=160=0xA0) */
19
+ /* now we have a fixed thresholdValue good to recognize on gray image */
20
+ /* - so lower bits can used for other things (bad design?) */
21
+ /*======================================================================*/
22
+ int
23
+ thresholding (unsigned char *image, int rows, int cols, int x0, int y0, int dx, int dy, int thresholdValue);
@@ -0,0 +1,291 @@
1
+ /*
2
+ This is a Optical-Character-Recognition program
3
+ Copyright (C) 2000-2010 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL address
20
+ */
21
+
22
+ #include <string.h>
23
+ #include "unicode.h"
24
+ #include "output.h"
25
+ #include "pcx.h"
26
+ #include "gocr.h" /* extern job_t OCR_JOB; */
27
+ #include "unicode_defs.h" /* UNKNOWN + PICTURE */
28
+
29
+ /* function is only for debugging and for developing
30
+ it prints out a part of pixmap b at point x0,y0 to stderr
31
+ using dots .,; if no pixel, and @xoO for pixels
32
+ modify n_run and print out what would happen on 2nd, 3th loop!
33
+ new: output original and copied pixmap in the same figure
34
+ */
35
+ void out_b(struct box *px, pix *b, int x0, int y0, int dx, int dy, int cs ){
36
+ int x,y,x2,y2,yy0,tx,ty,n1,i;
37
+ char c1, c2;
38
+ yy0=y0;
39
+ if (px) { /* overwrite rest of arguments */
40
+ if (!b) {
41
+ b=px->p;
42
+ x0=px->x0; dx=px->x1-px->x0+1;
43
+ y0=px->y0; dy=px->y1-px->y0+1; yy0=y0;
44
+ }
45
+ if(cs==0) cs=OCR_JOB->cfg.cs;
46
+ fprintf(stderr,"\n# list box x= %4d %4d d= %3d %3d r= %3d %3d"
47
+ " nrun=%d p=%p", /* ToDo: r,nrun is obsolete */
48
+ px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
49
+ px->x - px->x0, px->y - px->y0, OCR_JOB->tmp.n_run, (void*)px);
50
+ fprintf(stderr,"\n# dots=%d boxes=%d subboxes=%d c=%s mod=%s"
51
+ " line=%d m= %d %d %d %d",
52
+ px->dots, px->num_boxes, px->num_subboxes,
53
+ decode(px->c,ASCII), decode(px->modifier,ASCII), px->line,
54
+ px->m1 - px->y0, px->m2 - px->y0, px->m3 - px->y0, px->m4 - px->y0);
55
+ if (px->num_frames) {
56
+ int i,j,jo;
57
+ fprintf(stderr,"\n# frames= %d (sumvects=%d)",px->num_frames,
58
+ ((px->num_frames)?px->num_frame_vectors[px->num_frames-1]:-1));
59
+ // output objects: volume periphery(de:Umfang) num_frame_vectors
60
+ for (jo=j=i=0; i<px->num_frames; i++, jo=j) {
61
+ fprintf(stderr,"\n# frame %d (%+4d,%3d,%2d) ",
62
+ i, px->frame_vol[i], px->frame_per[i],
63
+ px->num_frame_vectors[i]-jo);
64
+ /* print only the first vectors of each frame */
65
+ for (;j<px->num_frame_vectors[i] && j<MaxFrameVectors; j++)
66
+ fprintf(stderr," #%02d %2d %2d", j,
67
+ px->frame_vector[j][0] - px->x0,
68
+ px->frame_vector[j][1] - px->y0);
69
+ }
70
+ }
71
+ if (px->num_ac){ /* output table of chars and its probabilities */
72
+ fprintf(stderr,"\n# list box char: ");
73
+ for(i=0;i<px->num_ac && i<NumAlt;i++)
74
+ /* output the (xml-)string (picture position, barcodes, glyphs, ...) */
75
+ if (px->tas[i])
76
+ fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
77
+ else
78
+ fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
79
+ }
80
+ fprintf(stderr,"\n");
81
+ if (px->m2 && px->m1<y0 && (px->dots || y0>px->m2)) {
82
+ yy0=px->m1; dy=px->y1-yy0+1;
83
+ }
84
+ }
85
+ tx=dx/80+1;
86
+ ty=dy/40+1; /* step, usually 1, but greater on large maps */
87
+ fprintf(stderr,"# list pattern x= %4d %4d d= %3d %3d t= %d %d yy0= %d\n",
88
+ x0,y0,dx,dy,tx,ty,yy0);
89
+ if (dx>0)
90
+ for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
91
+ /* first image is the copied and modified bitmap of the box */
92
+ if (px)
93
+ for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
94
+ n1=0; c1='.';
95
+ for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
96
+ for(x2=x;x2<x+tx && x2<x0+dx;x2++)
97
+ {
98
+ if((getpixel(px->p,x2-x0+px->x0,
99
+ y2-y0+px->y0)<cs)) c1='@';
100
+ }
101
+ if (px->num_frames) { /* mark vectors */
102
+ int i;
103
+ if (c1!='$' && c1!='S') /* dont mark twice */
104
+ for (i=0;i<px->num_frame_vectors[px->num_frames-1];i++)
105
+ if ((px->frame_vector[i][0]-px->x0)/tx==(x-x0)/tx
106
+ && (px->frame_vector[i][1]-px->y0)/ty==(y-y0)/ty)
107
+ { c1=((c1=='@')?'$':'S'); break; }
108
+ }
109
+ fprintf(stderr,"%c", c1 );
110
+ }
111
+
112
+ /* 2nd image is the boxframe in the original bitmap */
113
+ if (dx<40) fprintf(stderr," ");
114
+ if (dx<40) /* do it only, if we have enough place */
115
+ for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
116
+ c1='.';
117
+ for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
118
+ for(x2=x;x2<x+tx && x2<x0+dx;x2++)
119
+ { if((getpixel(b,x2,y2)<cs)) c1='@'; }
120
+ fprintf(stderr,"%c", c1 );
121
+ }
122
+
123
+ c1=c2=' ';
124
+ /* mark lines with < */
125
+ if (px) if (y-y0+px->y0==px->m1 || y-y0+px->y0==px->m2
126
+ || y-y0+px->y0==px->m3 || y-y0+px->y0==px->m4) c1='<';
127
+ if (y==y0 || y==yy0+dy-1) c2='-'; /* boxmarks */
128
+
129
+ fprintf(stderr,"%c%c\n",c1,c2);
130
+ }
131
+ }
132
+
133
+ /* same as out_b, but for faster use, only a box as argument
134
+ */
135
+ void out_x(struct box *px) {
136
+ out_b(px,NULL,0, 0, 0, 0, OCR_JOB->cfg.cs);
137
+ }
138
+
139
+
140
+ /* print out two boxes side by side, for debugging comparison algorithms */
141
+ void out_x2(struct box *box1, struct box *box2){
142
+ int x,y,i,tx,ty,dy;
143
+ /*FIXME jb static*/static char *c1="OXXXXxx@.,,,,,,,";
144
+ pix *b=&OCR_JOB->src.p;
145
+ dy=(box1->y1-box1->y0+1);
146
+ if(dy<box2->y1-box2->y0+1)dy=box2->y1-box2->y0+1;
147
+ tx=(box1->x1-box1->x0)/40+1;
148
+ ty=(box1->y1-box1->y0)/40+1; /* step, usually 1, but greater on large maps */
149
+ if(box2)fprintf(stderr,"\n# list 2 patterns");
150
+ for(i=0;i<dy;i+=ty) { /* reduce the output to max 78x40??? */
151
+ fprintf(stderr,"\n"); y=box1->y0+i;
152
+ for(x=box1->x0;x<=box1->x1;x+=tx)
153
+ fprintf(stderr,"%c", c1[ ((getpixel(b,x,y)<OCR_JOB->cfg.cs)?0:8)+marked(b,x,y) ] );
154
+ if(!box2) continue;
155
+ fprintf(stderr," "); y=box2->y0+i;
156
+ for(x=box2->x0;x<=box2->x1;x+=tx)
157
+ fprintf(stderr,"%c", c1[ ((getpixel(b,x,y)<OCR_JOB->cfg.cs)?0:8)+marked(b,x,y) ] );
158
+ }
159
+ }
160
+
161
+
162
+ /* ---- list output ---- for debugging ---
163
+ * list all boxes where the results can be found within the c-option
164
+ */
165
+ int output_list(job_t *job) {
166
+ int i = 0, j;
167
+ struct box *box2;
168
+ pix *pp = &job->src.p;
169
+ char *lc = job->cfg.lc;
170
+
171
+ fprintf(stderr,"\n# list shape for charlist %s",lc);
172
+ for_each_data(&(job->res.boxlist)) {
173
+ box2 = (struct box *) list_get_current(&(job->res.boxlist));
174
+ for (j=0; j<box2->num_ac; j++)
175
+ if (!lc || (box2->tac[j] && strchr(lc, box2->tac[j]))
176
+ || (box2->tas[j] && strstr(lc, box2->tas[j]))) break;
177
+ if (j<box2->num_ac)
178
+ fprintf(stderr,"\n# box found in charlist");
179
+ if (!lc || (strchr(lc, box2->c) && box2->c < 256 && box2->c)
180
+ || (strchr(lc, '_') && box2->c==UNKNOWN) /* for compability */
181
+ || j<box2->num_ac ){ /* also list alternative chars */
182
+ if (!pp) pp=box2->p;
183
+ fprintf(stderr,
184
+ "\n# list shape %3d x=%4d %4d d= %3d %3d vf=%d ac=%d %04x %s",
185
+ i, box2->x0, box2->y0,
186
+ box2->x1 - box2->x0 + 1,
187
+ box2->y1 - box2->y0 + 1,
188
+ box2->num_frames, box2->num_ac,
189
+ (int)box2->c, /* wchar_t -> char ???? */
190
+ decode(box2->c,ASCII) );
191
+ if (job->cfg.verbose & 4) out_x(box2);
192
+ }
193
+ i++;
194
+ } end_for_each(&(job->res.boxlist));
195
+ fprintf(stderr,"\n");
196
+ return 0;
197
+ }
198
+
199
+ /* --- output of image incl. colored lines, useful for developers ---
200
+ * debugging
201
+ * bit 0+1 is used for color coding (optical marker)
202
+ * color/gray: 0x01=red, 0x02=blue, 0x04=green???
203
+ * opt: 1 - mark unknown boxes red (first pass)
204
+ * 2 - mark unknown boxes more red (final pass)
205
+ * 4 - mark lines blue
206
+ * 8 - reset coloring (remove old marker)
207
+ */
208
+ int debug_img(char *fname, struct job_s *job, int opt) {
209
+ struct box *box2;
210
+ int x, y, ic, dx, i, j, col;
211
+ unsigned char *np;
212
+ pix *pp = &job->tmp.ppo;
213
+
214
+ if ( opt & 8 ) { /* clear debug bits in image */
215
+ for(y=0;y<pp->y;y++) {
216
+ np=&pp->p[(pp->x)*y];
217
+ for(x=0;x<pp->x;x++) {
218
+ *np = *np & 0xF1;
219
+ np++;
220
+ }
221
+ }
222
+ }
223
+
224
+ /* mark longest line which was used to estimate the rotation angle */
225
+ if ((job->cfg.verbose&32) && job->res.lines.dx)
226
+ for(i=0;i<pp->x;i++) {
227
+ y=pp->y/2;
228
+ if (job->res.lines.dx) y+=job->res.lines.dy*i/job->res.lines.dx;
229
+ x=i;
230
+ if (x<0 || x>=pp->x || y<0 || y>=pp->y) continue;
231
+ np=&pp->p[x + (pp->x)*y];
232
+ if (*np<160) continue;
233
+ if((x&7)<5 && !(x&1)) /* dotted line */
234
+ put(pp,x,y,255,8);
235
+ }
236
+
237
+ ic = ((opt & 2) ? 1 : 2); /* obsolete */
238
+ for_each_data(&(job->res.boxlist)) {
239
+ box2 = (struct box *) list_get_current(&(job->res.boxlist));
240
+ /* mark boxes in 32=0x40=blue */
241
+ if (box2->c == ' ' || box2->c == '\n') continue;
242
+ /* mark chars with green left and under line */
243
+ col = 4; /* green */
244
+ if (box2->c == UNKNOWN && (opt & 3)) col=2; /* red */
245
+ if (box2->x0>1)
246
+ for (y = box2->y0; y <= box2->y1; y++) {
247
+ np=&pp->p[box2->x0-1 + y * pp->x]; if (*np<160) continue; *np|=col; }
248
+ if (box2->y1+1<pp->y)
249
+ for (x = box2->x0; x <= box2->x1; x++) {
250
+ np=&pp->p[x + (box2->y1+1) * pp->x]; if (*np<160) continue; *np|=col; }
251
+ /* mark pictures by green cross */
252
+ if (box2->c == PICTURE)
253
+ for (x = 0; x < box2->x1-box2->x0+1; x++){
254
+ y=(box2->y1-box2->y0+1)*x/(box2->x1-box2->x0+1);
255
+ pp->p[(box2->x0+x) + (box2->y0+y) * pp->x] |= 4;
256
+ pp->p[(box2->x1-x) + (box2->y0+y) * pp->x] |= 4;
257
+ }
258
+ } end_for_each(&(job->res.boxlist));
259
+
260
+ if( opt & 4 )
261
+ {
262
+ struct tlines *lines = &job->res.lines;
263
+ int yr;
264
+ if (job->cfg.verbose)
265
+ fprintf(stderr, "# mark lines for %s.ppm\n", fname);
266
+ /* or read from outside??? */
267
+ for (i = 0; i < lines->num; i++) { /* mark lines by 0x08 = blue */
268
+ dx = lines->x1[i] - lines->x0[i] + 1;
269
+ for (j = -1; j < dx+1; j++) {
270
+ x = lines->x0[i] + j;
271
+ if (x<0 || x>=pp->x) continue;
272
+ for (y=lines->m1[i];y<=lines->m4[i];y++) {
273
+ /* box arround m2-m3 */
274
+ if (y>=lines->m2[i] && y<=lines->m3[i] && j>-1 && j<dx) continue;
275
+ yr = y; /* y.rotated */
276
+ if (lines->dx) yr += lines->dy * x / (lines->dx);
277
+ if (yr<0 || yr>=pp->y) continue;
278
+ np = &(pp->p[x + (pp->x)*yr]);
279
+ if (*np<160) continue; /* do not touch dark pixels */
280
+ if ((*np&6)!=0) continue; /* only change white pixels */
281
+ put(pp, x, yr, 255, 6); /* UPN: 255 and 6 or */
282
+ }
283
+ }
284
+ }
285
+ }
286
+
287
+ if (job->cfg.verbose&1)
288
+ fprintf(stderr,"# writing %s[.png] xy= %d %d\n", fname, pp->x, pp->y);
289
+ writeppm(fname, pp);
290
+ return 0;
291
+ }