isbn 2.0.4 → 2.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/{README → README.md} +5 -11
- data/Rakefile +20 -14
- data/isbn.gemspec +23 -0
- data/lib/isbn.rb +2 -0
- data/test/isbn_spec.rb +1 -1
- metadata +29 -316
- data/VERSION +0 -1
- data/src/gocr-0.48/.cvsignore +0 -6
- data/src/gocr-0.48/AUTHORS +0 -7
- data/src/gocr-0.48/BUGS +0 -55
- data/src/gocr-0.48/CREDITS +0 -17
- data/src/gocr-0.48/HISTORY +0 -243
- data/src/gocr-0.48/INSTALL +0 -83
- data/src/gocr-0.48/Makefile +0 -193
- data/src/gocr-0.48/Makefile.in +0 -193
- data/src/gocr-0.48/README +0 -165
- data/src/gocr-0.48/READMEde.txt +0 -80
- data/src/gocr-0.48/REMARK.txt +0 -18
- data/src/gocr-0.48/REVIEW +0 -538
- data/src/gocr-0.48/TODO +0 -65
- data/src/gocr-0.48/bin/.cvsignore +0 -2
- data/src/gocr-0.48/bin/create_db +0 -38
- data/src/gocr-0.48/bin/gocr.tcl +0 -527
- data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
- data/src/gocr-0.48/configure +0 -4689
- data/src/gocr-0.48/configure.in +0 -71
- data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
- data/src/gocr-0.48/doc/.cvsignore +0 -2
- data/src/gocr-0.48/doc/Makefile +0 -39
- data/src/gocr-0.48/doc/Makefile.in +0 -39
- data/src/gocr-0.48/doc/example.dtd +0 -53
- data/src/gocr-0.48/doc/example.xml +0 -21
- data/src/gocr-0.48/doc/examples.txt +0 -67
- data/src/gocr-0.48/doc/gocr.html +0 -578
- data/src/gocr-0.48/doc/unicode.txt +0 -57
- data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
- data/src/gocr-0.48/examples/4x6.png +0 -0
- data/src/gocr-0.48/examples/4x6.txt +0 -2
- data/src/gocr-0.48/examples/5x7.png +0 -0
- data/src/gocr-0.48/examples/5x7.png.txt +0 -2
- data/src/gocr-0.48/examples/5x8.png +0 -0
- data/src/gocr-0.48/examples/5x8.png.txt +0 -2
- data/src/gocr-0.48/examples/Makefile +0 -166
- data/src/gocr-0.48/examples/color.fig +0 -20
- data/src/gocr-0.48/examples/ex.fig +0 -16
- data/src/gocr-0.48/examples/font.tex +0 -22
- data/src/gocr-0.48/examples/font1.tex +0 -46
- data/src/gocr-0.48/examples/font2.fig +0 -27
- data/src/gocr-0.48/examples/font_nw.tex +0 -24
- data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
- data/src/gocr-0.48/examples/handwrt1.txt +0 -10
- data/src/gocr-0.48/examples/inverse.fig +0 -20
- data/src/gocr-0.48/examples/matrix.jpg +0 -0
- data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
- data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
- data/src/gocr-0.48/examples/ocr-a.png +0 -0
- data/src/gocr-0.48/examples/ocr-a.txt +0 -6
- data/src/gocr-0.48/examples/ocr-b.png +0 -0
- data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
- data/src/gocr-0.48/examples/polish.tex +0 -28
- data/src/gocr-0.48/examples/rotate45.fig +0 -14
- data/src/gocr-0.48/examples/score +0 -36
- data/src/gocr-0.48/examples/text.tex +0 -28
- data/src/gocr-0.48/gpl.html +0 -537
- data/src/gocr-0.48/include/.cvsignore +0 -2
- data/src/gocr-0.48/include/config.h +0 -36
- data/src/gocr-0.48/include/config.h.in +0 -36
- data/src/gocr-0.48/include/version.h +0 -2
- data/src/gocr-0.48/install-sh +0 -3
- data/src/gocr-0.48/make.bat +0 -57
- data/src/gocr-0.48/man/.cvsignore +0 -2
- data/src/gocr-0.48/man/Makefile +0 -29
- data/src/gocr-0.48/man/Makefile.in +0 -29
- data/src/gocr-0.48/man/man1/gocr.1 +0 -166
- data/src/gocr-0.48/src/.cvsignore +0 -4
- data/src/gocr-0.48/src/Makefile +0 -132
- data/src/gocr-0.48/src/Makefile.in +0 -132
- data/src/gocr-0.48/src/amiga.h +0 -31
- data/src/gocr-0.48/src/barcode.c +0 -846
- data/src/gocr-0.48/src/barcode.c.orig +0 -593
- data/src/gocr-0.48/src/barcode.h +0 -11
- data/src/gocr-0.48/src/box.c +0 -372
- data/src/gocr-0.48/src/database.c +0 -462
- data/src/gocr-0.48/src/detect.c +0 -943
- data/src/gocr-0.48/src/gocr.c +0 -373
- data/src/gocr-0.48/src/gocr.h +0 -288
- data/src/gocr-0.48/src/jconv.c +0 -168
- data/src/gocr-0.48/src/job.c +0 -84
- data/src/gocr-0.48/src/lines.c +0 -350
- data/src/gocr-0.48/src/list.c +0 -334
- data/src/gocr-0.48/src/list.h +0 -90
- data/src/gocr-0.48/src/ocr0.c +0 -6756
- data/src/gocr-0.48/src/ocr0.h +0 -63
- data/src/gocr-0.48/src/ocr0n.c +0 -1475
- data/src/gocr-0.48/src/ocr1.c +0 -85
- data/src/gocr-0.48/src/ocr1.h +0 -3
- data/src/gocr-0.48/src/otsu.c +0 -289
- data/src/gocr-0.48/src/otsu.h +0 -23
- data/src/gocr-0.48/src/output.c +0 -289
- data/src/gocr-0.48/src/output.h +0 -37
- data/src/gocr-0.48/src/pcx.c +0 -153
- data/src/gocr-0.48/src/pcx.h +0 -9
- data/src/gocr-0.48/src/pgm2asc.c +0 -2893
- data/src/gocr-0.48/src/pgm2asc.h +0 -105
- data/src/gocr-0.48/src/pixel.c +0 -537
- data/src/gocr-0.48/src/pnm.c +0 -533
- data/src/gocr-0.48/src/pnm.h +0 -35
- data/src/gocr-0.48/src/progress.c +0 -87
- data/src/gocr-0.48/src/progress.h +0 -42
- data/src/gocr-0.48/src/remove.c +0 -703
- data/src/gocr-0.48/src/tga.c +0 -87
- data/src/gocr-0.48/src/tga.h +0 -6
- data/src/gocr-0.48/src/unicode.c +0 -1314
- data/src/gocr-0.48/src/unicode.h +0 -1257
- data/src/jpeg-7/Makefile.am +0 -133
- data/src/jpeg-7/Makefile.in +0 -1089
- data/src/jpeg-7/README +0 -322
- data/src/jpeg-7/aclocal.m4 +0 -8990
- data/src/jpeg-7/ansi2knr.1 +0 -36
- data/src/jpeg-7/ansi2knr.c +0 -739
- data/src/jpeg-7/cderror.h +0 -132
- data/src/jpeg-7/cdjpeg.c +0 -181
- data/src/jpeg-7/cdjpeg.h +0 -187
- data/src/jpeg-7/change.log +0 -270
- data/src/jpeg-7/cjpeg.1 +0 -325
- data/src/jpeg-7/cjpeg.c +0 -616
- data/src/jpeg-7/ckconfig.c +0 -402
- data/src/jpeg-7/coderules.txt +0 -118
- data/src/jpeg-7/config.guess +0 -1561
- data/src/jpeg-7/config.sub +0 -1686
- data/src/jpeg-7/configure +0 -17139
- data/src/jpeg-7/configure.ac +0 -317
- data/src/jpeg-7/depcomp +0 -630
- data/src/jpeg-7/djpeg.1 +0 -251
- data/src/jpeg-7/djpeg.c +0 -617
- data/src/jpeg-7/example.c +0 -433
- data/src/jpeg-7/filelist.txt +0 -215
- data/src/jpeg-7/install-sh +0 -520
- data/src/jpeg-7/install.txt +0 -1097
- data/src/jpeg-7/jaricom.c +0 -148
- data/src/jpeg-7/jcapimin.c +0 -282
- data/src/jpeg-7/jcapistd.c +0 -161
- data/src/jpeg-7/jcarith.c +0 -921
- data/src/jpeg-7/jccoefct.c +0 -453
- data/src/jpeg-7/jccolor.c +0 -459
- data/src/jpeg-7/jcdctmgr.c +0 -482
- data/src/jpeg-7/jchuff.c +0 -1612
- data/src/jpeg-7/jcinit.c +0 -65
- data/src/jpeg-7/jcmainct.c +0 -293
- data/src/jpeg-7/jcmarker.c +0 -667
- data/src/jpeg-7/jcmaster.c +0 -770
- data/src/jpeg-7/jcomapi.c +0 -106
- data/src/jpeg-7/jconfig.bcc +0 -48
- data/src/jpeg-7/jconfig.cfg +0 -45
- data/src/jpeg-7/jconfig.dj +0 -38
- data/src/jpeg-7/jconfig.mac +0 -43
- data/src/jpeg-7/jconfig.manx +0 -43
- data/src/jpeg-7/jconfig.mc6 +0 -52
- data/src/jpeg-7/jconfig.sas +0 -43
- data/src/jpeg-7/jconfig.st +0 -42
- data/src/jpeg-7/jconfig.txt +0 -155
- data/src/jpeg-7/jconfig.vc +0 -45
- data/src/jpeg-7/jconfig.vms +0 -37
- data/src/jpeg-7/jconfig.wat +0 -38
- data/src/jpeg-7/jcparam.c +0 -632
- data/src/jpeg-7/jcprepct.c +0 -358
- data/src/jpeg-7/jcsample.c +0 -545
- data/src/jpeg-7/jctrans.c +0 -381
- data/src/jpeg-7/jdapimin.c +0 -396
- data/src/jpeg-7/jdapistd.c +0 -275
- data/src/jpeg-7/jdarith.c +0 -762
- data/src/jpeg-7/jdatadst.c +0 -151
- data/src/jpeg-7/jdatasrc.c +0 -212
- data/src/jpeg-7/jdcoefct.c +0 -736
- data/src/jpeg-7/jdcolor.c +0 -396
- data/src/jpeg-7/jdct.h +0 -393
- data/src/jpeg-7/jddctmgr.c +0 -382
- data/src/jpeg-7/jdhuff.c +0 -1309
- data/src/jpeg-7/jdinput.c +0 -384
- data/src/jpeg-7/jdmainct.c +0 -512
- data/src/jpeg-7/jdmarker.c +0 -1360
- data/src/jpeg-7/jdmaster.c +0 -663
- data/src/jpeg-7/jdmerge.c +0 -400
- data/src/jpeg-7/jdpostct.c +0 -290
- data/src/jpeg-7/jdsample.c +0 -361
- data/src/jpeg-7/jdtrans.c +0 -136
- data/src/jpeg-7/jerror.c +0 -252
- data/src/jpeg-7/jerror.h +0 -304
- data/src/jpeg-7/jfdctflt.c +0 -174
- data/src/jpeg-7/jfdctfst.c +0 -230
- data/src/jpeg-7/jfdctint.c +0 -4348
- data/src/jpeg-7/jidctflt.c +0 -242
- data/src/jpeg-7/jidctfst.c +0 -368
- data/src/jpeg-7/jidctint.c +0 -5137
- data/src/jpeg-7/jinclude.h +0 -91
- data/src/jpeg-7/jmemansi.c +0 -167
- data/src/jpeg-7/jmemdos.c +0 -638
- data/src/jpeg-7/jmemdosa.asm +0 -379
- data/src/jpeg-7/jmemmac.c +0 -289
- data/src/jpeg-7/jmemmgr.c +0 -1118
- data/src/jpeg-7/jmemname.c +0 -276
- data/src/jpeg-7/jmemnobs.c +0 -109
- data/src/jpeg-7/jmemsys.h +0 -198
- data/src/jpeg-7/jmorecfg.h +0 -369
- data/src/jpeg-7/jpegint.h +0 -395
- data/src/jpeg-7/jpeglib.h +0 -1135
- data/src/jpeg-7/jpegtran.1 +0 -272
- data/src/jpeg-7/jpegtran.c +0 -546
- data/src/jpeg-7/jquant1.c +0 -856
- data/src/jpeg-7/jquant2.c +0 -1310
- data/src/jpeg-7/jutils.c +0 -179
- data/src/jpeg-7/jversion.h +0 -14
- data/src/jpeg-7/libjpeg.map +0 -4
- data/src/jpeg-7/libjpeg.txt +0 -3067
- data/src/jpeg-7/ltmain.sh +0 -8406
- data/src/jpeg-7/makcjpeg.st +0 -36
- data/src/jpeg-7/makdjpeg.st +0 -36
- data/src/jpeg-7/makeadsw.vc6 +0 -77
- data/src/jpeg-7/makeasln.vc9 +0 -33
- data/src/jpeg-7/makecdep.vc6 +0 -82
- data/src/jpeg-7/makecdsp.vc6 +0 -130
- data/src/jpeg-7/makecmak.vc6 +0 -159
- data/src/jpeg-7/makecvcp.vc9 +0 -186
- data/src/jpeg-7/makeddep.vc6 +0 -82
- data/src/jpeg-7/makeddsp.vc6 +0 -130
- data/src/jpeg-7/makedmak.vc6 +0 -159
- data/src/jpeg-7/makedvcp.vc9 +0 -186
- data/src/jpeg-7/makefile.ansi +0 -220
- data/src/jpeg-7/makefile.bcc +0 -291
- data/src/jpeg-7/makefile.dj +0 -226
- data/src/jpeg-7/makefile.manx +0 -220
- data/src/jpeg-7/makefile.mc6 +0 -255
- data/src/jpeg-7/makefile.mms +0 -224
- data/src/jpeg-7/makefile.sas +0 -258
- data/src/jpeg-7/makefile.unix +0 -234
- data/src/jpeg-7/makefile.vc +0 -217
- data/src/jpeg-7/makefile.vms +0 -142
- data/src/jpeg-7/makefile.wat +0 -239
- data/src/jpeg-7/makejdep.vc6 +0 -423
- data/src/jpeg-7/makejdsp.vc6 +0 -285
- data/src/jpeg-7/makejdsw.vc6 +0 -29
- data/src/jpeg-7/makejmak.vc6 +0 -425
- data/src/jpeg-7/makejsln.vc9 +0 -17
- data/src/jpeg-7/makejvcp.vc9 +0 -328
- data/src/jpeg-7/makeproj.mac +0 -213
- data/src/jpeg-7/makerdep.vc6 +0 -6
- data/src/jpeg-7/makerdsp.vc6 +0 -78
- data/src/jpeg-7/makermak.vc6 +0 -110
- data/src/jpeg-7/makervcp.vc9 +0 -133
- data/src/jpeg-7/maketdep.vc6 +0 -43
- data/src/jpeg-7/maketdsp.vc6 +0 -122
- data/src/jpeg-7/maketmak.vc6 +0 -131
- data/src/jpeg-7/maketvcp.vc9 +0 -178
- data/src/jpeg-7/makewdep.vc6 +0 -6
- data/src/jpeg-7/makewdsp.vc6 +0 -78
- data/src/jpeg-7/makewmak.vc6 +0 -110
- data/src/jpeg-7/makewvcp.vc9 +0 -133
- data/src/jpeg-7/makljpeg.st +0 -68
- data/src/jpeg-7/maktjpeg.st +0 -30
- data/src/jpeg-7/makvms.opt +0 -4
- data/src/jpeg-7/missing +0 -376
- data/src/jpeg-7/rdbmp.c +0 -439
- data/src/jpeg-7/rdcolmap.c +0 -253
- data/src/jpeg-7/rdgif.c +0 -38
- data/src/jpeg-7/rdjpgcom.1 +0 -63
- data/src/jpeg-7/rdjpgcom.c +0 -515
- data/src/jpeg-7/rdppm.c +0 -459
- data/src/jpeg-7/rdrle.c +0 -387
- data/src/jpeg-7/rdswitch.c +0 -365
- data/src/jpeg-7/rdtarga.c +0 -500
- data/src/jpeg-7/structure.txt +0 -945
- data/src/jpeg-7/testimg.bmp +0 -0
- data/src/jpeg-7/testimg.jpg +0 -0
- data/src/jpeg-7/testimg.ppm +0 -4
- data/src/jpeg-7/testimgp.jpg +0 -0
- data/src/jpeg-7/testorig.jpg +0 -0
- data/src/jpeg-7/testprog.jpg +0 -0
- data/src/jpeg-7/transupp.c +0 -1533
- data/src/jpeg-7/transupp.h +0 -205
- data/src/jpeg-7/usage.txt +0 -605
- data/src/jpeg-7/wizard.txt +0 -211
- data/src/jpeg-7/wrbmp.c +0 -442
- data/src/jpeg-7/wrgif.c +0 -399
- data/src/jpeg-7/wrjpgcom.1 +0 -103
- data/src/jpeg-7/wrjpgcom.c +0 -583
- data/src/jpeg-7/wrppm.c +0 -269
- data/src/jpeg-7/wrrle.c +0 -305
- data/src/jpeg-7/wrtarga.c +0 -253
data/src/gocr-0.48/src/detect.c
DELETED
@@ -1,943 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
This is a Optical-Character-Recognition program
|
3
|
-
Copyright (C) 2000-2007 Joerg Schulenburg
|
4
|
-
|
5
|
-
This program is free software; you can redistribute it and/or
|
6
|
-
modify it under the terms of the GNU General Public License
|
7
|
-
as published by the Free Software Foundation; either version 2
|
8
|
-
of the License, or (at your option) any later version.
|
9
|
-
|
10
|
-
This program is distributed in the hope that it will be useful,
|
11
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
-
GNU General Public License for more details.
|
14
|
-
|
15
|
-
You should have received a copy of the GNU General Public License
|
16
|
-
along with this program; if not, write to the Free Software
|
17
|
-
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
-
|
19
|
-
check README for my email address
|
20
|
-
*/
|
21
|
-
|
22
|
-
#include <stdlib.h>
|
23
|
-
#include <stdio.h>
|
24
|
-
#include <string.h>
|
25
|
-
#include <ctype.h> // toupper, tolower
|
26
|
-
#include "pgm2asc.h"
|
27
|
-
#include "gocr.h"
|
28
|
-
|
29
|
-
// ----- detect lines ---------------
|
30
|
-
/* suggestion: Fourier transform and set line frequency where the
|
31
|
-
amplitude has a maximum (JS: slow and not smarty enough).
|
32
|
-
|
33
|
-
option: range for line numbers 1..1000 or similar
|
34
|
-
todo: look for thickest line, and divide if thickness=2*mean_thickness
|
35
|
-
Set these elements of the box structs:
|
36
|
-
|
37
|
-
m1 <-- top of upper case letters and (bdfhkl) (can differ)
|
38
|
-
m2 <-- top of letters (acegmnopqrsuvwxyz)
|
39
|
-
m3 <-- baseline
|
40
|
-
m4 <-- bottom of hanging letters (gqpy)
|
41
|
-
|
42
|
-
performance can be improved by working with a temporary
|
43
|
-
list of boxes of the special text line
|
44
|
-
|
45
|
-
- Jun23,00 more robustness of m3 (test liebfrau1)
|
46
|
-
- Feb01,02 more robustness of m4 (test s46_084.pgm)
|
47
|
-
- Dec03,12 fix problems with footnotes
|
48
|
-
ToDo:
|
49
|
-
- generate lists of boxes per line (faster access)
|
50
|
-
- use statistics
|
51
|
-
- for each box look at it neighbours and set box-m1..m4
|
52
|
-
- m[1..4].max .min if m4.min-m3.max<1 probability lower
|
53
|
-
*/
|
54
|
-
int detect_lines1(pix * p, int x0, int y0, int dx, int dy)
|
55
|
-
{
|
56
|
-
int i, jj, j2, y, yy, my, mi, mc, i1, i2, i3, i4,
|
57
|
-
m1, m2, m3, m4, ma1, ma2, ma3, ma4, m3pre, m4pre;
|
58
|
-
struct box *box2, *box3; /* box3 is for verbose / debugging */
|
59
|
-
struct tlines *lines = &JOB->res.lines;
|
60
|
-
|
61
|
-
/* ToDo: optional read line-data from external source??? */
|
62
|
-
if (lines->num == 0) { // initialize one dummy-line for pictures etc.
|
63
|
-
lines->m4[0] = 0;
|
64
|
-
lines->m3[0] = 0;
|
65
|
-
lines->m2[0] = 0;
|
66
|
-
lines->m1[0] = 0;
|
67
|
-
lines->x0[0] = p->x; /* expand to left end during detection */
|
68
|
-
lines->x1[0] = 0; /* expand to right end */
|
69
|
-
lines->pitch[0] = JOB->cfg.spc; /* default word pitch */
|
70
|
-
lines->mono[0] = 0; /* default spacing, 0 = prop */
|
71
|
-
lines->num++;
|
72
|
-
}
|
73
|
-
i = lines->num;
|
74
|
-
if (dy < 4)
|
75
|
-
return 0; /* image is to low for latin chars */
|
76
|
-
my = jj = 0;
|
77
|
-
// get the mean height of all hollow chars
|
78
|
-
// (better than mean value of everything including bg-pattern or dust?)
|
79
|
-
for_each_data(&(JOB->res.boxlist)) {
|
80
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
81
|
-
if ( box2->c != PICTURE
|
82
|
-
&& box2->num_frames>1 && box2->num_frames<3 /* 1 or 2 holes */
|
83
|
-
&& box2->y0 >= y0 && box2->y1 <= y0 + dy
|
84
|
-
&& box2->x0 >= x0 && box2->x1 <= x0 + dx
|
85
|
-
&& box2->frame_vol[0]>0
|
86
|
-
&& box2->frame_vol[1]<0
|
87
|
-
) {
|
88
|
-
jj++;
|
89
|
-
my += box2->y1 - box2->y0 + 1;
|
90
|
-
}
|
91
|
-
} end_for_each(&(JOB->res.boxlist));
|
92
|
-
if (jj==0) {
|
93
|
-
// get the mean height of all chars
|
94
|
-
for_each_data(&(JOB->res.boxlist)) {
|
95
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
96
|
-
if ( box2->c != PICTURE
|
97
|
-
&& box2->y1 - box2->y0 + 1 >= 4 /* 4x6 font */
|
98
|
-
&& box2->y0 >= y0 && box2->y1 <= y0 + dy
|
99
|
-
&& box2->x0 >= x0 && box2->x1 <= x0 + dx ) {
|
100
|
-
jj++;
|
101
|
-
my += box2->y1 - box2->y0 + 1;
|
102
|
-
}
|
103
|
-
} end_for_each(&(JOB->res.boxlist));
|
104
|
-
}
|
105
|
-
if (jj == 0)
|
106
|
-
return 0; /* no chars detected */
|
107
|
-
|
108
|
-
|
109
|
-
/* ToDo: a better way could be to mark good boxes (of typical high a-zA-Z0-9)
|
110
|
-
* first and handle only marked boxes for line scan, exclude ?!,.:;etc
|
111
|
-
* but without setect the chars itself (using good statistics)
|
112
|
-
* see adjust_text_lines()
|
113
|
-
*/
|
114
|
-
my /= jj; /* we only care about chars with high arround my */
|
115
|
-
if (JOB->cfg.verbose & 16)
|
116
|
-
fprintf(stderr,"\n# detect_lines1(%d %d %d %d) vvv&16 chars=%d my=%d\n# ",
|
117
|
-
x0, y0, dx, dy, jj, my);
|
118
|
-
// "my" is the average over the whole image (bad, if different fontsizes)
|
119
|
-
|
120
|
-
if (my < 4)
|
121
|
-
return 0; /* mean high is to small => error */
|
122
|
-
|
123
|
-
m4pre=m3pre=y0; /* lower bond of upper line */
|
124
|
-
// better function for scanning line around a letter ???
|
125
|
-
// or define lines around known chars "eaTmM"
|
126
|
-
for (j2 = y = y0; y < y0 + dy; y++) {
|
127
|
-
// look for max. of upper and lower bound of next line
|
128
|
-
m1 = y0 + dy;
|
129
|
-
jj = 0;
|
130
|
-
#if 1
|
131
|
-
/* this is only for test runs */
|
132
|
-
if (JOB->cfg.verbose & 16)
|
133
|
-
fprintf(stderr,"searching new line %d\n# ",i /* lines->num */);
|
134
|
-
#endif
|
135
|
-
|
136
|
-
box3 = NULL; /* mark the most upper box starting next line */
|
137
|
-
// find highest point of next line => store to m1-min (m1>=y)
|
138
|
-
// only objects greater 2/3*my and smaller 3*my are allowed
|
139
|
-
// a higher "!" at end of line can result in a to low m1
|
140
|
-
for_each_data(&(JOB->res.boxlist)) {
|
141
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
142
|
-
if (box2->line>0 || box2->c == PICTURE) continue;
|
143
|
-
if (lines->dx)
|
144
|
-
yy = lines->dy * box2->x0 / (lines->dx); /* correct crooked lines */
|
145
|
-
else yy=0;
|
146
|
-
if ( box2->y0 >= y + yy && box2->y1 < y0 + dy // lower than y
|
147
|
-
&& box2->x0 >= x0 && box2->x1 < x0 + dx // within box ?
|
148
|
-
&& box2->c != PICTURE // no picture
|
149
|
-
&& box2->num_boxes <= 1 // ignore 2 for "!?i" 3 for "ä"
|
150
|
-
&& 3 * (box2->y1 - box2->y0) > 2 * my // not to small
|
151
|
-
&& (box2->y1 - box2->y0) < 3 * my // not to big
|
152
|
-
&& (box2->y1 - box2->y0) > 4) // minimum absolute size
|
153
|
-
{
|
154
|
-
if (box2->y0 < m1 + yy) {
|
155
|
-
m1 = box2->y0 - yy; /* highest upper boundary */
|
156
|
-
box3 = box2;
|
157
|
-
}
|
158
|
-
// fprintf(stderr,"\n %3d %3d %+3d %d m1= %3d",
|
159
|
-
// box2->x0, box2->y0, box2->y1 - box2->y0 + 1, box2->num_boxes, m1);
|
160
|
-
}
|
161
|
-
} end_for_each(&(JOB->res.boxlist));
|
162
|
-
if (!box3 || m1 >= y0+dy) break; /* no further line found */
|
163
|
-
if (JOB->cfg.verbose & 16)
|
164
|
-
fprintf(stderr," most upper box at new line xy= %4d %4d %+4d %+4d\n# ",
|
165
|
-
box3->x0, box3->y0, box3->x1-box3->x0, box3->y1-box3->y0);
|
166
|
-
|
167
|
-
// at the moment values depend from single chars, which can
|
168
|
-
// result in bad values (ex: 4x6 /\=)
|
169
|
-
// ToDo: 2) mean size of next line (store list of y0,y1)
|
170
|
-
// ToDo: 3) count num0[(y0-m1)*16/my], num1[(y1-m1)*16/my]
|
171
|
-
// ToDo: or down-top search horizontal nerarest neighbours
|
172
|
-
lines->x0[i] = x0 + dx - 1; /* expand during operation to left end */
|
173
|
-
lines->x1[i] = x0; /* expand to the right end of line */
|
174
|
-
m4=m2=m1; mi=m1+my; m3=m1+2*my; jj=0;
|
175
|
-
// find limits for upper bound, base line and ground line
|
176
|
-
// m2-max m3-min m4-max
|
177
|
-
for_each_data(&(JOB->res.boxlist)) {
|
178
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
179
|
-
if (box2->line>0 || box2->c == PICTURE) continue;
|
180
|
-
if ( box2->y0 < y0 || box2->y1 >= y0 + dy
|
181
|
-
|| box2->x0 < x0 || box2->x1 >= x0 + dx ) continue; // out of image
|
182
|
-
if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx);
|
183
|
-
else yy=0;
|
184
|
-
/* check for ij-dots, used if chars of same high */
|
185
|
-
if ( box2->y0 >= y + yy
|
186
|
-
&& box2->y0 >= y
|
187
|
-
&& (box2->y1 - box2->y0) < my
|
188
|
-
&& box2->y1 < m1 + yy + my/4
|
189
|
-
&& box2->y0 < mi + yy ) {
|
190
|
-
mi = box2->y0 - yy; /* highest upper boundary i-dot */
|
191
|
-
}
|
192
|
-
// fprintf(stderr,"\n check %3d %3d-%3d y=%d yy=%d m1=%d", box2->x0, box2->y0, box2->y1, y, yy, m1);
|
193
|
-
/* get m2-max m3-min m4-max */
|
194
|
-
if ( box2->y0 >= y + yy // lower than y
|
195
|
-
&& 3 * (box2->y1 - box2->y0 + 1) > 2 * my // right size ?
|
196
|
-
&& (box2->y1 - box2->y0 + 1) < 3 * my // font mix, size = 2.6*my
|
197
|
-
&& (box2->y1 - box2->y0 + 1) > 3 // 4x6 lowercase=4
|
198
|
-
&& box2->y0 >= m1 // in m1 range?
|
199
|
-
&& box2->y0 <= m1 + yy + 9 * my / 8 // my can be to small if mixed
|
200
|
-
// ToDo: we need a better (local?) algorithm for big headlines > 2*my
|
201
|
-
&& box2->y1 <= m1 + yy + 3 * my
|
202
|
-
&& box2->y1 >= m1 + yy + my / 2
|
203
|
-
// lines can differ in high, my may be to small (smaller headlines)
|
204
|
-
&& box2->y0+box2->y1 <= 2*box3->y1
|
205
|
-
)
|
206
|
-
{
|
207
|
-
jj++; // count chars for debugging purpose
|
208
|
-
if (box2->y0 > m2 + yy) {
|
209
|
-
m2 = box2->y0 - yy; /* highest upper boundary */
|
210
|
-
if (JOB->cfg.verbose & 16)
|
211
|
-
fprintf(stderr," set m2= %d yy= %d\n# ",m2, yy);
|
212
|
-
}
|
213
|
-
if (box2->y1 > m4 + yy && (my>6 || box2->y1 < m3+my)) {
|
214
|
-
m4 = box2->y1 - yy; /* lowest lower boundary, small font lines can touch */
|
215
|
-
}
|
216
|
-
if ( box2->y1 < m3 + yy
|
217
|
-
&& ( ( 2*box2->y1 > m2+ m4+yy && m2>m1)
|
218
|
-
|| ( 4*box2->y1 > m1+3*m4+yy) ) ) // care for TeX: \(^1\)Footnote 2003
|
219
|
-
/* "'!?" could cause trouble here, therefore this lines */
|
220
|
-
/* ToDo: get_bw costs time, check pre and next */
|
221
|
-
if( get_bw(box2->x0,box2->x1,box2->y1+1 ,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 0
|
222
|
-
|| get_bw(box2->x0,box2->x1,box2->y1+my/2,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 1
|
223
|
-
|| num_cross(box2->x0,box2->x1,(box2->y0+box2->y1)/2,(box2->y0+box2->y1)/2,box2->p,JOB->cfg.cs)>2 )
|
224
|
-
{
|
225
|
-
m3 = box2->y1 - yy; /* highest lower boundary */
|
226
|
-
// printf("\n# set1 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
|
227
|
-
// out_x(box2);
|
228
|
-
}
|
229
|
-
if (box2->y0 + box2->y1 > 2*(m3 + yy)
|
230
|
-
&& box2->y1 < m4 + yy - my/4 -1
|
231
|
-
&& box2->y1 >= (m2 + m4)/2 // care for TeX: \(^1\)Footnote 2003
|
232
|
-
&& m2 > m1 ) // be sure to not use ', m2 must be ok
|
233
|
-
{
|
234
|
-
m3 = box2->y1 - yy; /* highest lower boundary */
|
235
|
-
// printf("\n# set2 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
|
236
|
-
// out_x(box2);
|
237
|
-
}
|
238
|
-
if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
|
239
|
-
if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
|
240
|
-
// printf(" m=%3d %+2d %+2d %+2d yy=%3d\n",m1,m2-m1,m3-m1,m4-m1,yy);
|
241
|
-
}
|
242
|
-
} end_for_each(&(JOB->res.boxlist));
|
243
|
-
|
244
|
-
#if 1
|
245
|
-
/* this is only for test runs */
|
246
|
-
if (JOB->cfg.verbose & 16)
|
247
|
-
fprintf(stderr," step 1 y=%4d m= %4d %+3d %+3d %+3d"
|
248
|
-
" my=%2d chars=%3d\n# ",
|
249
|
-
y, m1, m2-m1, m3-m1, m4-m1, my, jj);
|
250
|
-
#endif
|
251
|
-
|
252
|
-
if (m3 == m1)
|
253
|
-
break;
|
254
|
-
#if 1 /* make averages about the line */
|
255
|
-
// same again better estimation
|
256
|
-
mc = (3 * m3 + m1) / 4; /* lower center ? */
|
257
|
-
ma1 = ma2 = ma3 = ma4 = i1 = i2 = i3 = i4 = jj = 0;
|
258
|
-
for_each_data(&(JOB->res.boxlist)) {
|
259
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
260
|
-
if (box2->line>0 || box2->c == PICTURE) continue;
|
261
|
-
if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
|
262
|
-
if (box2->y0 >= y + yy && box2->y1 < y0 + dy // lower than y
|
263
|
-
&& box2->x0 >= x0 && box2->x1 < x0 + dx // in box ?
|
264
|
-
&& box2->c != PICTURE // no picture
|
265
|
-
&& 2 * (box2->y1 - box2->y0) > my // right size ?
|
266
|
-
&& (box2->y1 - box2->y0) < 4 * my) {
|
267
|
-
if ( box2->y0 - yy >= m1-my/4
|
268
|
-
&& box2->y0 - yy <= m2+my/4
|
269
|
-
&& box2->y1 - yy >= m3-my/4
|
270
|
-
&& box2->y1 - yy <= m4+my/4 ) { /* its within allowed range! */
|
271
|
-
// jj++; // not used
|
272
|
-
if (abs(box2->y0 - yy - m1) <= abs(box2->y0 - yy - m2))
|
273
|
-
{ i1++; ma1 += box2->y0 - yy; }
|
274
|
-
else { i2++; ma2 += box2->y0 - yy; }
|
275
|
-
if (abs(box2->y1 - yy - m3) < abs(box2->y1 - yy - m4))
|
276
|
-
{ i3++; ma3 += box2->y1 - yy; }
|
277
|
-
else { i4++; ma4 += box2->y1 - yy; }
|
278
|
-
if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
|
279
|
-
if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
|
280
|
-
}
|
281
|
-
}
|
282
|
-
} end_for_each(&(JOB->res.boxlist));
|
283
|
-
|
284
|
-
if (i1) m1 = (ma1+i1/2) / i1; /* best rounded */
|
285
|
-
if (i2) m2 = (ma2+i2/2) / i2;
|
286
|
-
if (i3) m3 = (ma3+i3-1) / i3; /* round up */
|
287
|
-
if (i4) m4 = (ma4+i4-1) / i4;
|
288
|
-
// printf("\n# .. set3 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
|
289
|
-
|
290
|
-
#endif
|
291
|
-
|
292
|
-
/* expand right and left end of line */
|
293
|
-
for_each_data(&(JOB->res.boxlist)) {
|
294
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
295
|
-
if (box2->line>0 || box2->c == PICTURE) continue;
|
296
|
-
if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
|
297
|
-
if ( box2->y0 >= y0 && box2->y1 < y0 + dy
|
298
|
-
&& box2->x0 >= x0 && box2->x1 < x0 + dx // in box ?
|
299
|
-
&& box2->c != PICTURE // no picture
|
300
|
-
&& box2->y0 >= m1-1
|
301
|
-
&& box2->y0 <= m4
|
302
|
-
&& box2->y1 >= m1
|
303
|
-
&& box2->y1 <= m4+1 ) { /* its within line */
|
304
|
-
if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
|
305
|
-
if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
|
306
|
-
}
|
307
|
-
} end_for_each(&(JOB->res.boxlist));
|
308
|
-
|
309
|
-
#if 1
|
310
|
-
/* this is only for test runs */
|
311
|
-
if (JOB->cfg.verbose & 16)
|
312
|
-
fprintf(stderr," step 2 y=%4d m= %4d %+3d %+3d %+3d\n# ",
|
313
|
-
y,m1,m2-m1,m3-m1,m4-m1);
|
314
|
-
#endif
|
315
|
-
|
316
|
-
if (m4 == m1) {
|
317
|
-
if(m3+m4>2*y) y = (m4+m3)/2; /* lower end may overlap the next line */
|
318
|
-
continue;
|
319
|
-
}
|
320
|
-
jj=0;
|
321
|
-
lines->wt[i] = 100;
|
322
|
-
if (5 * (m2 - m1 +1) < m3 - m2 || (m2 - m1) < 2) jj|=1; /* same high */
|
323
|
-
if (5 * (m4 - m3 +1) < m3 - m2 || (m4 - m3) < 1) jj|=2; /* same base */
|
324
|
-
if (jj&1) lines->wt[i] = 75*lines->wt[i]/100;
|
325
|
-
if (jj&2) lines->wt[i] = 75*lines->wt[i]/100;
|
326
|
-
if (jj>0 && JOB->cfg.verbose) {
|
327
|
-
fprintf(stderr," trouble on line %d, wt*100= %d\n",i,lines->wt[i]);
|
328
|
-
fprintf(stderr,"# m= %4d %+3d %+3d %+3d\n",m1,m2-m1,m3-m1,m4-m1);
|
329
|
-
fprintf(stderr,"# i= %3d %3d %3d %3d (counts)\n",i1,i2,i3,i4);
|
330
|
-
if (jj==3) fprintf(stderr,"# all boxes of same high!\n# ");
|
331
|
-
if (jj==1) fprintf(stderr,"# all boxes of same upper bound!\n# ");
|
332
|
-
if (jj==2) fprintf(stderr,"# all boxes of same lower bound!\n# ");
|
333
|
-
}
|
334
|
-
/* ToDo: check for dots ij,. to get the missing information */
|
335
|
-
#if 1
|
336
|
-
/* jj=3: ABCDEF123456 or mnmno or gqpy or lkhfdtb => we are in trouble */
|
337
|
-
if (jj==3 && (m4-m1)>my) { jj=0; m2=m1+my/8+1; m4=m3+my/8+1; } /* ABC123 */
|
338
|
-
/* using idots, may fail on "ABCDEFGÄÜÖ" */
|
339
|
-
if (jj==3 && mi>0 && mi<m1 && mi>m4pre) { jj=2; m1=mi; } /* use ij dots */
|
340
|
-
if (jj==1 && m2-(m3-m2)/4>m3pre ) { /* expect: acegmnopqrsuvwxyz */
|
341
|
-
if (m1-m4pre<m4-m1) /* fails for 0123ABCD+Q$ */
|
342
|
-
m1 = ( m2 + m4pre ) / 2 ;
|
343
|
-
else
|
344
|
-
m1 = ( m2 - (m3 - m2) / 4 );
|
345
|
-
}
|
346
|
-
if (jj==3)
|
347
|
-
m2 = m1 + (m3 - m1) / 4 + 1; /* expect: 0123456789ABCDEF */
|
348
|
-
if ( (m2 - m1) < 2)
|
349
|
-
m2 = m1 + 2; /* font hight < 8 pixel ? */
|
350
|
-
if (jj&2)
|
351
|
-
m4 = m3 + (m4 - m1) / 4 + 1; /* chars have same lower base */
|
352
|
-
if (jj>0 && JOB->cfg.verbose & 16) {
|
353
|
-
fprintf(stderr," m= %4d %+2d %+2d %+2d my= %4d\n# ",
|
354
|
-
m1, m2-m1, m3-m1, m4-m1, my);
|
355
|
-
}
|
356
|
-
#endif
|
357
|
-
|
358
|
-
|
359
|
-
{ // empty space between lines
|
360
|
-
lines->m4[i] = m4;
|
361
|
-
lines->m3[i] = m3;
|
362
|
-
lines->m2[i] = m2;
|
363
|
-
lines->m1[i] = m1;
|
364
|
-
lines->pitch[i] = JOB->cfg.spc; /* default word pitch */
|
365
|
-
lines->mono[i] = 0; /* default spacing, 0=prop, 1=mono */
|
366
|
-
if (JOB->cfg.verbose & 16)
|
367
|
-
fprintf(stderr, " m= %4d %+3d %+3d %+3d w= %d (line=%d)\n# ",
|
368
|
-
m1, m2 - m1, m3 - m1, m4 - m1, lines->wt[i], i);
|
369
|
-
if (i < MAXlines && m4 - m1 > 4)
|
370
|
-
i++;
|
371
|
-
if (i >= MAXlines) {
|
372
|
-
fprintf(stderr, "Warning: lines>MAXlines\n");
|
373
|
-
break;
|
374
|
-
}
|
375
|
-
}
|
376
|
-
if (m3+m4>2*y) y = (m3+m4)/2; /* lower end may overlap the next line */
|
377
|
-
if (m3>m3pre) m3pre = m3; else m3=y0; /* set for next-line scan */
|
378
|
-
if (m4>m4pre) m4pre = m4; else m4=y0; /* set for next-line scan */
|
379
|
-
}
|
380
|
-
lines->num = i;
|
381
|
-
if (JOB->cfg.verbose)
|
382
|
-
fprintf(stderr, " num_lines= %d", lines->num-1);
|
383
|
-
return 0;
|
384
|
-
}
|
385
|
-
|
386
|
-
// ----- layout analyzis of dx*dy region at x0,y0 -----
|
387
|
-
// ----- detect lines via recursive division (new version) ---------------
|
388
|
-
// what about text in frames???
|
389
|
-
// ToDo: change to bottom-top analyse or/and take rotation into account
|
390
|
-
int detect_lines2(pix *p,int x0,int y0,int dx,int dy,int r){
|
391
|
-
int i,x2,y2,x3,y3,x4,y4,x5,y5,y6,mx,my,x30,x31,y30,y31;
|
392
|
-
struct box *box2,*box3;
|
393
|
-
// shrink box
|
394
|
-
if(dx<=0 || dy<=0) return 0;
|
395
|
-
if(y0+dy< p->y/128 && y0==0) return 0; /* looks like dust */
|
396
|
-
if(y0>p->y-p->y/128 && y0+dy==p->y) return 0; /* looks like dust */
|
397
|
-
|
398
|
-
if(r>1000){ return -1;} // something is wrong
|
399
|
-
if(JOB->cfg.verbose)fprintf(stderr,"\n# r=%2d ",r);
|
400
|
-
|
401
|
-
mx=my=i=0; // mean thickness
|
402
|
-
// remove border, shrink size
|
403
|
-
x2=x0+dx-1; // min x
|
404
|
-
y2=y0+dy-1; // min y
|
405
|
-
x3=x0; // max x
|
406
|
-
y3=y0; // max y
|
407
|
-
for_each_data(&(JOB->res.boxlist)) {
|
408
|
-
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
409
|
-
if(box3->y0>=y0 && box3->y1<y0+dy &&
|
410
|
-
box3->x0>=x0 && box3->x1<x0+dx)
|
411
|
-
{
|
412
|
-
if( box3->x1 > x3 ) x3=box3->x1; // max x
|
413
|
-
if( box3->x0 < x2 ) x2=box3->x0; // min x
|
414
|
-
if( box3->y1 > y3 ) y3=box3->y1; // max y
|
415
|
-
if( box3->y0 < y2 ) y2=box3->y0; // min y
|
416
|
-
if(box3->c!=PICTURE)
|
417
|
-
if( box3->y1 - box3->y0 > 4 )
|
418
|
-
{
|
419
|
-
i++;
|
420
|
-
mx+=box3->x1-box3->x0+1; // mean x
|
421
|
-
my+=box3->y1-box3->y0+1; // mean y
|
422
|
-
}
|
423
|
-
}
|
424
|
-
} end_for_each(&(JOB->res.boxlist));
|
425
|
-
x0=x2; dx=x3-x2+1;
|
426
|
-
y0=y2; dy=y3-y2+1;
|
427
|
-
|
428
|
-
if(i==0 || dx<=0 || dy<=0) return 0;
|
429
|
-
mx/=i;my/=i;
|
430
|
-
// better look for widest h/v-gap, ToDo: vertical lines?
|
431
|
-
if(r<8){ // max. depth
|
432
|
-
|
433
|
-
// detect widest horizontal gap
|
434
|
-
y2=y3=y4=y5=y6=0;
|
435
|
-
x2=x3=x4=x5=y5=0;// min. 3 lines
|
436
|
-
// position and thickness of gap, y6=num_gaps, nbox^2 ops
|
437
|
-
for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
|
438
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
439
|
-
if( box2->c!=PICTURE ) /* ToDo: not sure, that this is a good idea */
|
440
|
-
if( box2->y0>=y0 && box2->y1<y0+dy
|
441
|
-
&& box2->x0>=x0 && box2->x1<x0+dx
|
442
|
-
&& box2->y1-box2->y0>my/2 ){ // no pictures & dust???
|
443
|
-
|
444
|
-
y4=y0+dy-1; // nearest vert. box
|
445
|
-
x4=x0+dx-1;
|
446
|
-
// ToDo: rotate back box2->x1,y1 to x21,y21
|
447
|
-
// look for nearest lowest (y4) and right (x4) neighbour
|
448
|
-
// of every box (box2)
|
449
|
-
for_each_data(&(JOB->res.boxlist)) {
|
450
|
-
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
451
|
-
if(box3!=box2)
|
452
|
-
if(box3->y0>=y0 && box3->y1<y0+dy)
|
453
|
-
if(box3->x0>=x0 && box3->x1<x0+dx)
|
454
|
-
if(box3->c!=PICTURE) /* ToDo: not sure, that this is a good idea */
|
455
|
-
if(box3->y1-box3->y0>my/2 ){
|
456
|
-
// ToDo: here we need the rotation around box2
|
457
|
-
x30=box3->x0;
|
458
|
-
x31=box3->x1;
|
459
|
-
y30=box3->y0;
|
460
|
-
y31=box3->y1;
|
461
|
-
// get min. distances to lower and to right direction
|
462
|
-
if( y31 > box2->y1 && y30 < y4 ) y4=y30-1;
|
463
|
-
if( x31 > box2->x1 && x30 < x4 ) x4=x30-1;
|
464
|
-
}
|
465
|
-
} end_for_each(&(JOB->res.boxlist));
|
466
|
-
// set the witdht and position of largest hor./vert. gap
|
467
|
-
// largest gap: width position
|
468
|
-
if( y4-box2->y1 > y3 ) { y3=y4-box2->y1; y2=(y4+box2->y1)/2; }
|
469
|
-
if( x4-box2->x1 > x3 ) { x3=x4-box2->x1; x2=(x4+box2->x1)/2; }
|
470
|
-
}
|
471
|
-
} end_for_each(&(JOB->res.boxlist));
|
472
|
-
// fprintf(stderr,"\n widest y-gap= %4d %4d",y2,y3);
|
473
|
-
// fprintf(stderr,"\n widest x-gap= %4d %4d",x2,x3);
|
474
|
-
|
475
|
-
i=0; // i=1 at x, i=2 at y
|
476
|
-
// this is the critical point
|
477
|
-
// is this a good decision or not???
|
478
|
-
if(x3>0 || y3>0){
|
479
|
-
if(x3>mx && x3>2*y3 && (dy>5*x3 || (x3>10*y3 && y3>0))) i=1; else
|
480
|
-
if(dx>5*y3 && y3>my) i=2;
|
481
|
-
}
|
482
|
-
|
483
|
-
// compare with largest box???
|
484
|
-
for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
|
485
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
486
|
-
if( box2->c == PICTURE )
|
487
|
-
if( box2->y0>=y0 && box2->y1<y0+dy
|
488
|
-
&& box2->x0>=x0 && box2->x1<x0+dx )
|
489
|
-
{ // hline ???
|
490
|
-
// largest gap: width position
|
491
|
-
if( box2->x1-box2->x0+4 > dx && box2->y1+4<y0+dy ) { y3=1; y2=box2->y1+1; i=2; break; }
|
492
|
-
if( box2->x1-box2->x0+4 > dx && box2->y0-4>y0 ) { y3=1; y2=box2->y0-1; i=2; break; }
|
493
|
-
if( box2->y1-box2->y0+4 > dy && box2->x1+4<x0+dx ) { x3=1; x2=box2->x1+1; i=1; break; }
|
494
|
-
if( box2->y1-box2->y0+4 > dy && box2->x0-4>x0 ) { x3=1; x2=box2->x0-1; i=1; break; }
|
495
|
-
}
|
496
|
-
} end_for_each(&(JOB->res.boxlist));
|
497
|
-
if(JOB->cfg.verbose)fprintf(stderr," i=%d",i);
|
498
|
-
|
499
|
-
if(JOB->cfg.verbose && i) fprintf(stderr," divide at %s x=%4d y=%4d dx=%4d dy=%4d",
|
500
|
-
((i)?( (i==1)?"x":"y" ):"?"),x2,y2,x3,y3);
|
501
|
-
// divide horizontally if v-gap is thicker than h-gap
|
502
|
-
// and length is larger 5*width
|
503
|
-
if(i==1){ detect_lines2(p,x0,y0,x2-x0+1,dy,r+1);
|
504
|
-
return detect_lines2(p,x2,y0,x0+dx-x2+1,dy,r+1); }
|
505
|
-
// divide vertically
|
506
|
-
if(i==2){ detect_lines2(p,x0,y0,dx,y2-y0+1,r+1);
|
507
|
-
return detect_lines2(p,x0,y2,dx,y0+dy-y2+1,r+1);
|
508
|
-
}
|
509
|
-
}
|
510
|
-
|
511
|
-
|
512
|
-
if(JOB->cfg.verbose) if(dx<5 || dy<7)fprintf(stderr," empty box");
|
513
|
-
if(dx<5 || dy<7) return 0; // do not care about dust
|
514
|
-
if(JOB->cfg.verbose)fprintf(stderr, " box detected at %4d %4d %4d %4d",x0,y0,dx,dy);
|
515
|
-
if(JOB->tmp.ppo.p){
|
516
|
-
for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0 ,255,16);
|
517
|
-
for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0+dy-1,255,16);
|
518
|
-
for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0 ,y0+i ,255,16);
|
519
|
-
for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0+dx-1,y0+i ,255,16);
|
520
|
-
// writebmp("out10.bmp",p2,JOB->cfg.verbose); // colored should be better
|
521
|
-
}
|
522
|
-
return detect_lines1(p,x0-0*1,y0-0*2,dx+0*2,dy+0*3);
|
523
|
-
|
524
|
-
/*
|
525
|
-
struct tlines *lines = &JOB->res.lines;
|
526
|
-
i=lines->num; lines->num++;
|
527
|
-
lines->m1[i]=y0; lines->m2[i]=y0+5*dy/16;
|
528
|
-
lines->m3[i]=y0+12*dy/16; lines->m4[i]=y0+dy-1;
|
529
|
-
lines->x0[i]=x0; lines->x1[i]=x0+dx-1;
|
530
|
-
if(JOB->cfg.verbose)fprintf(stderr," - line= %d",lines->num);
|
531
|
-
return 0;
|
532
|
-
*/
|
533
|
-
}
|
534
|
-
|
535
|
-
/* ToDo: herons algorithm for square root x=(x+y/x)/2 is more efficient
|
536
|
-
* than interval subdivision (?) (germ.: Intervallschachtelung)
|
537
|
-
* without using matlib
|
538
|
-
* see http://www.math.vt.edu/people/brown/doc/sqrts.pdf
|
539
|
-
*/
|
540
|
-
int my_sqrt(int x){
|
541
|
-
int y0=0,y1=x,ym;
|
542
|
-
for (;y0<y1-1;){
|
543
|
-
ym=(y0+y1)/2;
|
544
|
-
if (ym*ym<x) y0=ym; else y1=ym;
|
545
|
-
}
|
546
|
-
return y0;
|
547
|
-
}
|
548
|
-
|
549
|
-
/*
|
550
|
-
** Detect rotation angle (one for whole image)
|
551
|
-
** old: longest text-line and determining the angle of this line.
|
552
|
-
*
|
553
|
-
* search right nearest neighbour of each box and average vectors
|
554
|
-
* to get the text orientation,
|
555
|
-
* upside down decision is not made here (I dont know how to do it)
|
556
|
-
* ToDo: set job->res.lines.{dx,dy}
|
557
|
-
* pass 1: get mean vector to nearest char
|
558
|
-
* pass 2: get mean vector to nearest char without outriders to pass 1
|
559
|
-
* extimate direction as (dx,dy,num)[pass]
|
560
|
-
* ToDo: estimate an error, boxes only work fine for zero-rotation
|
561
|
-
* for 45 degree use vectors, not boxes to get base line
|
562
|
-
*/
|
563
|
-
#define INorm 1024 /* integer unit 1.0 */
|
564
|
-
int detect_rotation_angle(job_t *job){
|
565
|
-
struct box *box2, *box3,
|
566
|
-
*box_nn; /* nearest neighbour box */
|
567
|
-
int x2, y2, x3, y3, dist, mindist, pass,
|
568
|
-
rx=0, ry=0, re=0, // final result
|
569
|
-
/* to avoid 2nd run, wie store pairs in 2 different categories */
|
570
|
-
nn[4]={0,0,0,0}, /* num_pairs used for estimation [(pass-1)%2,pass%2] */
|
571
|
-
dx[4]={0,0,0,0}, /* x-component of rotation vector per pass */
|
572
|
-
dy[4]={0,0,0,0}, /* y-component of rotation vector per pass */
|
573
|
-
er[4]={INorm/4,0,0,0}; /* mean angle deviation to pass-1 (radius^2) */
|
574
|
-
// de; /* ToDo: absolute maximum error (dx^2+dy^2) */
|
575
|
-
// ToDo: next pass: go to bigger distances and reduce max error
|
576
|
-
// error is diff between passes? or diff of bottoms and top borders (?)
|
577
|
-
|
578
|
-
rx=1024; ry=0; // default
|
579
|
-
for (pass=0;pass<4;pass++) {
|
580
|
-
for_each_data(&(job->res.boxlist)) {
|
581
|
-
box2 = (struct box *)list_get_current(&(job->res.boxlist));
|
582
|
-
if (box2->c==PICTURE) continue;
|
583
|
-
/* subfunction probability of char */
|
584
|
-
// i?
|
585
|
-
// if (box2->x1 - box2->x0 < 3) continue; /* smallest font is 4x6 */
|
586
|
-
if (box2->y1 - box2->y0 < 4) continue;
|
587
|
-
/* set maximum possible distance */
|
588
|
-
box_nn=box2; // initial box to compare with
|
589
|
-
|
590
|
-
// ToDo: clustering or majority
|
591
|
-
// the algorithm is far from being perfect, pitfalls are likely
|
592
|
-
// but its better than the old algorithm, ToDo: database-rotated-images
|
593
|
-
mindist = job->src.p.x * job->src.p.x + job->src.p.y * job->src.p.y;
|
594
|
-
/* get middle point of the box */
|
595
|
-
x2 = (box2->x0 + box2->x1)/2;
|
596
|
-
y2 = (box2->y0 + box2->y1)/2;
|
597
|
-
re=0;
|
598
|
-
/* search for nearest neighbour box_nn[pass+1] of box_nn[pass] */
|
599
|
-
for_each_data(&(job->res.boxlist)) {
|
600
|
-
box3 = (struct box *)list_get_current(&(job->res.boxlist));
|
601
|
-
/* try to select only potential neighbouring chars */
|
602
|
-
/* select out all senseless combinations */
|
603
|
-
if (box3->c==PICTURE || box3==box2) continue;
|
604
|
-
x3 = (box3->x0 + box3->x1)/2;
|
605
|
-
y3 = (box3->y0 + box3->y1)/2; /* get middle point of the box */
|
606
|
-
if (x3<x2) continue; /* simplify by going right only */
|
607
|
-
// through-away deviation of angles if > pass-1?
|
608
|
-
// scalprod max in direction, cross prod min in direction
|
609
|
-
// a,b (vectors): <a,b>^2/(|a|*|b|)^2 = 0(90deg)..0.5(45deg).. 1(0deg)
|
610
|
-
// * 1024 ??
|
611
|
-
if (pass>0) { // new variant = scalar product
|
612
|
-
// danger of int overflow, ToDo: use int fraction
|
613
|
-
re =(int) ((1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])
|
614
|
-
*(1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])*INorm
|
615
|
-
/(1.*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2))
|
616
|
-
*(1.*dx[pass-1]*dx[pass-1]+dy[pass-1]*dy[pass-1])));
|
617
|
-
if (INorm-re>er[pass-1]) continue; // hits mean deviation
|
618
|
-
}
|
619
|
-
/* neighbours should have same order of size (?) */
|
620
|
-
if (3*(box3->y1-box3->y0+4) < 2*(box2->y1-box2->y0+1)) continue;
|
621
|
-
if (2*(box3->y1-box3->y0+1) > 3*(box2->y1-box2->y0+4)) continue;
|
622
|
-
if (2*(box3->x1-box3->x0+1) > 5*(box2->x1-box2->x0+4)) continue;
|
623
|
-
if (5*(box3->x1-box3->x0+4) < 2*(box2->x1-box2->x0+1)) continue;
|
624
|
-
/* should be in right range, Idea: center3 outside box2? noholes */
|
625
|
-
if ((x3<box2->x1-1) && (x3>box2->x0+1)
|
626
|
-
&& (y3<box2->y1-1) && (y3>box2->y0+1)) continue;
|
627
|
-
// if chars are of different size, connect careful
|
628
|
-
if ( abs(x3-x2) > 2*(box2->x1 - box2->x0 + box3->x1 - box3 ->x0 + 2)) continue;
|
629
|
-
if ( abs(y3-y2) > (box2->x1 - box2->x0 + box3->x1 - box3 ->x0 + 2)) continue;
|
630
|
-
dist = (y3-y2)*(y3-y2) + (x3-x2)*(x3-x2);
|
631
|
-
// make distances in pass-1 directions shorter or continue if not in pass-1 range?
|
632
|
-
if (dist<9) continue; /* minimum distance^2 is 3^2 */
|
633
|
-
if (dist<mindist) { mindist=dist; box_nn=box3;}
|
634
|
-
// fprintf(stderr,"x y %d %d %d %d dist %d min %d\n",
|
635
|
-
// x2,y2,x3,y3,dist,mindist);
|
636
|
-
} end_for_each(&(job->res.boxlist));
|
637
|
-
|
638
|
-
if (box_nn==box2) continue; /* has no neighbour, next box */
|
639
|
-
|
640
|
-
box3=box_nn; dist=mindist;
|
641
|
-
x3 = (box3->x0 + box3->x1)/2;
|
642
|
-
y3 = (box3->y0 + box3->y1)/2; /* get middle point of the box */
|
643
|
-
// dist = my_sqrt(1024*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2)));
|
644
|
-
// compare with first box
|
645
|
-
x2 = (box2->x0 + box2->x1)/2;
|
646
|
-
y2 = (box2->y0 + box2->y1)/2;
|
647
|
-
// if the high of neighbouring boxes differ, use min diff (y0,y1)
|
648
|
-
if (pass>0 && 16*abs(dy[pass-1]) < dx[pass-1]) // dont work for strong rot.
|
649
|
-
if (abs(box2->y1-box2->y0-box3->y1+box3->y0)>(box2->y1-box2->y0)/8) {
|
650
|
-
// ad eh ck ...
|
651
|
-
if (abs(box2->y1-box3->y1)<abs(y3-y2)) { y2=box2->y1; y3=box3->y1; }
|
652
|
-
// ag ep qu ...
|
653
|
-
if (abs(box2->y0-box3->y0)<abs(y3-y2)) { y2=box2->y0; y3=box3->y0; }
|
654
|
-
}
|
655
|
-
if (abs(x3-x2)<4) continue;
|
656
|
-
dx[pass]+=(x3-x2)*1024; /* normalized before averaging */
|
657
|
-
dy[pass]+=(y3-y2)*1024; /* 1024 is for the precision */
|
658
|
-
nn[pass]++;
|
659
|
-
if (pass>0) { // set error = mean deviation from pass -1
|
660
|
-
re = INorm-(int)((1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])
|
661
|
-
*(1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])*INorm
|
662
|
-
/((1.*(x3-x2)*(x3-x2)+(y3-y2)*(y3-y2))
|
663
|
-
*(1.*dx[pass-1]*dx[pass-1]+dy[pass-1]*dy[pass-1]))
|
664
|
-
);
|
665
|
-
er[pass]+=re;
|
666
|
-
}
|
667
|
-
#if 0
|
668
|
-
if(JOB->cfg.verbose)
|
669
|
-
fprintf(stderr,"# next nb (x,y,dx,dy,re) %6d %6d %5d %5d %5d pass %d\n",
|
670
|
-
x2, y2, x3-x2, y3-y2, re, pass+1);
|
671
|
-
#endif
|
672
|
-
} end_for_each(&(job->res.boxlist));
|
673
|
-
if (!nn[pass]) break;
|
674
|
-
if (nn[pass]) {
|
675
|
-
/* meanvalues */
|
676
|
-
rx=dx[pass]/=nn[pass];
|
677
|
-
ry=dy[pass]/=nn[pass];
|
678
|
-
if (pass>0) er[pass]/=nn[pass];
|
679
|
-
}
|
680
|
-
if(JOB->cfg.verbose)
|
681
|
-
fprintf(stderr,"# rotation angle (x,y,maxr,num)"
|
682
|
-
" %6d %6d %6d %4d pass %d\n",
|
683
|
-
rx, ry, er[pass], nn[pass], pass+1);
|
684
|
-
}
|
685
|
-
if (abs(ry*100)>abs(rx*50))
|
686
|
-
fprintf(stderr,"<!-- gocr will fail, strong rotation angle detected -->\n");
|
687
|
-
/* ToDo: normalize to 2^10 bit (square fits to 32 it) */
|
688
|
-
JOB->res.lines.dx=rx;
|
689
|
-
JOB->res.lines.dy=ry;
|
690
|
-
return 0;
|
691
|
-
}
|
692
|
-
|
693
|
-
/* ----- detect lines --------------- */
|
694
|
-
int detect_text_lines(pix * pp, int mo) {
|
695
|
-
|
696
|
-
if (JOB->cfg.verbose)
|
697
|
-
fprintf(stderr, "# detect.c detect_text_lines (vvv=16 for more info) ");
|
698
|
-
if (mo & 4){
|
699
|
-
if (JOB->cfg.verbose) fprintf(stderr, "# zoning\n# ... ");
|
700
|
-
detect_lines2(pp, 0, 0, pp->x, pp->y, 0); // later replaced by better algo
|
701
|
-
} else
|
702
|
-
detect_lines1(pp, 0, 0, pp->x, pp->y); // old algo
|
703
|
-
|
704
|
-
if(JOB->cfg.verbose) fprintf(stderr,"\n");
|
705
|
-
return 0;
|
706
|
-
}
|
707
|
-
|
708
|
-
|
709
|
-
/* ----- adjust lines --------------- */
|
710
|
-
// rotation angle? JOB->res.lines.dy, .x0 removed later
|
711
|
-
// this is for cases, where m1..m4 is not very sure detected before
|
712
|
-
// chars are recognized
|
713
|
-
int adjust_text_lines(pix * pp, int mo) {
|
714
|
-
struct box *box2;
|
715
|
-
int *m, /* summ m1..m4, num_chars for m1..m4, min m1..m4, max. m1..m4 */
|
716
|
-
l, i, dy, dx, diff=0, y0, y1;
|
717
|
-
|
718
|
-
if ((l=JOB->res.lines.num)<2) return 0; // ???
|
719
|
-
if (JOB->cfg.verbose)
|
720
|
-
fprintf(stderr, "# adjust text lines ");
|
721
|
-
m=(int *)malloc(l*16*sizeof(int));
|
722
|
-
if (!m) { fprintf(stderr," malloc failed\n"); return 0;}
|
723
|
-
for (i=0;i<16*l;i++) m[i]=0; /* initialize */
|
724
|
-
dy=JOB->res.lines.dy; /* tan(alpha) of skewing */
|
725
|
-
dx=JOB->res.lines.dx; /* old: width of image */
|
726
|
-
// js: later skewing is replaced by one transformation of vectorized image
|
727
|
-
|
728
|
-
if (dx)
|
729
|
-
for_each_data(&(JOB->res.boxlist)) {
|
730
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
731
|
-
if (box2->line<=0) continue;
|
732
|
-
if (box2->num_ac<1) continue;
|
733
|
-
if (box2->wac[0]<95) continue;
|
734
|
-
if (box2->m2==0 || box2->y1<box2->m2) continue; // char outside line
|
735
|
-
if (box2->m3==4 || box2->y0>box2->m3) continue; // char outside line
|
736
|
-
y0=box2->y0-((box2->x1)*dy/dx); /* corrected by page skewing */
|
737
|
-
y1=box2->y1-((box2->x1)*dy/dx);
|
738
|
-
if (strchr("aemnr",(char)box2->tac[0])) { // cC vV sS oO ... is unsure!
|
739
|
-
m[box2->line*16+1]+=y0; m[box2->line*16+5]++; // num m2
|
740
|
-
m[box2->line*16+2]+=y1; m[box2->line*16+6]++; // num m3
|
741
|
-
if (m[box2->line*16+ 9]>y0) m[box2->line*16+ 9]=y0; /* min m2 */
|
742
|
-
if (m[box2->line*16+13]<y0) m[box2->line*16+13]=y0; /* max m2 */
|
743
|
-
if (m[box2->line*16+10]>y1) m[box2->line*16+10]=y1; /* min m3 */
|
744
|
-
if (m[box2->line*16+14]<y1) m[box2->line*16+14]=y1; /* max m3 */
|
745
|
-
}
|
746
|
-
if (strchr("bdhklABDEFGHIKLMNRT123456789",(char)box2->tac[0])) {
|
747
|
-
m[box2->line*16+0]+=y0; m[box2->line*16+4]++; // num m1
|
748
|
-
m[box2->line*16+2]+=y1; m[box2->line*16+6]++; // num m3
|
749
|
-
if (m[box2->line*16+ 8]>y0) m[box2->line*16+ 8]=y0; /* min m1 */
|
750
|
-
if (m[box2->line*16+12]<y0) m[box2->line*16+12]=y0; /* max m1 */
|
751
|
-
if (m[box2->line*16+10]>y1) m[box2->line*16+10]=y1; /* min m3 */
|
752
|
-
if (m[box2->line*16+14]<y1) m[box2->line*16+14]=y1; /* max m3 */
|
753
|
-
}
|
754
|
-
if (strchr("gq",(char)box2->tac[0])) {
|
755
|
-
m[box2->line*16+1]+=y0; m[box2->line*16+5]++; // num m2
|
756
|
-
m[box2->line*16+3]+=y1; m[box2->line*16+7]++; // num m4
|
757
|
-
if (m[box2->line*16+ 9]>y0) m[box2->line*16+ 9]=y0; /* min m2 */
|
758
|
-
if (m[box2->line*16+13]<y0) m[box2->line*16+13]=y0; /* max m2 */
|
759
|
-
if (m[box2->line*16+11]>y1) m[box2->line*16+11]=y1; /* min m4 */
|
760
|
-
if (m[box2->line*16+15]<y1) m[box2->line*16+15]=y1; /* max m4 */
|
761
|
-
}
|
762
|
-
} end_for_each(&(JOB->res.boxlist));
|
763
|
-
|
764
|
-
for (i=1;i<l;i++) {
|
765
|
-
diff=0; // show diff per line
|
766
|
-
if (m[i*16+4]) diff+=abs(JOB->res.lines.m1[i]-m[i*16+0]/m[i*16+4]);
|
767
|
-
if (m[i*16+5]) diff+=abs(JOB->res.lines.m2[i]-m[i*16+1]/m[i*16+5]);
|
768
|
-
if (m[i*16+6]) diff+=abs(JOB->res.lines.m3[i]-m[i*16+2]/m[i*16+6]);
|
769
|
-
if (m[i*16+7]) diff+=abs(JOB->res.lines.m4[i]-m[i*16+3]/m[i*16+7]);
|
770
|
-
/* recalculate sureness, empirically */
|
771
|
-
if (m[i*16+4]*m[i*16+5]*m[i*16+6]*m[i*16+7] > 0)
|
772
|
-
JOB->res.lines.wt[i]=(JOB->res.lines.wt[i]+100)/2;
|
773
|
-
else
|
774
|
-
JOB->res.lines.wt[i]=(JOB->res.lines.wt[i]*90)/100;
|
775
|
-
// set mean values of sure detected bounds (rounded precisely)
|
776
|
-
if ( m[i*16+4]) JOB->res.lines.m1[i]=(m[i*16+0]+m[i*16+4]/2)/m[i*16+4];
|
777
|
-
if ( m[i*16+5]) JOB->res.lines.m2[i]=(m[i*16+1]+m[i*16+5]/2)/m[i*16+5];
|
778
|
-
if ( m[i*16+6]) JOB->res.lines.m3[i]=(m[i*16+2]+m[i*16+6]/2)/m[i*16+6];
|
779
|
-
if ( m[i*16+7]) JOB->res.lines.m4[i]=(m[i*16+3]+m[i*16+7]/2)/m[i*16+7];
|
780
|
-
// care about very small fonts
|
781
|
-
if (JOB->res.lines.m2[i]-JOB->res.lines.m1[i]<=1 && m[i*16+5]==0 && m[i*16+4])
|
782
|
-
JOB->res.lines.m2[i]=JOB->res.lines.m1[i]+2;
|
783
|
-
if (JOB->res.lines.m2[i]-JOB->res.lines.m1[i]<=1 && m[i*16+4]==0 && m[i*16+5])
|
784
|
-
JOB->res.lines.m1[i]=JOB->res.lines.m2[i]-2;
|
785
|
-
if (JOB->res.lines.m4[i]-JOB->res.lines.m3[i]<=1 && m[i*16+7]==0 && m[i*16+6])
|
786
|
-
JOB->res.lines.m4[i]=JOB->res.lines.m3[i]+2;
|
787
|
-
if (JOB->res.lines.m4[i]-JOB->res.lines.m3[i]<=1 && m[i*16+6]==0 && m[i*16+7])
|
788
|
-
JOB->res.lines.m3[i]=JOB->res.lines.m4[i]-2;
|
789
|
-
if ( m[i*16+7]<1 &&
|
790
|
-
JOB->res.lines.m4[i]
|
791
|
-
<=JOB->res.lines.m3[i]+(JOB->res.lines.m3[i]-JOB->res.lines.m2[i])/4 )
|
792
|
-
JOB->res.lines.m4[i]=
|
793
|
-
JOB->res.lines.m3[i]+(JOB->res.lines.m3[i]-JOB->res.lines.m2[i])/4;
|
794
|
-
if ( m[i*16+7]<1 && m[i*16+12+2]>0 && // m4 < max.m3+..
|
795
|
-
JOB->res.lines.m4[i] < 2*m[i*16+12+2]-JOB->res.lines.m3[i]+2 )
|
796
|
-
JOB->res.lines.m4[i] = 2*m[i*16+12+2]-JOB->res.lines.m3[i]+2;
|
797
|
-
if (JOB->res.lines.m4[i]<=JOB->res.lines.m3[i])
|
798
|
-
JOB->res.lines.m4[i]= JOB->res.lines.m3[i]+1; /* 4x6 */
|
799
|
-
|
800
|
-
if (JOB->cfg.verbose & 17)
|
801
|
-
fprintf(stderr, "\n# line= %3d m= %4d %+3d %+3d %+3d "
|
802
|
-
" n= %2d %2d %2d %2d w= %3d diff= %d",
|
803
|
-
i, JOB->res.lines.m1[i],
|
804
|
-
JOB->res.lines.m2[i] - JOB->res.lines.m1[i],
|
805
|
-
JOB->res.lines.m3[i] - JOB->res.lines.m1[i],
|
806
|
-
JOB->res.lines.m4[i] - JOB->res.lines.m1[i],
|
807
|
-
m[i*16+4],m[i*16+5],m[i*16+6],m[i*16+7],
|
808
|
-
JOB->res.lines.wt[i], diff);
|
809
|
-
}
|
810
|
-
diff=0; // count adjusted chars
|
811
|
-
#if 1
|
812
|
-
if (dx)
|
813
|
-
for_each_data(&(JOB->res.boxlist)) {
|
814
|
-
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
815
|
-
if (box2->line<=0) continue;
|
816
|
-
/* check if box was on the wrong line, ToDo: search a better line */
|
817
|
-
if (2*box2->y0<2*JOB->res.lines.m1[box2->line]
|
818
|
-
-JOB->res.lines.m4[box2->line]
|
819
|
-
+JOB->res.lines.m1[box2->line]) box2->line=0;
|
820
|
-
if (2*box2->y1>2*JOB->res.lines.m4[box2->line]
|
821
|
-
+JOB->res.lines.m4[box2->line]
|
822
|
-
-JOB->res.lines.m1[box2->line]) box2->line=0;
|
823
|
-
/* do adjustments */
|
824
|
-
if (box2->num_ac>0
|
825
|
-
&& box2->num_ac > 31 && box2->tac[0] < 127 /* islower(>256) may SIGSEGV */
|
826
|
-
&& strchr("cCoOpPsSuUvVwWxXyYzZ",(char)box2->tac[0])) { // no_wchar
|
827
|
-
if (box2->y0-((box2->x1)*dy/dx)
|
828
|
-
< (JOB->res.lines.m1[box2->line]+JOB->res.lines.m2[box2->line])/2
|
829
|
-
&& islower(box2->tac[0])
|
830
|
-
) { setac(box2,toupper((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
|
831
|
-
if (box2->y0-((box2->x1)*dy/dx)
|
832
|
-
> (JOB->res.lines.m1[box2->line]+JOB->res.lines.m2[box2->line]+1)/2
|
833
|
-
&& isupper(box2->tac[0])
|
834
|
-
){ setac(box2,tolower((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
|
835
|
-
}
|
836
|
-
box2->m1=JOB->res.lines.m1[box2->line]+((box2->x1)*dy/dx);
|
837
|
-
box2->m2=JOB->res.lines.m2[box2->line]+((box2->x1)*dy/dx);
|
838
|
-
box2->m3=JOB->res.lines.m3[box2->line]+((box2->x1)*dy/dx);
|
839
|
-
box2->m4=JOB->res.lines.m4[box2->line]+((box2->x1)*dy/dx);
|
840
|
-
} end_for_each(&(JOB->res.boxlist));
|
841
|
-
#endif
|
842
|
-
|
843
|
-
free(m);
|
844
|
-
if(JOB->cfg.verbose) fprintf(stderr,"\n# changed_chars= %d\n",diff);
|
845
|
-
return(diff);
|
846
|
-
}
|
847
|
-
|
848
|
-
/* ---- measure mean character
|
849
|
-
* recalculate mean width and high after changes in boxlist
|
850
|
-
* ToDo: only within a Range?
|
851
|
-
*/
|
852
|
-
int calc_average() {
|
853
|
-
int i = 0, x0, y0, x1, y1;
|
854
|
-
struct box *box4;
|
855
|
-
|
856
|
-
JOB->res.numC = 0;
|
857
|
-
JOB->res.sumY = 0;
|
858
|
-
JOB->res.sumX = 0;
|
859
|
-
for_each_data(&(JOB->res.boxlist)) {
|
860
|
-
box4 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
861
|
-
if( box4->c != PICTURE ){
|
862
|
-
x0 = box4->x0; x1 = box4->x1;
|
863
|
-
y0 = box4->y0; y1 = box4->y1;
|
864
|
-
i++;
|
865
|
-
if (JOB->res.avX * JOB->res.avY > 0) {
|
866
|
-
if (x1 - x0 + 1 > 4 * JOB->res.avX
|
867
|
-
&& y1 - y0 + 1 > 4 * JOB->res.avY) continue; /* small picture */
|
868
|
-
if (4 * (y1 - y0 + 1) < JOB->res.avY || y1 - y0 < 2)
|
869
|
-
continue; // dots .,-_ etc.
|
870
|
-
}
|
871
|
-
if (x1 - x0 + 1 < 4
|
872
|
-
&& y1 - y0 + 1 < 6 ) continue; /* dots etc */
|
873
|
-
JOB->res.sumX += x1 - x0 + 1;
|
874
|
-
JOB->res.sumY += y1 - y0 + 1;
|
875
|
-
JOB->res.numC++;
|
876
|
-
}
|
877
|
-
} end_for_each(&(JOB->res.boxlist));
|
878
|
-
if ( JOB->res.numC ) { /* avoid div 0 */
|
879
|
-
JOB->res.avY = (JOB->res.sumY+JOB->res.numC/2) / JOB->res.numC;
|
880
|
-
JOB->res.avX = (JOB->res.sumX+JOB->res.numC/2) / JOB->res.numC;
|
881
|
-
}
|
882
|
-
if (JOB->cfg.verbose){
|
883
|
-
fprintf(stderr, "# averages: mXmY= %d %d nC= %d n= %d\n",
|
884
|
-
JOB->res.avX, JOB->res.avY, JOB->res.numC, i);
|
885
|
-
}
|
886
|
-
return 0;
|
887
|
-
}
|
888
|
-
|
889
|
-
|
890
|
-
/* ---- analyse boxes, find pictures and mark (do this first!!!)
|
891
|
-
*/
|
892
|
-
int detect_pictures(job_t *job) {
|
893
|
-
int i = 0, x0, y0, x1, y1, num_h;
|
894
|
-
struct box *box2, *box4;
|
895
|
-
|
896
|
-
if ( job->res.numC == 0 ) {
|
897
|
-
if (job->cfg.verbose) fprintf(stderr,
|
898
|
-
"# detect.c L%d Warning: numC=0\n", __LINE__);
|
899
|
-
return -1;
|
900
|
-
}
|
901
|
-
/* ToDo: set Y to uppercase mean value? */
|
902
|
-
job->res.avY = (job->res.sumY+job->res.numC/2) / job->res.numC;
|
903
|
-
job->res.avX = (job->res.sumX+job->res.numC/2) / job->res.numC;
|
904
|
-
/* ToDo: two highest volumes? crosses, on extreme volume + on border */
|
905
|
-
if (job->cfg.verbose)
|
906
|
-
fprintf(stderr, "# detect.c L%d pictures, frames, mXmY= %d %d ... ",
|
907
|
-
__LINE__, job->res.avX, job->res.avY);
|
908
|
-
for_each_data(&(job->res.boxlist)) {
|
909
|
-
box2 = (struct box *)list_get_current(&(job->res.boxlist));
|
910
|
-
if (box2->c == PICTURE) continue;
|
911
|
-
x0 = box2->x0; x1 = box2->x1;
|
912
|
-
y0 = box2->y0; y1 = box2->y1;
|
913
|
-
|
914
|
-
/* pictures could be of unusual size */
|
915
|
-
if (x1 - x0 + 1 > 4 * job->res.avX || y1 - y0 + 1 > 4 * job->res.avY) {
|
916
|
-
/* count objects on same baseline which could be chars */
|
917
|
-
/* else: big headlines could be misinterpreted as pictures */
|
918
|
-
num_h=0;
|
919
|
-
for_each_data(&(job->res.boxlist)) {
|
920
|
-
box4 = (struct box *)list_get_current(&(job->res.boxlist));
|
921
|
-
if (box4->c == PICTURE) continue;
|
922
|
-
if (box4->y1-box4->y0 > 2*(y1-y0)) continue;
|
923
|
-
if (2*(box4->y1-box4->y0) < y1-y0) continue;
|
924
|
-
if (box4->y0 > y0 + (y1-y0+1)/2
|
925
|
-
|| box4->y0 < y0 - (y1-y0+1)/2
|
926
|
-
|| box4->y1 > y1 + (y1-y0+1)/2
|
927
|
-
|| box4->y1 < y1 - (y1-y0+1)/2) continue;
|
928
|
-
// ToDo: continue if numcross() only 1, example: |||IIIll|||
|
929
|
-
num_h++;
|
930
|
-
} end_for_each(&(job->res.boxlist));
|
931
|
-
if (num_h>4) continue;
|
932
|
-
box2->c = PICTURE;
|
933
|
-
i++;
|
934
|
-
}
|
935
|
-
/* ToDo: pictures could have low contrast=Sum((pixel(p,x,y)-160)^2) */
|
936
|
-
} end_for_each(&(job->res.boxlist));
|
937
|
-
// start second iteration
|
938
|
-
if (job->cfg.verbose) {
|
939
|
-
fprintf(stderr, " %d - boxes %d\n", i, job->res.numC-i);
|
940
|
-
}
|
941
|
-
calc_average();
|
942
|
-
return 0;
|
943
|
-
}
|