isbn 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/README +9 -0
- data/Rakefile +13 -0
- data/VERSION +1 -0
- data/isbn.gemspec +329 -0
- data/lib/isbn.rb +90 -0
- data/src/gocr-0.48/.cvsignore +6 -0
- data/src/gocr-0.48/AUTHORS +7 -0
- data/src/gocr-0.48/BUGS +55 -0
- data/src/gocr-0.48/CREDITS +17 -0
- data/src/gocr-0.48/HISTORY +243 -0
- data/src/gocr-0.48/INSTALL +83 -0
- data/src/gocr-0.48/Makefile +193 -0
- data/src/gocr-0.48/Makefile.in +193 -0
- data/src/gocr-0.48/README +165 -0
- data/src/gocr-0.48/READMEde.txt +80 -0
- data/src/gocr-0.48/REMARK.txt +18 -0
- data/src/gocr-0.48/REVIEW +538 -0
- data/src/gocr-0.48/TODO +65 -0
- data/src/gocr-0.48/bin/.cvsignore +2 -0
- data/src/gocr-0.48/bin/create_db +38 -0
- data/src/gocr-0.48/bin/gocr.tcl +527 -0
- data/src/gocr-0.48/bin/gocr_chk.sh +44 -0
- data/src/gocr-0.48/configure +4689 -0
- data/src/gocr-0.48/configure.in +71 -0
- data/src/gocr-0.48/doc/.#Makefile.1.6 +39 -0
- data/src/gocr-0.48/doc/.cvsignore +2 -0
- data/src/gocr-0.48/doc/Makefile +39 -0
- data/src/gocr-0.48/doc/Makefile.in +39 -0
- data/src/gocr-0.48/doc/example.dtd +53 -0
- data/src/gocr-0.48/doc/example.xml +21 -0
- data/src/gocr-0.48/doc/examples.txt +67 -0
- data/src/gocr-0.48/doc/gocr.html +578 -0
- data/src/gocr-0.48/doc/unicode.txt +57 -0
- data/src/gocr-0.48/examples/.#Makefile.1.22 +166 -0
- data/src/gocr-0.48/examples/4x6.png +0 -0
- data/src/gocr-0.48/examples/4x6.txt +2 -0
- data/src/gocr-0.48/examples/5x7.png +0 -0
- data/src/gocr-0.48/examples/5x7.png.txt +2 -0
- data/src/gocr-0.48/examples/5x8.png +0 -0
- data/src/gocr-0.48/examples/5x8.png.txt +2 -0
- data/src/gocr-0.48/examples/Makefile +166 -0
- data/src/gocr-0.48/examples/color.fig +20 -0
- data/src/gocr-0.48/examples/ex.fig +16 -0
- data/src/gocr-0.48/examples/font.tex +22 -0
- data/src/gocr-0.48/examples/font1.tex +46 -0
- data/src/gocr-0.48/examples/font2.fig +27 -0
- data/src/gocr-0.48/examples/font_nw.tex +24 -0
- data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
- data/src/gocr-0.48/examples/handwrt1.txt +10 -0
- data/src/gocr-0.48/examples/inverse.fig +20 -0
- data/src/gocr-0.48/examples/matrix.jpg +0 -0
- data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
- data/src/gocr-0.48/examples/ocr-a-subset.png.txt +4 -0
- data/src/gocr-0.48/examples/ocr-a.png +0 -0
- data/src/gocr-0.48/examples/ocr-a.txt +6 -0
- data/src/gocr-0.48/examples/ocr-b.png +0 -0
- data/src/gocr-0.48/examples/ocr-b.png.txt +4 -0
- data/src/gocr-0.48/examples/polish.tex +28 -0
- data/src/gocr-0.48/examples/rotate45.fig +14 -0
- data/src/gocr-0.48/examples/score +36 -0
- data/src/gocr-0.48/examples/text.tex +28 -0
- data/src/gocr-0.48/gocr.spec +143 -0
- data/src/gocr-0.48/gpl.html +537 -0
- data/src/gocr-0.48/include/.cvsignore +2 -0
- data/src/gocr-0.48/include/config.h +36 -0
- data/src/gocr-0.48/include/config.h.in +36 -0
- data/src/gocr-0.48/include/version.h +2 -0
- data/src/gocr-0.48/install-sh +3 -0
- data/src/gocr-0.48/make.bat +57 -0
- data/src/gocr-0.48/man/.cvsignore +2 -0
- data/src/gocr-0.48/man/Makefile +29 -0
- data/src/gocr-0.48/man/Makefile.in +29 -0
- data/src/gocr-0.48/man/man1/gocr.1 +166 -0
- data/src/gocr-0.48/src/.cvsignore +4 -0
- data/src/gocr-0.48/src/Makefile +132 -0
- data/src/gocr-0.48/src/Makefile.in +132 -0
- data/src/gocr-0.48/src/amiga.h +31 -0
- data/src/gocr-0.48/src/barcode.c +846 -0
- data/src/gocr-0.48/src/barcode.c.orig +593 -0
- data/src/gocr-0.48/src/barcode.h +11 -0
- data/src/gocr-0.48/src/box.c +372 -0
- data/src/gocr-0.48/src/database.c +462 -0
- data/src/gocr-0.48/src/detect.c +943 -0
- data/src/gocr-0.48/src/gocr.c +373 -0
- data/src/gocr-0.48/src/gocr.h +288 -0
- data/src/gocr-0.48/src/jconv.c +168 -0
- data/src/gocr-0.48/src/job.c +84 -0
- data/src/gocr-0.48/src/lines.c +350 -0
- data/src/gocr-0.48/src/list.c +334 -0
- data/src/gocr-0.48/src/list.h +90 -0
- data/src/gocr-0.48/src/ocr0.c +6756 -0
- data/src/gocr-0.48/src/ocr0.h +63 -0
- data/src/gocr-0.48/src/ocr0n.c +1475 -0
- data/src/gocr-0.48/src/ocr1.c +85 -0
- data/src/gocr-0.48/src/ocr1.h +3 -0
- data/src/gocr-0.48/src/otsu.c +289 -0
- data/src/gocr-0.48/src/otsu.h +23 -0
- data/src/gocr-0.48/src/output.c +289 -0
- data/src/gocr-0.48/src/output.h +37 -0
- data/src/gocr-0.48/src/pcx.c +153 -0
- data/src/gocr-0.48/src/pcx.h +9 -0
- data/src/gocr-0.48/src/pgm2asc.c +2893 -0
- data/src/gocr-0.48/src/pgm2asc.h +105 -0
- data/src/gocr-0.48/src/pixel.c +537 -0
- data/src/gocr-0.48/src/pnm.c +533 -0
- data/src/gocr-0.48/src/pnm.h +35 -0
- data/src/gocr-0.48/src/progress.c +87 -0
- data/src/gocr-0.48/src/progress.h +42 -0
- data/src/gocr-0.48/src/remove.c +703 -0
- data/src/gocr-0.48/src/tga.c +87 -0
- data/src/gocr-0.48/src/tga.h +6 -0
- data/src/gocr-0.48/src/unicode.c +1314 -0
- data/src/gocr-0.48/src/unicode.h +1257 -0
- data/src/jpeg-7/Makefile.am +133 -0
- data/src/jpeg-7/Makefile.in +1089 -0
- data/src/jpeg-7/README +322 -0
- data/src/jpeg-7/aclocal.m4 +8990 -0
- data/src/jpeg-7/ansi2knr.1 +36 -0
- data/src/jpeg-7/ansi2knr.c +739 -0
- data/src/jpeg-7/cderror.h +132 -0
- data/src/jpeg-7/cdjpeg.c +181 -0
- data/src/jpeg-7/cdjpeg.h +187 -0
- data/src/jpeg-7/change.log +270 -0
- data/src/jpeg-7/cjpeg.1 +325 -0
- data/src/jpeg-7/cjpeg.c +616 -0
- data/src/jpeg-7/ckconfig.c +402 -0
- data/src/jpeg-7/coderules.txt +118 -0
- data/src/jpeg-7/config.guess +1561 -0
- data/src/jpeg-7/config.sub +1686 -0
- data/src/jpeg-7/configure +17139 -0
- data/src/jpeg-7/configure.ac +317 -0
- data/src/jpeg-7/depcomp +630 -0
- data/src/jpeg-7/djpeg.1 +251 -0
- data/src/jpeg-7/djpeg.c +617 -0
- data/src/jpeg-7/example.c +433 -0
- data/src/jpeg-7/filelist.txt +215 -0
- data/src/jpeg-7/install-sh +520 -0
- data/src/jpeg-7/install.txt +1097 -0
- data/src/jpeg-7/jaricom.c +148 -0
- data/src/jpeg-7/jcapimin.c +282 -0
- data/src/jpeg-7/jcapistd.c +161 -0
- data/src/jpeg-7/jcarith.c +921 -0
- data/src/jpeg-7/jccoefct.c +453 -0
- data/src/jpeg-7/jccolor.c +459 -0
- data/src/jpeg-7/jcdctmgr.c +482 -0
- data/src/jpeg-7/jchuff.c +1612 -0
- data/src/jpeg-7/jcinit.c +65 -0
- data/src/jpeg-7/jcmainct.c +293 -0
- data/src/jpeg-7/jcmarker.c +667 -0
- data/src/jpeg-7/jcmaster.c +770 -0
- data/src/jpeg-7/jcomapi.c +106 -0
- data/src/jpeg-7/jconfig.bcc +48 -0
- data/src/jpeg-7/jconfig.cfg +45 -0
- data/src/jpeg-7/jconfig.dj +38 -0
- data/src/jpeg-7/jconfig.mac +43 -0
- data/src/jpeg-7/jconfig.manx +43 -0
- data/src/jpeg-7/jconfig.mc6 +52 -0
- data/src/jpeg-7/jconfig.sas +43 -0
- data/src/jpeg-7/jconfig.st +42 -0
- data/src/jpeg-7/jconfig.txt +155 -0
- data/src/jpeg-7/jconfig.vc +45 -0
- data/src/jpeg-7/jconfig.vms +37 -0
- data/src/jpeg-7/jconfig.wat +38 -0
- data/src/jpeg-7/jcparam.c +632 -0
- data/src/jpeg-7/jcprepct.c +358 -0
- data/src/jpeg-7/jcsample.c +545 -0
- data/src/jpeg-7/jctrans.c +381 -0
- data/src/jpeg-7/jdapimin.c +396 -0
- data/src/jpeg-7/jdapistd.c +275 -0
- data/src/jpeg-7/jdarith.c +762 -0
- data/src/jpeg-7/jdatadst.c +151 -0
- data/src/jpeg-7/jdatasrc.c +212 -0
- data/src/jpeg-7/jdcoefct.c +736 -0
- data/src/jpeg-7/jdcolor.c +396 -0
- data/src/jpeg-7/jdct.h +393 -0
- data/src/jpeg-7/jddctmgr.c +382 -0
- data/src/jpeg-7/jdhuff.c +1309 -0
- data/src/jpeg-7/jdinput.c +384 -0
- data/src/jpeg-7/jdmainct.c +512 -0
- data/src/jpeg-7/jdmarker.c +1360 -0
- data/src/jpeg-7/jdmaster.c +663 -0
- data/src/jpeg-7/jdmerge.c +400 -0
- data/src/jpeg-7/jdpostct.c +290 -0
- data/src/jpeg-7/jdsample.c +361 -0
- data/src/jpeg-7/jdtrans.c +136 -0
- data/src/jpeg-7/jerror.c +252 -0
- data/src/jpeg-7/jerror.h +304 -0
- data/src/jpeg-7/jfdctflt.c +174 -0
- data/src/jpeg-7/jfdctfst.c +230 -0
- data/src/jpeg-7/jfdctint.c +4348 -0
- data/src/jpeg-7/jidctflt.c +242 -0
- data/src/jpeg-7/jidctfst.c +368 -0
- data/src/jpeg-7/jidctint.c +5137 -0
- data/src/jpeg-7/jinclude.h +91 -0
- data/src/jpeg-7/jmemansi.c +167 -0
- data/src/jpeg-7/jmemdos.c +638 -0
- data/src/jpeg-7/jmemdosa.asm +379 -0
- data/src/jpeg-7/jmemmac.c +289 -0
- data/src/jpeg-7/jmemmgr.c +1118 -0
- data/src/jpeg-7/jmemname.c +276 -0
- data/src/jpeg-7/jmemnobs.c +109 -0
- data/src/jpeg-7/jmemsys.h +198 -0
- data/src/jpeg-7/jmorecfg.h +369 -0
- data/src/jpeg-7/jpegint.h +395 -0
- data/src/jpeg-7/jpeglib.h +1135 -0
- data/src/jpeg-7/jpegtran.1 +272 -0
- data/src/jpeg-7/jpegtran.c +546 -0
- data/src/jpeg-7/jquant1.c +856 -0
- data/src/jpeg-7/jquant2.c +1310 -0
- data/src/jpeg-7/jutils.c +179 -0
- data/src/jpeg-7/jversion.h +14 -0
- data/src/jpeg-7/libjpeg.map +4 -0
- data/src/jpeg-7/libjpeg.txt +3067 -0
- data/src/jpeg-7/ltmain.sh +8406 -0
- data/src/jpeg-7/makcjpeg.st +36 -0
- data/src/jpeg-7/makdjpeg.st +36 -0
- data/src/jpeg-7/makeadsw.vc6 +77 -0
- data/src/jpeg-7/makeasln.vc9 +33 -0
- data/src/jpeg-7/makecdep.vc6 +82 -0
- data/src/jpeg-7/makecdsp.vc6 +130 -0
- data/src/jpeg-7/makecmak.vc6 +159 -0
- data/src/jpeg-7/makecvcp.vc9 +186 -0
- data/src/jpeg-7/makeddep.vc6 +82 -0
- data/src/jpeg-7/makeddsp.vc6 +130 -0
- data/src/jpeg-7/makedmak.vc6 +159 -0
- data/src/jpeg-7/makedvcp.vc9 +186 -0
- data/src/jpeg-7/makefile.ansi +220 -0
- data/src/jpeg-7/makefile.bcc +291 -0
- data/src/jpeg-7/makefile.dj +226 -0
- data/src/jpeg-7/makefile.manx +220 -0
- data/src/jpeg-7/makefile.mc6 +255 -0
- data/src/jpeg-7/makefile.mms +224 -0
- data/src/jpeg-7/makefile.sas +258 -0
- data/src/jpeg-7/makefile.unix +234 -0
- data/src/jpeg-7/makefile.vc +217 -0
- data/src/jpeg-7/makefile.vms +142 -0
- data/src/jpeg-7/makefile.wat +239 -0
- data/src/jpeg-7/makejdep.vc6 +423 -0
- data/src/jpeg-7/makejdsp.vc6 +285 -0
- data/src/jpeg-7/makejdsw.vc6 +29 -0
- data/src/jpeg-7/makejmak.vc6 +425 -0
- data/src/jpeg-7/makejsln.vc9 +17 -0
- data/src/jpeg-7/makejvcp.vc9 +328 -0
- data/src/jpeg-7/makeproj.mac +213 -0
- data/src/jpeg-7/makerdep.vc6 +6 -0
- data/src/jpeg-7/makerdsp.vc6 +78 -0
- data/src/jpeg-7/makermak.vc6 +110 -0
- data/src/jpeg-7/makervcp.vc9 +133 -0
- data/src/jpeg-7/maketdep.vc6 +43 -0
- data/src/jpeg-7/maketdsp.vc6 +122 -0
- data/src/jpeg-7/maketmak.vc6 +131 -0
- data/src/jpeg-7/maketvcp.vc9 +178 -0
- data/src/jpeg-7/makewdep.vc6 +6 -0
- data/src/jpeg-7/makewdsp.vc6 +78 -0
- data/src/jpeg-7/makewmak.vc6 +110 -0
- data/src/jpeg-7/makewvcp.vc9 +133 -0
- data/src/jpeg-7/makljpeg.st +68 -0
- data/src/jpeg-7/maktjpeg.st +30 -0
- data/src/jpeg-7/makvms.opt +4 -0
- data/src/jpeg-7/missing +376 -0
- data/src/jpeg-7/rdbmp.c +439 -0
- data/src/jpeg-7/rdcolmap.c +253 -0
- data/src/jpeg-7/rdgif.c +38 -0
- data/src/jpeg-7/rdjpgcom.1 +63 -0
- data/src/jpeg-7/rdjpgcom.c +515 -0
- data/src/jpeg-7/rdppm.c +459 -0
- data/src/jpeg-7/rdrle.c +387 -0
- data/src/jpeg-7/rdswitch.c +365 -0
- data/src/jpeg-7/rdtarga.c +500 -0
- data/src/jpeg-7/structure.txt +945 -0
- data/src/jpeg-7/testimg.bmp +0 -0
- data/src/jpeg-7/testimg.jpg +0 -0
- data/src/jpeg-7/testimg.ppm +4 -0
- data/src/jpeg-7/testimgp.jpg +0 -0
- data/src/jpeg-7/testorig.jpg +0 -0
- data/src/jpeg-7/testprog.jpg +0 -0
- data/src/jpeg-7/transupp.c +1533 -0
- data/src/jpeg-7/transupp.h +205 -0
- data/src/jpeg-7/usage.txt +605 -0
- data/src/jpeg-7/wizard.txt +211 -0
- data/src/jpeg-7/wrbmp.c +442 -0
- data/src/jpeg-7/wrgif.c +399 -0
- data/src/jpeg-7/wrjpgcom.1 +103 -0
- data/src/jpeg-7/wrjpgcom.c +583 -0
- data/src/jpeg-7/wrppm.c +269 -0
- data/src/jpeg-7/wrrle.c +305 -0
- data/src/jpeg-7/wrtarga.c +253 -0
- data/test/isbn_test.rb +7 -0
- data/test/test_helper.rb +7 -0
- metadata +345 -0
@@ -0,0 +1,372 @@
|
|
1
|
+
/*
|
2
|
+
This is a Optical-Character-Recognition program
|
3
|
+
Copyright (C) 2000-2009 Joerg Schulenburg
|
4
|
+
|
5
|
+
This program is free software; you can redistribute it and/or
|
6
|
+
modify it under the terms of the GNU General Public License
|
7
|
+
as published by the Free Software Foundation; either version 2
|
8
|
+
of the License, or (at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
+
|
19
|
+
see README for EMAIL address
|
20
|
+
|
21
|
+
*/
|
22
|
+
|
23
|
+
#include <stdio.h>
|
24
|
+
#include <stdlib.h>
|
25
|
+
#include <assert.h>
|
26
|
+
#include <string.h>
|
27
|
+
/* do we need #include <math.h>? conflicts with INFINITY in unicode.h */
|
28
|
+
#include "gocr.h"
|
29
|
+
#include "pgm2asc.h"
|
30
|
+
|
31
|
+
/* for sorting letters by position on the image
|
32
|
+
/ ToDo: - use function same line like this or include lines.m1 etc. */
|
33
|
+
int box_gt(struct box *box1, struct box *box2) {
|
34
|
+
// box1 after box2 ?
|
35
|
+
if (box1->line > box2->line)
|
36
|
+
return 1;
|
37
|
+
if (box1->line < box2->line)
|
38
|
+
return 0;
|
39
|
+
if (box1->x0 > box2->x1) // before
|
40
|
+
return 1;
|
41
|
+
if (box1->x1 < box2->x0) // before
|
42
|
+
return 0;
|
43
|
+
if (box1->x0 > box2->x0) // before, overlapping!
|
44
|
+
return 1;
|
45
|
+
|
46
|
+
return 0;
|
47
|
+
}
|
48
|
+
|
49
|
+
/* --- copy part of pix p into new pix b ---- len=10000
|
50
|
+
* Returns: 0 on success, 1 on error.
|
51
|
+
* naming it as copybox isnt very clever, because it dont have to do with the
|
52
|
+
* char boxes (struct box)
|
53
|
+
*/
|
54
|
+
int copybox (pix * p, int x0, int y0, int dx, int dy, pix * b, int len) {
|
55
|
+
int x, y;
|
56
|
+
|
57
|
+
/* test boundaries */
|
58
|
+
if (b->p == NULL || dx < 0 || dy < 0 || dx * dy > len) {
|
59
|
+
fprintf(stderr, " error-copybox x=%5d %5d d=%5d %5d\n", x0, y0, dx, dy);
|
60
|
+
return 1;
|
61
|
+
}
|
62
|
+
|
63
|
+
b->x = dx;
|
64
|
+
b->y = dy;
|
65
|
+
b->bpp = 1;
|
66
|
+
#ifdef FASTER_INCOMPLETE
|
67
|
+
for (y = 0; y < dy; y++)
|
68
|
+
memcpy(&pixel_atp(b, 0, y), &pixel_atp(p, x0, y + y0 ), dx);
|
69
|
+
// and unmark pixels
|
70
|
+
#else
|
71
|
+
for (y = 0; y < dy; y++)
|
72
|
+
for (x = 0; x < dx; x++)
|
73
|
+
pixel_atp(b, x, y) = getpixel(p, x + x0, y + y0);
|
74
|
+
#endif
|
75
|
+
|
76
|
+
return 0;
|
77
|
+
}
|
78
|
+
|
79
|
+
/* reset table of alternative chars (and free memory) */
|
80
|
+
int reset_box_ac(struct box *box){
|
81
|
+
int i;
|
82
|
+
for (i=0; i<box->num_ac; i++)
|
83
|
+
if (box->tas[i]) {
|
84
|
+
/* fprintf(stderr,"DBG free_s[%d] %p %s\n",i,box->tas[i],box->tas[i]); */
|
85
|
+
free(box->tas[i]);
|
86
|
+
box->tas[i]=0; /* prevent double freeing */
|
87
|
+
}
|
88
|
+
box->num_ac=0; /* mark as freed */
|
89
|
+
return 0;
|
90
|
+
}
|
91
|
+
|
92
|
+
/* ini or copy a box: get memory for box and initialize the memory */
|
93
|
+
struct box *malloc_box (struct box *inibox) {
|
94
|
+
struct box *buf;
|
95
|
+
int i;
|
96
|
+
|
97
|
+
buf = (struct box *) malloc(sizeof(struct box));
|
98
|
+
if (!buf)
|
99
|
+
return NULL;
|
100
|
+
if (inibox) {
|
101
|
+
memcpy(buf, inibox, sizeof(struct box));
|
102
|
+
/* only pointer are copied, we want to copy the contents too */
|
103
|
+
for (i=0;i<inibox->num_ac;i++) {
|
104
|
+
if (inibox->tas[i]) {
|
105
|
+
buf->tas[i]=(char *)malloc(strlen(inibox->tas[i])+1);
|
106
|
+
memcpy(buf->tas[i], inibox->tas[i], strlen(inibox->tas[i])+1);
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
else { /* ToDo: init it */
|
111
|
+
buf->num_ac=0;
|
112
|
+
buf->num_frames=0;
|
113
|
+
}
|
114
|
+
/* fprintf(stderr,"\nDBG ini_box %p",buf); */
|
115
|
+
return buf;
|
116
|
+
}
|
117
|
+
|
118
|
+
/* Release a box: first the strings of its alternative char table, then
 * the box itself.  A NULL pointer is accepted and ignored.
 * Always returns 0.
 */
int free_box (struct box *box) {
  if (box != NULL) {
    reset_box_ac(box);   /* strings of the alternative char table */
    free(box);           /* the box memory itself */
  }
  return 0;
}
|
126
|
+
|
127
|
+
/* simplify the vectorgraph,
|
128
|
+
* but what is the best way?
|
129
|
+
* a) melting two neighbouring vectors with nearly same direction?
|
130
|
+
* (nearest angle to pi)
|
131
|
+
* b) melting three neigbours with smallest area?
|
132
|
+
* ToDo:
|
133
|
+
* mode = 0 - only lossless
|
134
|
+
* mode = 1 - reduce one vector, smallest possible loss
|
135
|
+
* mode = 2 - remove jitter (todo, or somewhere else)
|
136
|
+
* ToDo: include also loop around (last - first element)
|
137
|
+
* ToDo: reduce by 10..50%
|
138
|
+
*/
|
139
|
+
int reduce_vectors ( struct box *box1, int mode ) {
|
140
|
+
int i1, i2, nx, ny, mx, my, len,
|
141
|
+
minlen=1024, /* minlength of to neighbouring vectors */
|
142
|
+
besti1=0, /* frame for best reduction */
|
143
|
+
besti2=2; /* vector replacing its predecessor */
|
144
|
+
double sprod, maxsprod=-1;
|
145
|
+
if (mode!=1) fprintf(stderr,"ERR not supported yet, ToDo\n");
|
146
|
+
for (i2=1,i1=0; i1<box1->num_frames; i1++) { /* every frame */
|
147
|
+
for (;i2<box1->num_frame_vectors[i1]-1; i2++) { /* every vector */
|
148
|
+
/* predecessor n */
|
149
|
+
nx = box1->frame_vector[i2-0][0] - box1->frame_vector[i2-1][0];
|
150
|
+
ny = box1->frame_vector[i2-0][1] - box1->frame_vector[i2-1][1];
|
151
|
+
/* successor m */
|
152
|
+
mx = box1->frame_vector[i2+1][0] - box1->frame_vector[i2-0][0];
|
153
|
+
my = box1->frame_vector[i2+1][1] - box1->frame_vector[i2-0][1];
|
154
|
+
/* angle is w = a*b/(|a|*|b|) = 1 means parallel */
|
155
|
+
/* normalized: minimize w^2 = (a*b/(|a|*|b|)-1)^2 */
|
156
|
+
/* -1=90grd, 0=0grd, -2=180grd */
|
157
|
+
sprod = /* fabs */(abs(nx*mx+ny*my)*(nx*mx+ny*my)
|
158
|
+
/(1.*(nx*nx+ny*ny)*(mx*mx+my*my))-1);
|
159
|
+
/* we dont include math.h because INFINITY conflicts to unicode,h */
|
160
|
+
if (sprod<0) sprod=-sprod;
|
161
|
+
len = (mx*mx+my*my)*(nx*nx+ny*ny); /* sum lengths^2 */
|
162
|
+
// ..c ###c ... .. ...
|
163
|
+
// .b. len=2+2 #b.. len=2+5 #bc len=1+2 bc len=1+1 b#a len=4+5
|
164
|
+
// a.. spr=0 a... spr=1/10 a.. spr=1/4 a. spr=1 ##c spr=9/5
|
165
|
+
//
|
166
|
+
if ( len* sprod* sprod* sprod* sprod
|
167
|
+
<minlen*maxsprod*maxsprod*maxsprod*maxsprod
|
168
|
+
|| maxsprod<0) /* Bad! ToDo! */
|
169
|
+
{ maxsprod=sprod; besti1=i1; besti2=i2; minlen=len; }
|
170
|
+
}
|
171
|
+
}
|
172
|
+
if (box1->num_frames>0)
|
173
|
+
for (i2=besti2; i2<box1->num_frame_vectors[ box1->num_frames-1 ]-1; i2++) {
|
174
|
+
box1->frame_vector[i2][0]=box1->frame_vector[i2+1][0];
|
175
|
+
box1->frame_vector[i2][1]=box1->frame_vector[i2+1][1];
|
176
|
+
}
|
177
|
+
for (i1=besti1; i1<box1->num_frames; i1++)
|
178
|
+
box1->num_frame_vectors[i1]--;
|
179
|
+
// fprintf(stderr,"\nDBG_reduce_vectors i= %d nv= %d sprod=%f len2=%d\n# ...",
|
180
|
+
// besti2,box1->num_frame_vectors[ box1->num_frames-1 ],maxsprod,minlen);
|
181
|
+
// out_x(box1);
|
182
|
+
return 0;
|
183
|
+
}
|
184
|
+
|
185
|
+
/* add the contents of box2 to box1
|
186
|
+
* especially add vectors of box2 to box1
|
187
|
+
*/
|
188
|
+
int merge_boxes( struct box *box1, struct box *box2 ) {
|
189
|
+
int i1, i2, i3, i4;
|
190
|
+
struct box tmpbox, *bsmaller, *bbigger; /* for mixing and sorting */
|
191
|
+
/* DEBUG, use valgrind to check uninitialized memory */
|
192
|
+
#if 0
|
193
|
+
fprintf(stderr,"\nDBG merge_boxes_input:"); out_x(box1); out_x(box2);
|
194
|
+
#endif
|
195
|
+
/* pair distance is to expendable, taking borders is easier */
|
196
|
+
if ((box2->x1 - box2->x0)*(box2->y1 - box2->y0)
|
197
|
+
>(box1->x1 - box1->x0)*(box1->y1 - box1->y0)) {
|
198
|
+
bbigger=box2; bsmaller=box1; }
|
199
|
+
else {
|
200
|
+
bbigger=box1; bsmaller=box2; }
|
201
|
+
/* ToDo: does not work if a third box is added */
|
202
|
+
if (box2->y0>box1->y1 || box2->y1<box1->y0
|
203
|
+
|| box2->x0>box1->x1 || box2->x1<box1->x0) {
|
204
|
+
box1->num_boxes += box2->num_boxes; /* num seperate objects 2=ij */
|
205
|
+
} else {
|
206
|
+
if (box2->num_boxes>box1->num_boxes) box1->num_boxes=box2->num_boxes;
|
207
|
+
box1->num_subboxes += box2->num_subboxes+1; /* num holes 1=abdepq 2=B */
|
208
|
+
}
|
209
|
+
box1->dots += box2->dots; /* num i-dots */
|
210
|
+
if ( box2->x0 < box1->x0 ) box1->x0 = box2->x0;
|
211
|
+
if ( box2->x1 > box1->x1 ) box1->x1 = box2->x1;
|
212
|
+
if ( box2->y0 < box1->y0 ) box1->y0 = box2->y0;
|
213
|
+
if ( box2->y1 > box1->y1 ) box1->y1 = box2->y1;
|
214
|
+
i1 = i2 = 0;
|
215
|
+
if (bbigger->num_frames)
|
216
|
+
i1 = bbigger->num_frame_vectors[ bbigger->num_frames - 1 ];
|
217
|
+
if (bsmaller->num_frames)
|
218
|
+
i2 = bsmaller->num_frame_vectors[ bsmaller->num_frames - 1 ];
|
219
|
+
while (i1+i2 > MaxFrameVectors) {
|
220
|
+
if (i1>i2) { reduce_vectors( bbigger, 1 ); i1--; }
|
221
|
+
else { reduce_vectors( bsmaller, 1 ); i2--; }
|
222
|
+
}
|
223
|
+
/* if i1+i2>MaxFrameVectors simplify the vectorgraph */
|
224
|
+
/* if sum num_frames>MaxNumFrames through shortest graph away and warn */
|
225
|
+
/* first copy the bigger box */
|
226
|
+
memcpy(&tmpbox, bbigger, sizeof(struct box));
|
227
|
+
/* attach the smaller box */
|
228
|
+
for (i4=i3=0; i3<bsmaller->num_frames; i3++) {
|
229
|
+
if (tmpbox.num_frames>=MaxNumFrames) break;
|
230
|
+
|
231
|
+
for (; i4<bsmaller->num_frame_vectors[i3]; i4++) {
|
232
|
+
memcpy(tmpbox.frame_vector[i1],
|
233
|
+
bsmaller->frame_vector[i4],2*sizeof(int));
|
234
|
+
i1++;
|
235
|
+
}
|
236
|
+
tmpbox.num_frame_vectors[ tmpbox.num_frames ] = i1;
|
237
|
+
tmpbox.frame_vol[ tmpbox.num_frames ] = bsmaller->frame_vol[ i3 ];
|
238
|
+
tmpbox.frame_per[ tmpbox.num_frames ] = bsmaller->frame_per[ i3 ];
|
239
|
+
tmpbox.num_frames++;
|
240
|
+
if (tmpbox.num_frames>=MaxNumFrames) {
|
241
|
+
if (JOB->cfg.verbose)
|
242
|
+
fprintf(stderr,"\nDBG merge_boxes MaxNumFrames reached");
|
243
|
+
break;
|
244
|
+
}
|
245
|
+
}
|
246
|
+
/* copy tmpbox to destination */
|
247
|
+
box1->num_frames = tmpbox.num_frames;
|
248
|
+
memcpy(box1->num_frame_vectors,
|
249
|
+
tmpbox.num_frame_vectors,sizeof(int)*MaxNumFrames);
|
250
|
+
memcpy(box1->frame_vol,
|
251
|
+
tmpbox.frame_vol,sizeof(int)*MaxNumFrames);
|
252
|
+
memcpy(box1->frame_per,
|
253
|
+
tmpbox.frame_per,sizeof(int)*MaxNumFrames);
|
254
|
+
memcpy(box1->frame_vector,
|
255
|
+
tmpbox.frame_vector,sizeof(int)*2*MaxFrameVectors);
|
256
|
+
#if 0
|
257
|
+
if (JOB->cfg.verbose)
|
258
|
+
fprintf(stderr,"\nDBG merge_boxes_result:"); out_x(box1);
|
259
|
+
#endif
|
260
|
+
return 0;
|
261
|
+
}
|
262
|
+
|
263
|
+
/* used for division of glued chars
|
264
|
+
* after a box is splitted into 2, where vectors are copied to both,
|
265
|
+
* vectors outside the new box are cutted and thrown away,
|
266
|
+
* later replaced by
|
267
|
+
* - 1st remove outside vectors with outside neighbours (complete frames?)
|
268
|
+
* add vector on outside vector with inside neighbours
|
269
|
+
* care about connections through box between outside vectors
|
270
|
+
* - 2nd reduce outside crossings (inclusive splitting frames if necessary)
|
271
|
+
* depending on direction (rotation) of outside connections
|
272
|
+
* - 3th shift outside vectors to crossing points
|
273
|
+
* - split add this points, connect only in-out...out-in,
|
274
|
+
* - cutting can result in more objects
|
275
|
+
* ToDo:
|
276
|
+
* dont connect --1---2--------3----4-- new-y1 (inside above not drawn)
|
277
|
+
* \ \->>>>-/ / outside
|
278
|
+
* \----<<<<-----/ old-y1
|
279
|
+
* |======| subtractable?
|
280
|
+
*
|
281
|
+
* only connect --1---2--------3----4-- new-y1
|
282
|
+
* \>>/ \>>>/ old-y1 outside
|
283
|
+
* ToDo: what about cutting 2 frames (example: 2fold melted MN)
|
284
|
+
* better restart framing algo?
|
285
|
+
*
|
286
|
+
* ToDo: new vol, per
|
287
|
+
*/
|
288
|
+
int cut_box( struct box *box1) {
|
289
|
+
int i1, i2, i3, i4, x, y, lx, ly, dbg=0;
|
290
|
+
if (JOB->cfg.verbose) dbg=1; // debug level, enlarge to get more output
|
291
|
+
if (dbg) fprintf(stderr,"\n cut box x= %3d %3d", box1->x0, box1->y0);
|
292
|
+
/* check if complete frames are outside the box */
|
293
|
+
for (i1=0; i1<box1->num_frames; i1++){
|
294
|
+
if (dbg>2) fprintf(stderr,"\n checking frame %d outside", i1);
|
295
|
+
i2 = ((i1)?box1->num_frame_vectors[ i1-1 ]:0); // this frame
|
296
|
+
i3 = box1->num_frame_vectors[ i1 ]; // next frame
|
297
|
+
for (i4=i2; i4 < i3; i4++) {
|
298
|
+
x = box1->frame_vector[i4][0];
|
299
|
+
y = box1->frame_vector[i4][1];
|
300
|
+
/* break, if one vector is lying inside */
|
301
|
+
if (x>=box1->x0 && x<=box1->x1 && y>=box1->y0 && y<=box1->y1) break;
|
302
|
+
}
|
303
|
+
if (i4==i3) { /* all vectors outside */
|
304
|
+
if (dbg>1) fprintf(stderr,"\n remove frame %d",i1);
|
305
|
+
/* replace all frames i1,i1+1,... by i1+1,i1+2,... */
|
306
|
+
/* replace (x,y) pairs first */
|
307
|
+
for (i4=i2; i4<box1->num_frame_vectors[ box1->num_frames-1 ]-(i3-i2);
|
308
|
+
i4++) {
|
309
|
+
box1->frame_vector[i4][0] = box1->frame_vector[i4+i3-i2][0];
|
310
|
+
box1->frame_vector[i4][1] = box1->frame_vector[i4+i3-i2][1];
|
311
|
+
}
|
312
|
+
/* replace the num_frame_vectors */
|
313
|
+
for (i4=i1; i4<box1->num_frames-1; i4++)
|
314
|
+
box1->num_frame_vectors[ i4 ] =
|
315
|
+
box1->num_frame_vectors[ i4+1 ]-(i3-i2);
|
316
|
+
box1->num_frames--; i1--;
|
317
|
+
}
|
318
|
+
}
|
319
|
+
/* remove vectors outside the box */
|
320
|
+
i3=0;
|
321
|
+
for (i1=0; i1<box1->num_frames; i1++){
|
322
|
+
if (dbg>2) fprintf(stderr,"\n check cutting vectors on frame %d", i1);
|
323
|
+
x = box1->frame_vector[0][0]; /* last x */
|
324
|
+
y = box1->frame_vector[0][1]; /* last y */
|
325
|
+
/* ToDo: start inside to get a closed object */
|
326
|
+
if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) i3=1;
|
327
|
+
for (i2=0; i2<box1->num_frame_vectors[ i1 ]; i2++) {
|
328
|
+
lx = x; /* last x */
|
329
|
+
ly = y; /* last y */
|
330
|
+
x = box1->frame_vector[i2][0];
|
331
|
+
y = box1->frame_vector[i2][1];
|
332
|
+
// fprintf(stderr,"DBG LEV3 i2= %3d xy= %3d %3d",i2,x,y);
|
333
|
+
/* check if outside */
|
334
|
+
if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) {
|
335
|
+
/* replace by nearest point at border, ToDo: better crossingpoint */
|
336
|
+
if (i3==0) { /* wrong if it starts outside */
|
337
|
+
if (x < box1->x0) x = box1->frame_vector[i2][0] = box1->x0;
|
338
|
+
if (x > box1->x1) x = box1->frame_vector[i2][0] = box1->x1;
|
339
|
+
if (y < box1->y0) y = box1->frame_vector[i2][1] = box1->y0;
|
340
|
+
if (y > box1->y1) y = box1->frame_vector[i2][1] = box1->y1;
|
341
|
+
} else {
|
342
|
+
/* remove vector */
|
343
|
+
if (dbg>1) fprintf(stderr,"\n remove vector[%d][%d] x= %2d %2d",i1,i2,x-box1->x0,y-box1->y0);
|
344
|
+
for (i4=i2;i4<box1->num_frame_vectors[ box1->num_frames-1 ]-1;i4++) {
|
345
|
+
box1->frame_vector[i4][0] = box1->frame_vector[i4+1][0];
|
346
|
+
box1->frame_vector[i4][1] = box1->frame_vector[i4+1][1];
|
347
|
+
}
|
348
|
+
for (i4=i1; i4<box1->num_frames; i4++)
|
349
|
+
box1->num_frame_vectors[ i4 ]--;
|
350
|
+
i2--; /* next element is shiftet now, setting back the counter */
|
351
|
+
}
|
352
|
+
i3++;
|
353
|
+
// fprintf(stderr," outside i3= %d\n",i3);
|
354
|
+
continue;
|
355
|
+
}
|
356
|
+
// fprintf(stderr," inside i3= %d",i3);
|
357
|
+
if (i3) { /* ToDo: better crossing point last vector and border */
|
358
|
+
if (lx < box1->x0) lx = box1->x0;
|
359
|
+
if (lx > box1->x1) lx = box1->x1;
|
360
|
+
if (ly < box1->y0) ly = box1->y0;
|
361
|
+
if (ly > box1->y1) ly = box1->y1;
|
362
|
+
x = box1->frame_vector[i2][0] = lx;
|
363
|
+
y = box1->frame_vector[i2][1] = ly;
|
364
|
+
i3 = 0;
|
365
|
+
}
|
366
|
+
// fprintf(stderr," xy= %3d %3d\n",x,y);
|
367
|
+
}
|
368
|
+
}
|
369
|
+
if (dbg>2) { fprintf(stderr,"\nDBG cut_box_result:"); out_x(box1); }
|
370
|
+
return 0;
|
371
|
+
}
|
372
|
+
|
@@ -0,0 +1,462 @@
|
|
1
|
+
/*
|
2
|
+
This is a Optical-Character-Recognition program
|
3
|
+
Copyright (C) 2000-2009 Joerg Schulenburg
|
4
|
+
|
5
|
+
This program is free software; you can redistribute it and/or
|
6
|
+
modify it under the terms of the GNU General Public License
|
7
|
+
as published by the Free Software Foundation; either version 2
|
8
|
+
of the License, or (at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
+
|
19
|
+
see README for EMAIL address
|
20
|
+
*/
|
21
|
+
|
22
|
+
#include <stdio.h>
|
23
|
+
#include <stdlib.h>
|
24
|
+
#include "gocr.h"
|
25
|
+
#include "pnm.h"
|
26
|
+
#include "pgm2asc.h"
|
27
|
+
#include <string.h>
|
28
|
+
#include <time.h>
|
29
|
+
|
30
|
+
#define Blen 256
|
31
|
+
|
32
|
+
// load boxes from database into boxlist (for faster access)
|
33
|
+
// used as alternate engine, comparing chars with database
|
34
|
+
int load_db(void) {
|
35
|
+
FILE *f1;
|
36
|
+
char s1[Blen+1],
|
37
|
+
s2[Blen+1] = "./db/", /* ToDo: replace by constant! by configure */
|
38
|
+
*s3;
|
39
|
+
int i, j, ii, i2, line;
|
40
|
+
struct box *box1;
|
41
|
+
pix *pp;
|
42
|
+
|
43
|
+
if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
|
44
|
+
i2=strlen(s2);
|
45
|
+
if (JOB->cfg.verbose)
|
46
|
+
fprintf(stderr, "# load database %s %s ... ",s2,JOB->cfg.db_path);
|
47
|
+
|
48
|
+
strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
|
49
|
+
f1 = fopen(s2, "r");
|
50
|
+
if (!f1) {
|
51
|
+
fprintf(stderr, " DB %s not found\n",s2);
|
52
|
+
return 1;
|
53
|
+
}
|
54
|
+
|
55
|
+
line = 0; /* line counter for better error report */
|
56
|
+
for (ii = 0; !feof(f1); ii++) {
|
57
|
+
/* bbg: should write a better input routine */
|
58
|
+
if (!fgets(s1, Blen, f1)) break; line++;
|
59
|
+
j = strlen(s1);
|
60
|
+
/* remove carriage return sequences from line */
|
61
|
+
while (j > 0 && (s1[j - 1] == '\r' || s1[j - 1] == '\n'))
|
62
|
+
s1[--j] = 0;
|
63
|
+
if (!j) continue; /* skip empty line */
|
64
|
+
if (s1[0]=='#') continue; /* skip comments (v0.44) */
|
65
|
+
/* copy file name */
|
66
|
+
for (i = 0; i < j && i+i2 < Blen && strchr(" \t,;",s1[i]) == 0; i++)
|
67
|
+
s2[i2 + i] = s1[i];
|
68
|
+
s2[i2+i]=0;
|
69
|
+
/* skip spaces */
|
70
|
+
for (; i < j && strchr(" \t",s1[i]) != 0; i++);
|
71
|
+
/* by now: read pix, fill box, goto next ??? */
|
72
|
+
pp = (pix *)malloc(sizeof(pix));
|
73
|
+
if( !pp ) fprintf(stderr,"malloc error in load_db pix\n");
|
74
|
+
|
75
|
+
// if (JOB->cfg.verbose) fprintf(stderr,"\n# readpgm %s ",s2);
|
76
|
+
if (readpgm(s2, pp, 0 * JOB->cfg.verbose)!=0) {
|
77
|
+
fprintf(stderr,"\ndatabase error: readpgm %s\n", s2);
|
78
|
+
exit(-1);
|
79
|
+
}
|
80
|
+
|
81
|
+
box1 = (struct box *)malloc_box(NULL);
|
82
|
+
if(!box1) fprintf(stderr,"malloc error in load_db box1\n");
|
83
|
+
box1->x0 = 0;
|
84
|
+
box1->x1 = pp->x-1; // white border 1 pixel width
|
85
|
+
box1->y0 = 0;
|
86
|
+
box1->y1 = pp->y-1;
|
87
|
+
box1->x = 1;
|
88
|
+
box1->y = 1;
|
89
|
+
box1->dots = 0;
|
90
|
+
box1->c = 0;
|
91
|
+
box1->modifier = 0; /* ToDo: obsolete */
|
92
|
+
box1->tas[0]=NULL;
|
93
|
+
box1->tac[0]=0;
|
94
|
+
box1->wac[0]=100; /* really 100% sure? */
|
95
|
+
box1->num_ac=1;
|
96
|
+
if (s1[i]=='"'){ /* parse a string */
|
97
|
+
j=strrchr(s1+i+1,'"')-(s1+i+1); /* we only look for first and last "" */
|
98
|
+
if (j>=1) {
|
99
|
+
s3=(char *)malloc(j+1);
|
100
|
+
if (!s3) fprintf (stderr, "malloc error in load_db s3\n");
|
101
|
+
if (s3) {
|
102
|
+
memcpy(s3,s1+i+1,j);
|
103
|
+
s3[j]=0;
|
104
|
+
box1->tas[0]=s3;
|
105
|
+
// fprintf(stderr,"\nstring=%s",s3);
|
106
|
+
}
|
107
|
+
} else { fprintf(stderr,"load_db: string parse error L%d\n",line); }
|
108
|
+
} else {
|
109
|
+
box1->tac[0] = box1->c = s1[i]; /* try to interpret as ASCII */
|
110
|
+
/* we can live without hexcode in future if we use UTF8-strings */
|
111
|
+
s3=s1+i;
|
112
|
+
j=strtol( s1+i, &s3, 16); /* try to read 4 to 8 digit hex unicode */
|
113
|
+
/* if its an hexcode, ASCII interpretation is overwritten */
|
114
|
+
if( j && i+3<=Blen && s3-s1-i>3 ) box1->tac[0] = box1->c = j;
|
115
|
+
// fprintf(stderr,"\nhexcode=%04x=%04x %d",(int)j,(int)box1->c,s3-s1-i);
|
116
|
+
}
|
117
|
+
box1->num = 0;
|
118
|
+
box1->line = -1;
|
119
|
+
box1->m1 = 0; /* ToDo: should be given too in the database! */
|
120
|
+
box1->m2 = 0;
|
121
|
+
box1->m3 = 0;
|
122
|
+
box1->m4 = 0;
|
123
|
+
box1->p = pp;
|
124
|
+
list_app(&JOB->tmp.dblist, box1); // append to list
|
125
|
+
#if 0
|
126
|
+
out_x(box1);
|
127
|
+
#endif
|
128
|
+
}
|
129
|
+
fclose(f1);
|
130
|
+
if (JOB->cfg.verbose)
|
131
|
+
fprintf(stderr, " %d chars loaded\n", ii);
|
132
|
+
return 0;
|
133
|
+
}
|
134
|
+
|
135
|
+
// expand database from box/boxlist name=db_$utime.pbm
|
136
|
+
// this is added in version v0.3.3
|
137
|
+
int store_db(struct box *box1) {
|
138
|
+
FILE *f1;
|
139
|
+
char s2[Blen+1] = "./db/", s3[Blen+1];
|
140
|
+
int i2, dx, dy;
|
141
|
+
unsigned c_out;
|
142
|
+
pix b; /* temporary mini page */
|
143
|
+
|
144
|
+
if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
|
145
|
+
i2=strlen(s2);
|
146
|
+
|
147
|
+
/* add (first) char and time to the file name for better debugging */
|
148
|
+
|
149
|
+
/* decide between 7bit ASCII and UTF8-char or string */
|
150
|
+
c_out = ((box1->num_ac && box1->tas[0]) ?
|
151
|
+
(unsigned char )box1->tas[0][0] /* char */ :
|
152
|
+
box1->c /* wchar */);
|
153
|
+
/* (unsigned int)(( char)0x80) = 0xffffff80 */
|
154
|
+
/* (unsigned int)((unsigned char)0x80) = 0x00000080 */
|
155
|
+
|
156
|
+
/* name generation can cause problems, if called twice within a second */
|
157
|
+
sprintf(s3,"db_%04x_%08lx.pbm", c_out, (unsigned long)time(NULL));
|
158
|
+
/* ToDo: the file name may be not unique */
|
159
|
+
|
160
|
+
strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
|
161
|
+
f1 = fopen(s2, "a");
|
162
|
+
if (!f1) {
|
163
|
+
fprintf(stderr, " could not access %s\n",s2);
|
164
|
+
return 1;
|
165
|
+
}
|
166
|
+
strncpy(s2+i2,s3,strlen(s3)); s2[i2+strlen(s3)]=0;
|
167
|
+
/* store image and infos about the char */
|
168
|
+
/* ToDo: store the vector list instead of the pixelarray */
|
169
|
+
|
170
|
+
if (JOB->cfg.verbose)
|
171
|
+
fprintf(stderr, "store_db: add file %s to database (nac=%d c=%04x)"
|
172
|
+
"\n#",s3, box1->num_ac, c_out);
|
173
|
+
|
174
|
+
dx=box1->x1-box1->x0+1;
|
175
|
+
dy=box1->y1-box1->y0+1;
|
176
|
+
b.p = (unsigned char *) malloc( dx * dy );
|
177
|
+
if( !b.p ){
|
178
|
+
fprintf( stderr, "\nFATAL: malloc failed, skip store_db" );
|
179
|
+
return 2;
|
180
|
+
}
|
181
|
+
if (copybox(box1->p, box1->x0, box1->y0, dx, dy, &b, dx * dy))
|
182
|
+
return -1;
|
183
|
+
|
184
|
+
writepbm(s2,&b); /* What is to do on error? */
|
185
|
+
free(b.p);
|
186
|
+
|
187
|
+
/* store the database line */
|
188
|
+
/* some infos about box1->m1,..,m4 should added (base line, high etc.) */
|
189
|
+
if (box1->num_ac && box1->tas[0]) {
|
190
|
+
fprintf(f1, "%s \"%s\"\n",s3,box1->tas[0]);
|
191
|
+
/* ToDo: what if tas contains '"'? */
|
192
|
+
} else {
|
193
|
+
if( (box1->c >= '0' && box1->c <= '9')
|
194
|
+
|| (box1->c >= 'A' && box1->c <= 'Z')
|
195
|
+
|| (box1->c >= 'a' && box1->c <= 'z') )
|
196
|
+
fprintf(f1, "%s %c\n",s3,(char)box1->c);
|
197
|
+
else {
|
198
|
+
if (((box1->c)>>16)>>16)
|
199
|
+
fprintf(f1, "%s %08x\n",s3,(unsigned int)box1->c);
|
200
|
+
else
|
201
|
+
fprintf(f1, "%s %04x\n",s3,(unsigned int)box1->c);
|
202
|
+
}
|
203
|
+
}
|
204
|
+
fclose(f1);
|
205
|
+
return 0;
|
206
|
+
}
|
207
|
+
|
208
|
+
/* function is only for user prompt on console to identify chars
|
209
|
+
it prints out a part of pixmap b at point x0,y0 to stderr
|
210
|
+
using dots .,; if no pixel, and @xoO for pixels
|
211
|
+
*/
|
212
|
+
void out_env(struct box *px ){
|
213
|
+
int x0,y0,x1,y1,dx,dy,x,y,x2,y2,yy0,tx,ty,i,cs;
|
214
|
+
char c1, c2; pix *b;
|
215
|
+
cs=JOB->cfg.cs;
|
216
|
+
yy0=px->y0;
|
217
|
+
{ /* overwrite rest of arguments */
|
218
|
+
b=px->p;
|
219
|
+
x0=px->x0; x1=px->x1; dx=x1-x0+1;
|
220
|
+
y0=px->y0; y1=px->y1; dy=y1-y0+1;
|
221
|
+
y0-=2; y1+=2;
|
222
|
+
if (px->m4 && y0>px->m1) y0=px->m1;
|
223
|
+
if (px->m4 && y1<px->m4) y1=px->m4;
|
224
|
+
if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
|
225
|
+
if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
|
226
|
+
if (x1-x0+1<62) { x0-=5; x1+=5; }
|
227
|
+
if (y1-y0+1<10) { y0-= 4; y1+= 4; } /* fragment? */
|
228
|
+
if (x0<0) x0=0; if (x1>=b->x) x1=b->x-1;
|
229
|
+
if (y0<0) y0=0; if (y1>=b->y) y1=b->y-1;
|
230
|
+
dx=x1-x0+1;
|
231
|
+
dy=y1-y0+1; yy0=y0;
|
232
|
+
fprintf(stderr,"\n# show box + environment");
|
233
|
+
fprintf(stderr,"\n# show box x= %4d %4d d= %3d %3d r= %d %d",
|
234
|
+
px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
|
235
|
+
px->x - px->x0, px->y - px->y0);
|
236
|
+
if (px->num_ac){ /* output table of chars and its probabilities */
|
237
|
+
fprintf(stderr,"\n# list box char: ");
|
238
|
+
for(i=0;i<px->num_ac && i<NumAlt;i++)
|
239
|
+
/* output the (xml-)string (picture position, barcodes, glyphs, ...) */
|
240
|
+
if (px->tas[i])
|
241
|
+
fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
|
242
|
+
else
|
243
|
+
fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
|
244
|
+
}
|
245
|
+
fprintf(stderr,"\n");
|
246
|
+
if (px->dots && px->m2 && px->m1<y0) { yy0=px->m1; dy=px->y1-yy0+1; }
|
247
|
+
}
|
248
|
+
tx=dx/80+1;
|
249
|
+
ty=dy/40+1; // step, usually 1, but greater on large maps
|
250
|
+
fprintf(stderr,"# show pattern x= %4d %4d d= %3d %3d t= %d %d\n",
|
251
|
+
x0,y0,dx,dy,tx,ty);
|
252
|
+
if (dx>0)
|
253
|
+
for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
|
254
|
+
|
255
|
+
/* image is the boxframe + environment in the original bitmap */
|
256
|
+
for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
|
257
|
+
c1='.';
|
258
|
+
for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
|
259
|
+
for(x2=x;x2<x+tx && x2<x0+dx;x2++)
|
260
|
+
{ if((getpixel(b,x2,y2)<cs)) c1='#'; }
|
261
|
+
// show pixels outside the box thinner/weaker
|
262
|
+
if (x+tx-1 < px->x0 || x > px->x1
|
263
|
+
|| y+ty-1 < px->y0 || y > px->y1) c1=((c1=='#')?'O':',');
|
264
|
+
fprintf(stderr,"%c", c1 );
|
265
|
+
}
|
266
|
+
|
267
|
+
c1=c2=' ';
|
268
|
+
/* mark lines with < */
|
269
|
+
if (px) if (y==px->m1 || y==px->m2 || y==px->m3 || y==px->m4) c1='<';
|
270
|
+
if (y==px->y0 || y==px->y1) c2='-'; /* boxmarks */
|
271
|
+
fprintf(stderr,"%c%c\n",c1,c2);
|
272
|
+
}
|
273
|
+
}
|
274
|
+
|
275
|
+
|
276
|
+
/*
|
277
|
+
// second variant, for database (with slightly other behaviour)
|
278
|
+
// new variant
|
279
|
+
// look at the environment of the pixel too (contrast etc.)
|
280
|
+
// detailed analysis only of diff pixels!
|
281
|
+
//
|
282
|
+
// 100% * distance, 0 is best fit
|
283
|
+
// = similarity of 2 chars for recognition of noisy chars
|
284
|
+
// weigth of pixels with only one same neighbour set to 0
|
285
|
+
// look at contours too!
|
286
|
+
ToDo: especially on small boxes distance should only be 0 if
|
287
|
+
characters are 100% identical!
|
288
|
+
*/
|
289
|
+
// #define DEBUG 2
|
290
|
+
int distance2( pix *p1, struct box *box1,
|
291
|
+
pix *p2, struct box *box2, int cs){
|
292
|
+
int rc=0,x,y,v1,v2,i1,i2,rgood=0,rbad=0,
|
293
|
+
x1,y1,x2,y2,dx,dy,dx1,dy1,dx2,dy2,tx,ty;
|
294
|
+
#if DEBUG == 2
|
295
|
+
if(JOB->cfg.verbose)
|
296
|
+
fprintf(stderr," DEBUG: distance2\n");
|
297
|
+
#endif
|
298
|
+
x1=box1->x0;y1=box1->y0;x2=box2->x0;y2=box2->y0;
|
299
|
+
dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1; dx=((dx1>dx2)?dx1:dx2);dx=dx1;
|
300
|
+
dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1; dy=((dy1>dy2)?dy1:dy2);dy=dy1;
|
301
|
+
if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) rbad++; // how to weight?
|
302
|
+
// compare relations to baseline and upper line
|
303
|
+
if(box1->m4>0 && box2->m4>0){ // used ???
|
304
|
+
if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
|
305
|
+
if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
|
306
|
+
}
|
307
|
+
tx=dx/16; if(dx<17)tx=1; // raster
|
308
|
+
ty=dy/32; if(dy<33)ty=1;
|
309
|
+
// compare pixels
|
310
|
+
for( y=0;y<dy;y+=ty )
|
311
|
+
for( x=0;x<dx;x+=tx ) { // try global shift too ???
|
312
|
+
v1=((getpixel(p1,x1+x*dx1/dx,y1+y*dy1/dy)<cs)?1:0); i1=8; // better gray?
|
313
|
+
v2=((getpixel(p2,x2+x*dx2/dx,y2+y*dy2/dy)<cs)?1:0); i2=8; // better gray?
|
314
|
+
if(v1==v2) { rgood+=16; continue; } // all things are right!
|
315
|
+
// what about different pixel???
|
316
|
+
// test overlapp of surounding pixels ???
|
317
|
+
v1=1; rbad+=4;
|
318
|
+
v1=-1;
|
319
|
+
for(i1=-1;i1<2;i1++)
|
320
|
+
for(i2=-1;i2<2;i2++)if(i1!=0 || i2!=0){
|
321
|
+
if( ((getpixel(p1,x1+x*dx1/dx+i1*(1+dx1/32),y1+y*dy1/dy+i2*(1+dy1/32))<cs)?1:0)
|
322
|
+
!=((getpixel(p2,x2+x*dx2/dx+i1*(1+dx2/32),y2+y*dy2/dy+i2*(1+dy2/32))<cs)?1:0) ) v1++;
|
323
|
+
}
|
324
|
+
if(v1>0)
|
325
|
+
rbad+=16*v1;
|
326
|
+
}
|
327
|
+
if(rgood+rbad) rc= 100*rbad/(rgood+rbad); else rc=99;
|
328
|
+
/* if width/high is not correct add badness */
|
329
|
+
rc += ( abs(dx1*dy2-dx2*dy1) * 10 ) / (dy1*dy2);
|
330
|
+
if (rc>100) rc=100;
|
331
|
+
if(/* rc<10 && */ JOB->cfg.verbose /* &1024 */){
|
332
|
+
#if DEBUG == 2
|
333
|
+
fprintf(stderr," distance2 rc=%d rgood=%d rbad=%d\n",rc,rgood,rbad);
|
334
|
+
// out_b(NULL,p1,box1->x0,box1->y0,box1->x1-box1->x0+1,
|
335
|
+
// box1->y1-box1->y0+1,cs);
|
336
|
+
// out_b(NULL,p2,box2->x0,box2->y0,box2->x1-box2->x0+1,
|
337
|
+
// box2->y1-box2->y0+1,cs);
|
338
|
+
out_x(box1);
|
339
|
+
out_x(box2);
|
340
|
+
#endif
|
341
|
+
}
|
342
|
+
return rc;
|
343
|
+
}
|
344
|
+
|
345
|
+
wchar_t ocr_db(struct box *box1) {
|
346
|
+
int dd = 1000, dist = 1000;
|
347
|
+
wchar_t c = UNKNOWN;
|
348
|
+
unsigned char buf[200]; /* Oct08 JS: add unsigned to avoid UTF problems */
|
349
|
+
Box *box2, *box3;
|
350
|
+
|
351
|
+
if (!list_empty(&JOB->tmp.dblist)){
|
352
|
+
box3 = (Box *)list_get_header(&JOB->tmp.dblist);
|
353
|
+
if(JOB->cfg.verbose)
|
354
|
+
fprintf(stderr,"\n#DEBUG: ocr_db (%d,%d) ",box1->x0, box1->y0);
|
355
|
+
|
356
|
+
for_each_data(&JOB->tmp.dblist) {
|
357
|
+
box2 = (Box *)list_get_current(&JOB->tmp.dblist);
|
358
|
+
/* do preselect!!! distance() slowly */
|
359
|
+
dd = distance2( box2->p, box2, box1->p, box1, JOB->cfg.cs);
|
360
|
+
if (dd <= dist) { /* new best fit */
|
361
|
+
dist = dd;
|
362
|
+
box3 = box2; /* box3 is a pointer and not copied box2 */
|
363
|
+
|
364
|
+
if (dist<100 && 100-dist >= JOB->cfg.certainty) {
|
365
|
+
/* some deviation of the pattern is tolerated */
|
366
|
+
int i, wa;
|
367
|
+
for (i=0;i<box3->num_ac;i++) {
|
368
|
+
wa = (100-dist)*box3->wac[i]/100; /* weight *= (100-dist) */
|
369
|
+
if (box3->tas[i]) setas(box1,box3->tas[i],wa);
|
370
|
+
else setac(box1,box3->tac[i],wa);
|
371
|
+
}
|
372
|
+
if (box3->num_ac) c=box3->tac[0]; /* 0 for strings (!UNKNOWN) */
|
373
|
+
if (JOB->cfg.verbose)
|
374
|
+
fprintf(stderr, " dist=%4d c= %c 0x%02x %s wc= %3d", dist,
|
375
|
+
((box3->c>32 && box3->c<127) ? (char) box3->c : '.'),
|
376
|
+
(int)box3->c, ((box3->tas[0])?box3->tas[0]:""), box3->wac[0]);
|
377
|
+
}
|
378
|
+
if (dd<=0 && ((box3->num_ac && box3->tas[0]) || box3->c >= 128
|
379
|
+
|| !strchr ("l1|I0O", box3->c)))
|
380
|
+
break; /* speedup if found */
|
381
|
+
}
|
382
|
+
} end_for_each(&JOB->tmp.dblist);
|
383
|
+
|
384
|
+
}
|
385
|
+
|
386
|
+
if( (JOB->cfg.mode&128) != 0 && c == UNKNOWN ) { /* prompt the user */
|
387
|
+
/* should the output go to stderr or special pipe??? */
|
388
|
+
int utf8_ok=0; /* trigger this flag if input is ok */
|
389
|
+
int i, endchar; /* index */
|
390
|
+
out_env(box1); /* old: out_x(box1); */
|
391
|
+
fprintf(stderr,"The above pattern was not recognized.\n"
|
392
|
+
"Enter UTF8 char or string for above pattern. Leave empty if unsure.\n"
|
393
|
+
"Press RET at the end (ALT+RET to store into RAM only) : "
|
394
|
+
); /* ToDo: empty + alt-return (0x1b 0x0a) for help? ^a for skip all */
|
395
|
+
/* UTF-8 (man 7 utf-8):
|
396
|
+
* 7bit = 0xxxxxxx (0000-007F)
|
397
|
+
* 11bit = 110xxxxx 10xxxxxx (0080-07FF)
|
398
|
+
* 16bit = 1110xxxx 10xxxxxx 10xxxxxx (0800-FFFF)
|
399
|
+
* 21bit = 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
400
|
+
* 26bit = 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
401
|
+
* 31bit = 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
402
|
+
*/
|
403
|
+
buf[0]=0;
|
404
|
+
/* shift/ctrl/altgr-enter acts like enter or ^j or ^m,
|
405
|
+
* alt-enter returns 0x1b 0x0a and returns from fgets()
|
406
|
+
* ^d (EOF) returns (nil) from fgets()
|
407
|
+
* x+(2*)ctrl-d returns from fgets() without returning a 0x0a
|
408
|
+
* if not UTF-input-mode, we are in trouble?
|
409
|
+
* ^a=0x01, ^b=0x02, ^e=05, ..., ToDo: meaning of no-input or <=space
|
410
|
+
*/
|
411
|
+
fgets((char *)buf,200,stdin); /* including \n=0x0a */
|
412
|
+
dd=strlen((char *)buf);
|
413
|
+
/* output hexcode if verbose set */
|
414
|
+
if (JOB->cfg.verbose) {
|
415
|
+
fprintf(stderr, "\n# fgets [%d]:", dd);
|
416
|
+
for(i=0; i<dd; i++)
|
417
|
+
fprintf(stderr, " %02x", (unsigned)((unsigned char)buf[i]));
|
418
|
+
fprintf(stderr, "\n#");
|
419
|
+
}
|
420
|
+
/* we dont accept chars which could destroy database file */
|
421
|
+
for (i=0; i<dd; i++) if (buf[i]<32) break; /* need unsigned char here */
|
422
|
+
endchar=buf[i]; /* last char is 0x0a (ret) 0x00 (EOF) or 0x1b (alt+ret) */
|
423
|
+
if (endchar==0x01) { i=0;JOB->cfg.mode&=~128; } /* skip all */
|
424
|
+
buf[dd=i]=0; /* replace final 0x0a or other special codes */
|
425
|
+
if (dd==1 && !(buf[0]&128)) { c=buf[0]; utf8_ok=1; } /* single char */
|
426
|
+
if (dd>1 && dd<7) { /* try to decode single wide char (utf8) */
|
427
|
+
int u0, u1; /* define UTF8-start sequences, u0=0bits u1=1bits */
|
428
|
+
u0= 1<<(7-dd); /* compute start byte from UTF8-length */
|
429
|
+
u1=255&~((1<<(8-dd))-1);
|
430
|
+
/* count number of following 10xxxxxx bytes to i */
|
431
|
+
for (i=1;i<dd;i++) if ((buf[i]&0xc0)!=0x80) break; /* 10xxxxxx */
|
432
|
+
if (i==dd && (buf[0]&(u0|u1))==u1) { utf8_ok=1;
|
433
|
+
c=buf[0]&(u0-1); /* 11..0x.. */
|
434
|
+
for (i=1;i<dd;i++) { c<<=6; c|=buf[i]&0x3F; } /* 10xxxxxx */
|
435
|
+
}
|
436
|
+
}
|
437
|
+
if (dd>0){ /* ToDo: skip space and tab too? */
|
438
|
+
if (utf8_ok==1) { setac(box1, c, 100); } /* store single wchar */
|
439
|
+
if (utf8_ok==0) { /* store a string of chars (UTF8-string) */
|
440
|
+
c='_'; /* what should we do with c? probably a bad idea? */
|
441
|
+
setas(box1, (char *)buf, 100);
|
442
|
+
}
|
443
|
+
/* decide between
|
444
|
+
* 0) just help gocr to find the results and (dont remember, 0x01)
|
445
|
+
* 1) help and remember in the same run (store to memory, 0x1b)
|
446
|
+
* 2) expand the database (dont store ugly chars to the database!)
|
447
|
+
*/
|
448
|
+
if (endchar!=0x01){ /* ^a before hit return */
|
449
|
+
/* is there a reason to dont store to memory? */
|
450
|
+
list_app(&JOB->tmp.dblist, box1); /* append to list for 1+2 */
|
451
|
+
}
|
452
|
+
if (endchar!=0x01 && endchar!=0x1b){
|
453
|
+
store_db(box1); /* store to disk for 2 */
|
454
|
+
}
|
455
|
+
if (JOB->cfg.verbose)
|
456
|
+
fprintf(stderr, " got char= %c 16bit= 0x%04x string= \"%s\"\n",
|
457
|
+
((c>32 && c<127)?(char)c:'.'), (int)c, buf);
|
458
|
+
}
|
459
|
+
}
|
460
|
+
|
461
|
+
return c;
|
462
|
+
}
|