isbn 2.0.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,11 +0,0 @@
1
- #ifndef _BARCODE_H
2
- #define _BARCODE_H
3
- #include "pnm.h"
4
-
5
- /*
6
- detect barcode and add a string to the box (obj-pointer)
7
- */
8
-
9
- int detect_barcode(job_t *job);
10
-
11
- #endif
@@ -1,372 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2009 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL address
20
-
21
- */
22
-
23
- #include <stdio.h>
24
- #include <stdlib.h>
25
- #include <assert.h>
26
- #include <string.h>
27
- /* do we need #include <math.h>? conflicts with INFINITY in unicode.h */
28
- #include "gocr.h"
29
- #include "pgm2asc.h"
30
-
31
- /* for sorting letters by position on the image
32
- / ToDo: - use function same line like this or include lines.m1 etc. */
33
- int box_gt(struct box *box1, struct box *box2) {
34
- // box1 after box2 ?
35
- if (box1->line > box2->line)
36
- return 1;
37
- if (box1->line < box2->line)
38
- return 0;
39
- if (box1->x0 > box2->x1) // before
40
- return 1;
41
- if (box1->x1 < box2->x0) // before
42
- return 0;
43
- if (box1->x0 > box2->x0) // before, overlapping!
44
- return 1;
45
-
46
- return 0;
47
- }
48
-
49
- /* --- copy part of pix p into new pix b ---- len=10000
50
- * Returns: 0 on success, 1 on error.
51
- * naming it as copybox isnt very clever, because it dont have to do with the
52
- * char boxes (struct box)
53
- */
54
- int copybox (pix * p, int x0, int y0, int dx, int dy, pix * b, int len) {
55
- int x, y;
56
-
57
- /* test boundaries */
58
- if (b->p == NULL || dx < 0 || dy < 0 || dx * dy > len) {
59
- fprintf(stderr, " error-copybox x=%5d %5d d=%5d %5d\n", x0, y0, dx, dy);
60
- return 1;
61
- }
62
-
63
- b->x = dx;
64
- b->y = dy;
65
- b->bpp = 1;
66
- #ifdef FASTER_INCOMPLETE
67
- for (y = 0; y < dy; y++)
68
- memcpy(&pixel_atp(b, 0, y), &pixel_atp(p, x0, y + y0 ), dx);
69
- // and unmark pixels
70
- #else
71
- for (y = 0; y < dy; y++)
72
- for (x = 0; x < dx; x++)
73
- pixel_atp(b, x, y) = getpixel(p, x + x0, y + y0);
74
- #endif
75
-
76
- return 0;
77
- }
78
-
79
- /* reset table of alternative chars (and free memory) */
80
- int reset_box_ac(struct box *box){
81
- int i;
82
- for (i=0; i<box->num_ac; i++)
83
- if (box->tas[i]) {
84
- /* fprintf(stderr,"DBG free_s[%d] %p %s\n",i,box->tas[i],box->tas[i]); */
85
- free(box->tas[i]);
86
- box->tas[i]=0; /* prevent double freeing */
87
- }
88
- box->num_ac=0; /* mark as freed */
89
- return 0;
90
- }
91
-
92
- /* ini or copy a box: get memory for box and initialize the memory */
93
- struct box *malloc_box (struct box *inibox) {
94
- struct box *buf;
95
- int i;
96
-
97
- buf = (struct box *) malloc(sizeof(struct box));
98
- if (!buf)
99
- return NULL;
100
- if (inibox) {
101
- memcpy(buf, inibox, sizeof(struct box));
102
- /* only pointer are copied, we want to copy the contents too */
103
- for (i=0;i<inibox->num_ac;i++) {
104
- if (inibox->tas[i]) {
105
- buf->tas[i]=(char *)malloc(strlen(inibox->tas[i])+1);
106
- memcpy(buf->tas[i], inibox->tas[i], strlen(inibox->tas[i])+1);
107
- }
108
- }
109
- }
110
- else { /* ToDo: init it */
111
- buf->num_ac=0;
112
- buf->num_frames=0;
113
- }
114
- /* fprintf(stderr,"\nDBG ini_box %p",buf); */
115
- return buf;
116
- }
117
-
118
- /* free memory of box */
119
- int free_box (struct box *box) {
120
- if (!box) return 0;
121
- /* fprintf(stderr,"DBG free_box %p\n",box); out_x(box); */
122
- reset_box_ac(box); /* free alternative char table */
123
- free(box); /* free the box memory */
124
- return 0;
125
- }
126
-
127
- /* simplify the vectorgraph,
128
- * but what is the best way?
129
- * a) melting two neighbouring vectors with nearly same direction?
130
- * (nearest angle to pi)
131
- * b) melting three neigbours with smallest area?
132
- * ToDo:
133
- * mode = 0 - only lossless
134
- * mode = 1 - reduce one vector, smallest possible loss
135
- * mode = 2 - remove jitter (todo, or somewhere else)
136
- * ToDo: include also loop around (last - first element)
137
- * ToDo: reduce by 10..50%
138
- */
139
- int reduce_vectors ( struct box *box1, int mode ) {
140
- int i1, i2, nx, ny, mx, my, len,
141
- minlen=1024, /* minlength of to neighbouring vectors */
142
- besti1=0, /* frame for best reduction */
143
- besti2=2; /* vector replacing its predecessor */
144
- double sprod, maxsprod=-1;
145
- if (mode!=1) fprintf(stderr,"ERR not supported yet, ToDo\n");
146
- for (i2=1,i1=0; i1<box1->num_frames; i1++) { /* every frame */
147
- for (;i2<box1->num_frame_vectors[i1]-1; i2++) { /* every vector */
148
- /* predecessor n */
149
- nx = box1->frame_vector[i2-0][0] - box1->frame_vector[i2-1][0];
150
- ny = box1->frame_vector[i2-0][1] - box1->frame_vector[i2-1][1];
151
- /* successor m */
152
- mx = box1->frame_vector[i2+1][0] - box1->frame_vector[i2-0][0];
153
- my = box1->frame_vector[i2+1][1] - box1->frame_vector[i2-0][1];
154
- /* angle is w = a*b/(|a|*|b|) = 1 means parallel */
155
- /* normalized: minimize w^2 = (a*b/(|a|*|b|)-1)^2 */
156
- /* -1=90grd, 0=0grd, -2=180grd */
157
- sprod = /* fabs */(abs(nx*mx+ny*my)*(nx*mx+ny*my)
158
- /(1.*(nx*nx+ny*ny)*(mx*mx+my*my))-1);
159
- /* we dont include math.h because INFINITY conflicts to unicode,h */
160
- if (sprod<0) sprod=-sprod;
161
- len = (mx*mx+my*my)*(nx*nx+ny*ny); /* sum lengths^2 */
162
- // ..c ###c ... .. ...
163
- // .b. len=2+2 #b.. len=2+5 #bc len=1+2 bc len=1+1 b#a len=4+5
164
- // a.. spr=0 a... spr=1/10 a.. spr=1/4 a. spr=1 ##c spr=9/5
165
- //
166
- if ( len* sprod* sprod* sprod* sprod
167
- <minlen*maxsprod*maxsprod*maxsprod*maxsprod
168
- || maxsprod<0) /* Bad! ToDo! */
169
- { maxsprod=sprod; besti1=i1; besti2=i2; minlen=len; }
170
- }
171
- }
172
- if (box1->num_frames>0)
173
- for (i2=besti2; i2<box1->num_frame_vectors[ box1->num_frames-1 ]-1; i2++) {
174
- box1->frame_vector[i2][0]=box1->frame_vector[i2+1][0];
175
- box1->frame_vector[i2][1]=box1->frame_vector[i2+1][1];
176
- }
177
- for (i1=besti1; i1<box1->num_frames; i1++)
178
- box1->num_frame_vectors[i1]--;
179
- // fprintf(stderr,"\nDBG_reduce_vectors i= %d nv= %d sprod=%f len2=%d\n# ...",
180
- // besti2,box1->num_frame_vectors[ box1->num_frames-1 ],maxsprod,minlen);
181
- // out_x(box1);
182
- return 0;
183
- }
184
-
185
- /* add the contents of box2 to box1
186
- * especially add vectors of box2 to box1
187
- */
188
- int merge_boxes( struct box *box1, struct box *box2 ) {
189
- int i1, i2, i3, i4;
190
- struct box tmpbox, *bsmaller, *bbigger; /* for mixing and sorting */
191
- /* DEBUG, use valgrind to check uninitialized memory */
192
- #if 0
193
- fprintf(stderr,"\nDBG merge_boxes_input:"); out_x(box1); out_x(box2);
194
- #endif
195
- /* pair distance is to expendable, taking borders is easier */
196
- if ((box2->x1 - box2->x0)*(box2->y1 - box2->y0)
197
- >(box1->x1 - box1->x0)*(box1->y1 - box1->y0)) {
198
- bbigger=box2; bsmaller=box1; }
199
- else {
200
- bbigger=box1; bsmaller=box2; }
201
- /* ToDo: does not work if a third box is added */
202
- if (box2->y0>box1->y1 || box2->y1<box1->y0
203
- || box2->x0>box1->x1 || box2->x1<box1->x0) {
204
- box1->num_boxes += box2->num_boxes; /* num seperate objects 2=ij */
205
- } else {
206
- if (box2->num_boxes>box1->num_boxes) box1->num_boxes=box2->num_boxes;
207
- box1->num_subboxes += box2->num_subboxes+1; /* num holes 1=abdepq 2=B */
208
- }
209
- box1->dots += box2->dots; /* num i-dots */
210
- if ( box2->x0 < box1->x0 ) box1->x0 = box2->x0;
211
- if ( box2->x1 > box1->x1 ) box1->x1 = box2->x1;
212
- if ( box2->y0 < box1->y0 ) box1->y0 = box2->y0;
213
- if ( box2->y1 > box1->y1 ) box1->y1 = box2->y1;
214
- i1 = i2 = 0;
215
- if (bbigger->num_frames)
216
- i1 = bbigger->num_frame_vectors[ bbigger->num_frames - 1 ];
217
- if (bsmaller->num_frames)
218
- i2 = bsmaller->num_frame_vectors[ bsmaller->num_frames - 1 ];
219
- while (i1+i2 > MaxFrameVectors) {
220
- if (i1>i2) { reduce_vectors( bbigger, 1 ); i1--; }
221
- else { reduce_vectors( bsmaller, 1 ); i2--; }
222
- }
223
- /* if i1+i2>MaxFrameVectors simplify the vectorgraph */
224
- /* if sum num_frames>MaxNumFrames through shortest graph away and warn */
225
- /* first copy the bigger box */
226
- memcpy(&tmpbox, bbigger, sizeof(struct box));
227
- /* attach the smaller box */
228
- for (i4=i3=0; i3<bsmaller->num_frames; i3++) {
229
- if (tmpbox.num_frames>=MaxNumFrames) break;
230
-
231
- for (; i4<bsmaller->num_frame_vectors[i3]; i4++) {
232
- memcpy(tmpbox.frame_vector[i1],
233
- bsmaller->frame_vector[i4],2*sizeof(int));
234
- i1++;
235
- }
236
- tmpbox.num_frame_vectors[ tmpbox.num_frames ] = i1;
237
- tmpbox.frame_vol[ tmpbox.num_frames ] = bsmaller->frame_vol[ i3 ];
238
- tmpbox.frame_per[ tmpbox.num_frames ] = bsmaller->frame_per[ i3 ];
239
- tmpbox.num_frames++;
240
- if (tmpbox.num_frames>=MaxNumFrames) {
241
- if (JOB->cfg.verbose)
242
- fprintf(stderr,"\nDBG merge_boxes MaxNumFrames reached");
243
- break;
244
- }
245
- }
246
- /* copy tmpbox to destination */
247
- box1->num_frames = tmpbox.num_frames;
248
- memcpy(box1->num_frame_vectors,
249
- tmpbox.num_frame_vectors,sizeof(int)*MaxNumFrames);
250
- memcpy(box1->frame_vol,
251
- tmpbox.frame_vol,sizeof(int)*MaxNumFrames);
252
- memcpy(box1->frame_per,
253
- tmpbox.frame_per,sizeof(int)*MaxNumFrames);
254
- memcpy(box1->frame_vector,
255
- tmpbox.frame_vector,sizeof(int)*2*MaxFrameVectors);
256
- #if 0
257
- if (JOB->cfg.verbose)
258
- fprintf(stderr,"\nDBG merge_boxes_result:"); out_x(box1);
259
- #endif
260
- return 0;
261
- }
262
-
263
- /* used for division of glued chars
264
- * after a box is splitted into 2, where vectors are copied to both,
265
- * vectors outside the new box are cutted and thrown away,
266
- * later replaced by
267
- * - 1st remove outside vectors with outside neighbours (complete frames?)
268
- * add vector on outside vector with inside neighbours
269
- * care about connections through box between outside vectors
270
- * - 2nd reduce outside crossings (inclusive splitting frames if necessary)
271
- * depending on direction (rotation) of outside connections
272
- * - 3th shift outside vectors to crossing points
273
- * - split add this points, connect only in-out...out-in,
274
- * - cutting can result in more objects
275
- * ToDo:
276
- * dont connect --1---2--------3----4-- new-y1 (inside above not drawn)
277
- * \ \->>>>-/ / outside
278
- * \----<<<<-----/ old-y1
279
- * |======| subtractable?
280
- *
281
- * only connect --1---2--------3----4-- new-y1
282
- * \>>/ \>>>/ old-y1 outside
283
- * ToDo: what about cutting 2 frames (example: 2fold melted MN)
284
- * better restart framing algo?
285
- *
286
- * ToDo: new vol, per
287
- */
288
- int cut_box( struct box *box1) {
289
- int i1, i2, i3, i4, x, y, lx, ly, dbg=0;
290
- if (JOB->cfg.verbose) dbg=1; // debug level, enlarge to get more output
291
- if (dbg) fprintf(stderr,"\n cut box x= %3d %3d", box1->x0, box1->y0);
292
- /* check if complete frames are outside the box */
293
- for (i1=0; i1<box1->num_frames; i1++){
294
- if (dbg>2) fprintf(stderr,"\n checking frame %d outside", i1);
295
- i2 = ((i1)?box1->num_frame_vectors[ i1-1 ]:0); // this frame
296
- i3 = box1->num_frame_vectors[ i1 ]; // next frame
297
- for (i4=i2; i4 < i3; i4++) {
298
- x = box1->frame_vector[i4][0];
299
- y = box1->frame_vector[i4][1];
300
- /* break, if one vector is lying inside */
301
- if (x>=box1->x0 && x<=box1->x1 && y>=box1->y0 && y<=box1->y1) break;
302
- }
303
- if (i4==i3) { /* all vectors outside */
304
- if (dbg>1) fprintf(stderr,"\n remove frame %d",i1);
305
- /* replace all frames i1,i1+1,... by i1+1,i1+2,... */
306
- /* replace (x,y) pairs first */
307
- for (i4=i2; i4<box1->num_frame_vectors[ box1->num_frames-1 ]-(i3-i2);
308
- i4++) {
309
- box1->frame_vector[i4][0] = box1->frame_vector[i4+i3-i2][0];
310
- box1->frame_vector[i4][1] = box1->frame_vector[i4+i3-i2][1];
311
- }
312
- /* replace the num_frame_vectors */
313
- for (i4=i1; i4<box1->num_frames-1; i4++)
314
- box1->num_frame_vectors[ i4 ] =
315
- box1->num_frame_vectors[ i4+1 ]-(i3-i2);
316
- box1->num_frames--; i1--;
317
- }
318
- }
319
- /* remove vectors outside the box */
320
- i3=0;
321
- for (i1=0; i1<box1->num_frames; i1++){
322
- if (dbg>2) fprintf(stderr,"\n check cutting vectors on frame %d", i1);
323
- x = box1->frame_vector[0][0]; /* last x */
324
- y = box1->frame_vector[0][1]; /* last y */
325
- /* ToDo: start inside to get a closed object */
326
- if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) i3=1;
327
- for (i2=0; i2<box1->num_frame_vectors[ i1 ]; i2++) {
328
- lx = x; /* last x */
329
- ly = y; /* last y */
330
- x = box1->frame_vector[i2][0];
331
- y = box1->frame_vector[i2][1];
332
- // fprintf(stderr,"DBG LEV3 i2= %3d xy= %3d %3d",i2,x,y);
333
- /* check if outside */
334
- if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) {
335
- /* replace by nearest point at border, ToDo: better crossingpoint */
336
- if (i3==0) { /* wrong if it starts outside */
337
- if (x < box1->x0) x = box1->frame_vector[i2][0] = box1->x0;
338
- if (x > box1->x1) x = box1->frame_vector[i2][0] = box1->x1;
339
- if (y < box1->y0) y = box1->frame_vector[i2][1] = box1->y0;
340
- if (y > box1->y1) y = box1->frame_vector[i2][1] = box1->y1;
341
- } else {
342
- /* remove vector */
343
- if (dbg>1) fprintf(stderr,"\n remove vector[%d][%d] x= %2d %2d",i1,i2,x-box1->x0,y-box1->y0);
344
- for (i4=i2;i4<box1->num_frame_vectors[ box1->num_frames-1 ]-1;i4++) {
345
- box1->frame_vector[i4][0] = box1->frame_vector[i4+1][0];
346
- box1->frame_vector[i4][1] = box1->frame_vector[i4+1][1];
347
- }
348
- for (i4=i1; i4<box1->num_frames; i4++)
349
- box1->num_frame_vectors[ i4 ]--;
350
- i2--; /* next element is shiftet now, setting back the counter */
351
- }
352
- i3++;
353
- // fprintf(stderr," outside i3= %d\n",i3);
354
- continue;
355
- }
356
- // fprintf(stderr," inside i3= %d",i3);
357
- if (i3) { /* ToDo: better crossing point last vector and border */
358
- if (lx < box1->x0) lx = box1->x0;
359
- if (lx > box1->x1) lx = box1->x1;
360
- if (ly < box1->y0) ly = box1->y0;
361
- if (ly > box1->y1) ly = box1->y1;
362
- x = box1->frame_vector[i2][0] = lx;
363
- y = box1->frame_vector[i2][1] = ly;
364
- i3 = 0;
365
- }
366
- // fprintf(stderr," xy= %3d %3d\n",x,y);
367
- }
368
- }
369
- if (dbg>2) { fprintf(stderr,"\nDBG cut_box_result:"); out_x(box1); }
370
- return 0;
371
- }
372
-
@@ -1,462 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2009 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL address
20
- */
21
-
22
- #include <stdio.h>
23
- #include <stdlib.h>
24
- #include "gocr.h"
25
- #include "pnm.h"
26
- #include "pgm2asc.h"
27
- #include <string.h>
28
- #include <time.h>
29
-
30
- #define Blen 256
31
-
32
- // load boxes from database into boxlist (for faster access)
33
- // used as alternate engine, comparing chars with database
34
- int load_db(void) {
35
- FILE *f1;
36
- char s1[Blen+1],
37
- s2[Blen+1] = "./db/", /* ToDo: replace by constant! by configure */
38
- *s3;
39
- int i, j, ii, i2, line;
40
- struct box *box1;
41
- pix *pp;
42
-
43
- if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
44
- i2=strlen(s2);
45
- if (JOB->cfg.verbose)
46
- fprintf(stderr, "# load database %s %s ... ",s2,JOB->cfg.db_path);
47
-
48
- strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
49
- f1 = fopen(s2, "r");
50
- if (!f1) {
51
- fprintf(stderr, " DB %s not found\n",s2);
52
- return 1;
53
- }
54
-
55
- line = 0; /* line counter for better error report */
56
- for (ii = 0; !feof(f1); ii++) {
57
- /* bbg: should write a better input routine */
58
- if (!fgets(s1, Blen, f1)) break; line++;
59
- j = strlen(s1);
60
- /* remove carriage return sequences from line */
61
- while (j > 0 && (s1[j - 1] == '\r' || s1[j - 1] == '\n'))
62
- s1[--j] = 0;
63
- if (!j) continue; /* skip empty line */
64
- if (s1[0]=='#') continue; /* skip comments (v0.44) */
65
- /* copy file name */
66
- for (i = 0; i < j && i+i2 < Blen && strchr(" \t,;",s1[i]) == 0; i++)
67
- s2[i2 + i] = s1[i];
68
- s2[i2+i]=0;
69
- /* skip spaces */
70
- for (; i < j && strchr(" \t",s1[i]) != 0; i++);
71
- /* by now: read pix, fill box, goto next ??? */
72
- pp = (pix *)malloc(sizeof(pix));
73
- if( !pp ) fprintf(stderr,"malloc error in load_db pix\n");
74
-
75
- // if (JOB->cfg.verbose) fprintf(stderr,"\n# readpgm %s ",s2);
76
- if (readpgm(s2, pp, 0 * JOB->cfg.verbose)!=0) {
77
- fprintf(stderr,"\ndatabase error: readpgm %s\n", s2);
78
- exit(-1);
79
- }
80
-
81
- box1 = (struct box *)malloc_box(NULL);
82
- if(!box1) fprintf(stderr,"malloc error in load_db box1\n");
83
- box1->x0 = 0;
84
- box1->x1 = pp->x-1; // white border 1 pixel width
85
- box1->y0 = 0;
86
- box1->y1 = pp->y-1;
87
- box1->x = 1;
88
- box1->y = 1;
89
- box1->dots = 0;
90
- box1->c = 0;
91
- box1->modifier = 0; /* ToDo: obsolete */
92
- box1->tas[0]=NULL;
93
- box1->tac[0]=0;
94
- box1->wac[0]=100; /* really 100% sure? */
95
- box1->num_ac=1;
96
- if (s1[i]=='"'){ /* parse a string */
97
- j=strrchr(s1+i+1,'"')-(s1+i+1); /* we only look for first and last "" */
98
- if (j>=1) {
99
- s3=(char *)malloc(j+1);
100
- if (!s3) fprintf (stderr, "malloc error in load_db s3\n");
101
- if (s3) {
102
- memcpy(s3,s1+i+1,j);
103
- s3[j]=0;
104
- box1->tas[0]=s3;
105
- // fprintf(stderr,"\nstring=%s",s3);
106
- }
107
- } else { fprintf(stderr,"load_db: string parse error L%d\n",line); }
108
- } else {
109
- box1->tac[0] = box1->c = s1[i]; /* try to interpret as ASCII */
110
- /* we can live without hexcode in future if we use UTF8-strings */
111
- s3=s1+i;
112
- j=strtol( s1+i, &s3, 16); /* try to read 4 to 8 digit hex unicode */
113
- /* if its an hexcode, ASCII interpretation is overwritten */
114
- if( j && i+3<=Blen && s3-s1-i>3 ) box1->tac[0] = box1->c = j;
115
- // fprintf(stderr,"\nhexcode=%04x=%04x %d",(int)j,(int)box1->c,s3-s1-i);
116
- }
117
- box1->num = 0;
118
- box1->line = -1;
119
- box1->m1 = 0; /* ToDo: should be given too in the database! */
120
- box1->m2 = 0;
121
- box1->m3 = 0;
122
- box1->m4 = 0;
123
- box1->p = pp;
124
- list_app(&JOB->tmp.dblist, box1); // append to list
125
- #if 0
126
- out_x(box1);
127
- #endif
128
- }
129
- fclose(f1);
130
- if (JOB->cfg.verbose)
131
- fprintf(stderr, " %d chars loaded\n", ii);
132
- return 0;
133
- }
134
-
135
- // expand database from box/boxlist name=db_$utime.pbm
136
- // this is added in version v0.3.3
137
- int store_db(struct box *box1) {
138
- FILE *f1;
139
- char s2[Blen+1] = "./db/", s3[Blen+1];
140
- int i2, dx, dy;
141
- unsigned c_out;
142
- pix b; /* temporary mini page */
143
-
144
- if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
145
- i2=strlen(s2);
146
-
147
- /* add (first) char and time to the file name for better debugging */
148
-
149
- /* decide between 7bit ASCII and UTF8-char or string */
150
- c_out = ((box1->num_ac && box1->tas[0]) ?
151
- (unsigned char )box1->tas[0][0] /* char */ :
152
- box1->c /* wchar */);
153
- /* (unsigned int)(( char)0x80) = 0xffffff80 */
154
- /* (unsigned int)((unsigned char)0x80) = 0x00000080 */
155
-
156
- /* name generation can cause problems, if called twice within a second */
157
- sprintf(s3,"db_%04x_%08lx.pbm", c_out, (unsigned long)time(NULL));
158
- /* ToDo: the file name may be not unique */
159
-
160
- strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
161
- f1 = fopen(s2, "a");
162
- if (!f1) {
163
- fprintf(stderr, " could not access %s\n",s2);
164
- return 1;
165
- }
166
- strncpy(s2+i2,s3,strlen(s3)); s2[i2+strlen(s3)]=0;
167
- /* store image and infos about the char */
168
- /* ToDo: store the vector list instead of the pixelarray */
169
-
170
- if (JOB->cfg.verbose)
171
- fprintf(stderr, "store_db: add file %s to database (nac=%d c=%04x)"
172
- "\n#",s3, box1->num_ac, c_out);
173
-
174
- dx=box1->x1-box1->x0+1;
175
- dy=box1->y1-box1->y0+1;
176
- b.p = (unsigned char *) malloc( dx * dy );
177
- if( !b.p ){
178
- fprintf( stderr, "\nFATAL: malloc failed, skip store_db" );
179
- return 2;
180
- }
181
- if (copybox(box1->p, box1->x0, box1->y0, dx, dy, &b, dx * dy))
182
- return -1;
183
-
184
- writepbm(s2,&b); /* What is to do on error? */
185
- free(b.p);
186
-
187
- /* store the database line */
188
- /* some infos about box1->m1,..,m4 should added (base line, high etc.) */
189
- if (box1->num_ac && box1->tas[0]) {
190
- fprintf(f1, "%s \"%s\"\n",s3,box1->tas[0]);
191
- /* ToDo: what if tas contains '"'? */
192
- } else {
193
- if( (box1->c >= '0' && box1->c <= '9')
194
- || (box1->c >= 'A' && box1->c <= 'Z')
195
- || (box1->c >= 'a' && box1->c <= 'z') )
196
- fprintf(f1, "%s %c\n",s3,(char)box1->c);
197
- else {
198
- if (((box1->c)>>16)>>16)
199
- fprintf(f1, "%s %08x\n",s3,(unsigned int)box1->c);
200
- else
201
- fprintf(f1, "%s %04x\n",s3,(unsigned int)box1->c);
202
- }
203
- }
204
- fclose(f1);
205
- return 0;
206
- }
207
-
208
- /* function is only for user prompt on console to identify chars
209
- it prints out a part of pixmap b at point x0,y0 to stderr
210
- using dots .,; if no pixel, and @xoO for pixels
211
- */
212
- void out_env(struct box *px ){
213
- int x0,y0,x1,y1,dx,dy,x,y,x2,y2,yy0,tx,ty,i,cs;
214
- char c1, c2; pix *b;
215
- cs=JOB->cfg.cs;
216
- yy0=px->y0;
217
- { /* overwrite rest of arguments */
218
- b=px->p;
219
- x0=px->x0; x1=px->x1; dx=x1-x0+1;
220
- y0=px->y0; y1=px->y1; dy=y1-y0+1;
221
- y0-=2; y1+=2;
222
- if (px->m4 && y0>px->m1) y0=px->m1;
223
- if (px->m4 && y1<px->m4) y1=px->m4;
224
- if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
225
- if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
226
- if (x1-x0+1<62) { x0-=5; x1+=5; }
227
- if (y1-y0+1<10) { y0-= 4; y1+= 4; } /* fragment? */
228
- if (x0<0) x0=0; if (x1>=b->x) x1=b->x-1;
229
- if (y0<0) y0=0; if (y1>=b->y) y1=b->y-1;
230
- dx=x1-x0+1;
231
- dy=y1-y0+1; yy0=y0;
232
- fprintf(stderr,"\n# show box + environment");
233
- fprintf(stderr,"\n# show box x= %4d %4d d= %3d %3d r= %d %d",
234
- px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
235
- px->x - px->x0, px->y - px->y0);
236
- if (px->num_ac){ /* output table of chars and its probabilities */
237
- fprintf(stderr,"\n# list box char: ");
238
- for(i=0;i<px->num_ac && i<NumAlt;i++)
239
- /* output the (xml-)string (picture position, barcodes, glyphs, ...) */
240
- if (px->tas[i])
241
- fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
242
- else
243
- fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
244
- }
245
- fprintf(stderr,"\n");
246
- if (px->dots && px->m2 && px->m1<y0) { yy0=px->m1; dy=px->y1-yy0+1; }
247
- }
248
- tx=dx/80+1;
249
- ty=dy/40+1; // step, usually 1, but greater on large maps
250
- fprintf(stderr,"# show pattern x= %4d %4d d= %3d %3d t= %d %d\n",
251
- x0,y0,dx,dy,tx,ty);
252
- if (dx>0)
253
- for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
254
-
255
- /* image is the boxframe + environment in the original bitmap */
256
- for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
257
- c1='.';
258
- for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
259
- for(x2=x;x2<x+tx && x2<x0+dx;x2++)
260
- { if((getpixel(b,x2,y2)<cs)) c1='#'; }
261
- // show pixels outside the box thinner/weaker
262
- if (x+tx-1 < px->x0 || x > px->x1
263
- || y+ty-1 < px->y0 || y > px->y1) c1=((c1=='#')?'O':',');
264
- fprintf(stderr,"%c", c1 );
265
- }
266
-
267
- c1=c2=' ';
268
- /* mark lines with < */
269
- if (px) if (y==px->m1 || y==px->m2 || y==px->m3 || y==px->m4) c1='<';
270
- if (y==px->y0 || y==px->y1) c2='-'; /* boxmarks */
271
- fprintf(stderr,"%c%c\n",c1,c2);
272
- }
273
- }
274
-
275
-
276
- /*
277
- // second variant, for database (with slightly other behaviour)
278
- // new variant
279
- // look at the environment of the pixel too (contrast etc.)
280
- // detailed analysis only of diff pixels!
281
- //
282
- // 100% * distance, 0 is best fit
283
- // = similarity of 2 chars for recognition of noisy chars
284
- // weigth of pixels with only one same neighbour set to 0
285
- // look at contours too!
286
- ToDo: especially on small boxes distance should only be 0 if
287
- characters are 100% identical!
288
- */
289
- // #define DEBUG 2
290
- int distance2( pix *p1, struct box *box1,
291
- pix *p2, struct box *box2, int cs){
292
- int rc=0,x,y,v1,v2,i1,i2,rgood=0,rbad=0,
293
- x1,y1,x2,y2,dx,dy,dx1,dy1,dx2,dy2,tx,ty;
294
- #if DEBUG == 2
295
- if(JOB->cfg.verbose)
296
- fprintf(stderr," DEBUG: distance2\n");
297
- #endif
298
- x1=box1->x0;y1=box1->y0;x2=box2->x0;y2=box2->y0;
299
- dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1; dx=((dx1>dx2)?dx1:dx2);dx=dx1;
300
- dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1; dy=((dy1>dy2)?dy1:dy2);dy=dy1;
301
- if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) rbad++; // how to weight?
302
- // compare relations to baseline and upper line
303
- if(box1->m4>0 && box2->m4>0){ // used ???
304
- if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
305
- if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
306
- }
307
- tx=dx/16; if(dx<17)tx=1; // raster
308
- ty=dy/32; if(dy<33)ty=1;
309
- // compare pixels
310
- for( y=0;y<dy;y+=ty )
311
- for( x=0;x<dx;x+=tx ) { // try global shift too ???
312
- v1=((getpixel(p1,x1+x*dx1/dx,y1+y*dy1/dy)<cs)?1:0); i1=8; // better gray?
313
- v2=((getpixel(p2,x2+x*dx2/dx,y2+y*dy2/dy)<cs)?1:0); i2=8; // better gray?
314
- if(v1==v2) { rgood+=16; continue; } // all things are right!
315
- // what about different pixel???
316
- // test overlapp of surounding pixels ???
317
- v1=1; rbad+=4;
318
- v1=-1;
319
- for(i1=-1;i1<2;i1++)
320
- for(i2=-1;i2<2;i2++)if(i1!=0 || i2!=0){
321
- if( ((getpixel(p1,x1+x*dx1/dx+i1*(1+dx1/32),y1+y*dy1/dy+i2*(1+dy1/32))<cs)?1:0)
322
- !=((getpixel(p2,x2+x*dx2/dx+i1*(1+dx2/32),y2+y*dy2/dy+i2*(1+dy2/32))<cs)?1:0) ) v1++;
323
- }
324
- if(v1>0)
325
- rbad+=16*v1;
326
- }
327
- if(rgood+rbad) rc= 100*rbad/(rgood+rbad); else rc=99;
328
- /* if width/high is not correct add badness */
329
- rc += ( abs(dx1*dy2-dx2*dy1) * 10 ) / (dy1*dy2);
330
- if (rc>100) rc=100;
331
- if(/* rc<10 && */ JOB->cfg.verbose /* &1024 */){
332
- #if DEBUG == 2
333
- fprintf(stderr," distance2 rc=%d rgood=%d rbad=%d\n",rc,rgood,rbad);
334
- // out_b(NULL,p1,box1->x0,box1->y0,box1->x1-box1->x0+1,
335
- // box1->y1-box1->y0+1,cs);
336
- // out_b(NULL,p2,box2->x0,box2->y0,box2->x1-box2->x0+1,
337
- // box2->y1-box2->y0+1,cs);
338
- out_x(box1);
339
- out_x(box2);
340
- #endif
341
- }
342
- return rc;
343
- }
344
-
345
- wchar_t ocr_db(struct box *box1) {
346
- int dd = 1000, dist = 1000;
347
- wchar_t c = UNKNOWN;
348
- unsigned char buf[200]; /* Oct08 JS: add unsigned to avoid UTF problems */
349
- Box *box2, *box3;
350
-
351
- if (!list_empty(&JOB->tmp.dblist)){
352
- box3 = (Box *)list_get_header(&JOB->tmp.dblist);
353
- if(JOB->cfg.verbose)
354
- fprintf(stderr,"\n#DEBUG: ocr_db (%d,%d) ",box1->x0, box1->y0);
355
-
356
- for_each_data(&JOB->tmp.dblist) {
357
- box2 = (Box *)list_get_current(&JOB->tmp.dblist);
358
- /* do preselect!!! distance() slowly */
359
- dd = distance2( box2->p, box2, box1->p, box1, JOB->cfg.cs);
360
- if (dd <= dist) { /* new best fit */
361
- dist = dd;
362
- box3 = box2; /* box3 is a pointer and not copied box2 */
363
-
364
- if (dist<100 && 100-dist >= JOB->cfg.certainty) {
365
- /* some deviation of the pattern is tolerated */
366
- int i, wa;
367
- for (i=0;i<box3->num_ac;i++) {
368
- wa = (100-dist)*box3->wac[i]/100; /* weight *= (100-dist) */
369
- if (box3->tas[i]) setas(box1,box3->tas[i],wa);
370
- else setac(box1,box3->tac[i],wa);
371
- }
372
- if (box3->num_ac) c=box3->tac[0]; /* 0 for strings (!UNKNOWN) */
373
- if (JOB->cfg.verbose)
374
- fprintf(stderr, " dist=%4d c= %c 0x%02x %s wc= %3d", dist,
375
- ((box3->c>32 && box3->c<127) ? (char) box3->c : '.'),
376
- (int)box3->c, ((box3->tas[0])?box3->tas[0]:""), box3->wac[0]);
377
- }
378
- if (dd<=0 && ((box3->num_ac && box3->tas[0]) || box3->c >= 128
379
- || !strchr ("l1|I0O", box3->c)))
380
- break; /* speedup if found */
381
- }
382
- } end_for_each(&JOB->tmp.dblist);
383
-
384
- }
385
-
386
- if( (JOB->cfg.mode&128) != 0 && c == UNKNOWN ) { /* prompt the user */
387
- /* should the output go to stderr or special pipe??? */
388
- int utf8_ok=0; /* trigger this flag if input is ok */
389
- int i, endchar; /* index */
390
- out_env(box1); /* old: out_x(box1); */
391
- fprintf(stderr,"The above pattern was not recognized.\n"
392
- "Enter UTF8 char or string for above pattern. Leave empty if unsure.\n"
393
- "Press RET at the end (ALT+RET to store into RAM only) : "
394
- ); /* ToDo: empty + alt-return (0x1b 0x0a) for help? ^a for skip all */
395
- /* UTF-8 (man 7 utf-8):
396
- * 7bit = 0xxxxxxx (0000-007F)
397
- * 11bit = 110xxxxx 10xxxxxx (0080-07FF)
398
- * 16bit = 1110xxxx 10xxxxxx 10xxxxxx (0800-FFFF)
399
- * 21bit = 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
400
- * 26bit = 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
401
- * 31bit = 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
402
- */
403
- buf[0]=0;
404
- /* shift/ctrl/altgr-enter acts like enter or ^j or ^m,
405
- * alt-enter returns 0x1b 0x0a and returns from fgets()
406
- * ^d (EOF) returns (nil) from fgets()
407
- * x+(2*)ctrl-d returns from fgets() without returning a 0x0a
408
- * if not UTF-input-mode, we are in trouble?
409
- * ^a=0x01, ^b=0x02, ^e=05, ..., ToDo: meaning of no-input or <=space
410
- */
411
- fgets((char *)buf,200,stdin); /* including \n=0x0a */
412
- dd=strlen((char *)buf);
413
- /* output hexcode if verbose set */
414
- if (JOB->cfg.verbose) {
415
- fprintf(stderr, "\n# fgets [%d]:", dd);
416
- for(i=0; i<dd; i++)
417
- fprintf(stderr, " %02x", (unsigned)((unsigned char)buf[i]));
418
- fprintf(stderr, "\n#");
419
- }
420
- /* we dont accept chars which could destroy database file */
421
- for (i=0; i<dd; i++) if (buf[i]<32) break; /* need unsigned char here */
422
- endchar=buf[i]; /* last char is 0x0a (ret) 0x00 (EOF) or 0x1b (alt+ret) */
423
- if (endchar==0x01) { i=0;JOB->cfg.mode&=~128; } /* skip all */
424
- buf[dd=i]=0; /* replace final 0x0a or other special codes */
425
- if (dd==1 && !(buf[0]&128)) { c=buf[0]; utf8_ok=1; } /* single char */
426
- if (dd>1 && dd<7) { /* try to decode single wide char (utf8) */
427
- int u0, u1; /* define UTF8-start sequences, u0=0bits u1=1bits */
428
- u0= 1<<(7-dd); /* compute start byte from UTF8-length */
429
- u1=255&~((1<<(8-dd))-1);
430
- /* count number of following 10xxxxxx bytes to i */
431
- for (i=1;i<dd;i++) if ((buf[i]&0xc0)!=0x80) break; /* 10xxxxxx */
432
- if (i==dd && (buf[0]&(u0|u1))==u1) { utf8_ok=1;
433
- c=buf[0]&(u0-1); /* 11..0x.. */
434
- for (i=1;i<dd;i++) { c<<=6; c|=buf[i]&0x3F; } /* 10xxxxxx */
435
- }
436
- }
437
- if (dd>0){ /* ToDo: skip space and tab too? */
438
- if (utf8_ok==1) { setac(box1, c, 100); } /* store single wchar */
439
- if (utf8_ok==0) { /* store a string of chars (UTF8-string) */
440
- c='_'; /* what should we do with c? probably a bad idea? */
441
- setas(box1, (char *)buf, 100);
442
- }
443
- /* decide between
444
- * 0) just help gocr to find the results and (dont remember, 0x01)
445
- * 1) help and remember in the same run (store to memory, 0x1b)
446
- * 2) expand the database (dont store ugly chars to the database!)
447
- */
448
- if (endchar!=0x01){ /* ^a before hit return */
449
- /* is there a reason to dont store to memory? */
450
- list_app(&JOB->tmp.dblist, box1); /* append to list for 1+2 */
451
- }
452
- if (endchar!=0x01 && endchar!=0x1b){
453
- store_db(box1); /* store to disk for 2 */
454
- }
455
- if (JOB->cfg.verbose)
456
- fprintf(stderr, " got char= %c 16bit= 0x%04x string= \"%s\"\n",
457
- ((c>32 && c<127)?(char)c:'.'), (int)c, buf);
458
- }
459
- }
460
-
461
- return c;
462
- }