isbn 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,11 +0,0 @@
1
- #ifndef _BARCODE_H
2
- #define _BARCODE_H
3
- #include "pnm.h"
4
-
5
- /*
6
- detect barcode and add a string to the box (obj-pointer)
7
- */
8
-
9
- int detect_barcode(job_t *job);
10
-
11
- #endif
@@ -1,372 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2009 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL address
20
-
21
- */
22
-
23
- #include <stdio.h>
24
- #include <stdlib.h>
25
- #include <assert.h>
26
- #include <string.h>
27
- /* do we need #include <math.h>? conflicts with INFINITY in unicode.h */
28
- #include "gocr.h"
29
- #include "pgm2asc.h"
30
-
31
- /* for sorting letters by position on the image
32
- / ToDo: - use function same line like this or include lines.m1 etc. */
33
- int box_gt(struct box *box1, struct box *box2) {
34
- // box1 after box2 ?
35
- if (box1->line > box2->line)
36
- return 1;
37
- if (box1->line < box2->line)
38
- return 0;
39
- if (box1->x0 > box2->x1) // before
40
- return 1;
41
- if (box1->x1 < box2->x0) // before
42
- return 0;
43
- if (box1->x0 > box2->x0) // before, overlapping!
44
- return 1;
45
-
46
- return 0;
47
- }
48
-
49
- /* --- copy part of pix p into new pix b ---- len=10000
50
- * Returns: 0 on success, 1 on error.
51
- * naming it as copybox isnt very clever, because it dont have to do with the
52
- * char boxes (struct box)
53
- */
54
- int copybox (pix * p, int x0, int y0, int dx, int dy, pix * b, int len) {
55
- int x, y;
56
-
57
- /* test boundaries */
58
- if (b->p == NULL || dx < 0 || dy < 0 || dx * dy > len) {
59
- fprintf(stderr, " error-copybox x=%5d %5d d=%5d %5d\n", x0, y0, dx, dy);
60
- return 1;
61
- }
62
-
63
- b->x = dx;
64
- b->y = dy;
65
- b->bpp = 1;
66
- #ifdef FASTER_INCOMPLETE
67
- for (y = 0; y < dy; y++)
68
- memcpy(&pixel_atp(b, 0, y), &pixel_atp(p, x0, y + y0 ), dx);
69
- // and unmark pixels
70
- #else
71
- for (y = 0; y < dy; y++)
72
- for (x = 0; x < dx; x++)
73
- pixel_atp(b, x, y) = getpixel(p, x + x0, y + y0);
74
- #endif
75
-
76
- return 0;
77
- }
78
-
79
- /* reset table of alternative chars (and free memory) */
80
- int reset_box_ac(struct box *box){
81
- int i;
82
- for (i=0; i<box->num_ac; i++)
83
- if (box->tas[i]) {
84
- /* fprintf(stderr,"DBG free_s[%d] %p %s\n",i,box->tas[i],box->tas[i]); */
85
- free(box->tas[i]);
86
- box->tas[i]=0; /* prevent double freeing */
87
- }
88
- box->num_ac=0; /* mark as freed */
89
- return 0;
90
- }
91
-
92
- /* ini or copy a box: get memory for box and initialize the memory */
93
- struct box *malloc_box (struct box *inibox) {
94
- struct box *buf;
95
- int i;
96
-
97
- buf = (struct box *) malloc(sizeof(struct box));
98
- if (!buf)
99
- return NULL;
100
- if (inibox) {
101
- memcpy(buf, inibox, sizeof(struct box));
102
- /* only pointer are copied, we want to copy the contents too */
103
- for (i=0;i<inibox->num_ac;i++) {
104
- if (inibox->tas[i]) {
105
- buf->tas[i]=(char *)malloc(strlen(inibox->tas[i])+1);
106
- memcpy(buf->tas[i], inibox->tas[i], strlen(inibox->tas[i])+1);
107
- }
108
- }
109
- }
110
- else { /* ToDo: init it */
111
- buf->num_ac=0;
112
- buf->num_frames=0;
113
- }
114
- /* fprintf(stderr,"\nDBG ini_box %p",buf); */
115
- return buf;
116
- }
117
-
118
- /* free memory of box */
119
- int free_box (struct box *box) {
120
- if (!box) return 0;
121
- /* fprintf(stderr,"DBG free_box %p\n",box); out_x(box); */
122
- reset_box_ac(box); /* free alternative char table */
123
- free(box); /* free the box memory */
124
- return 0;
125
- }
126
-
127
- /* simplify the vectorgraph,
128
- * but what is the best way?
129
- * a) melting two neighbouring vectors with nearly same direction?
130
- * (nearest angle to pi)
131
- * b) melting three neigbours with smallest area?
132
- * ToDo:
133
- * mode = 0 - only lossless
134
- * mode = 1 - reduce one vector, smallest possible loss
135
- * mode = 2 - remove jitter (todo, or somewhere else)
136
- * ToDo: include also loop around (last - first element)
137
- * ToDo: reduce by 10..50%
138
- */
139
- int reduce_vectors ( struct box *box1, int mode ) {
140
- int i1, i2, nx, ny, mx, my, len,
141
- minlen=1024, /* minlength of to neighbouring vectors */
142
- besti1=0, /* frame for best reduction */
143
- besti2=2; /* vector replacing its predecessor */
144
- double sprod, maxsprod=-1;
145
- if (mode!=1) fprintf(stderr,"ERR not supported yet, ToDo\n");
146
- for (i2=1,i1=0; i1<box1->num_frames; i1++) { /* every frame */
147
- for (;i2<box1->num_frame_vectors[i1]-1; i2++) { /* every vector */
148
- /* predecessor n */
149
- nx = box1->frame_vector[i2-0][0] - box1->frame_vector[i2-1][0];
150
- ny = box1->frame_vector[i2-0][1] - box1->frame_vector[i2-1][1];
151
- /* successor m */
152
- mx = box1->frame_vector[i2+1][0] - box1->frame_vector[i2-0][0];
153
- my = box1->frame_vector[i2+1][1] - box1->frame_vector[i2-0][1];
154
- /* angle is w = a*b/(|a|*|b|) = 1 means parallel */
155
- /* normalized: minimize w^2 = (a*b/(|a|*|b|)-1)^2 */
156
- /* -1=90grd, 0=0grd, -2=180grd */
157
- sprod = /* fabs */(abs(nx*mx+ny*my)*(nx*mx+ny*my)
158
- /(1.*(nx*nx+ny*ny)*(mx*mx+my*my))-1);
159
- /* we dont include math.h because INFINITY conflicts to unicode,h */
160
- if (sprod<0) sprod=-sprod;
161
- len = (mx*mx+my*my)*(nx*nx+ny*ny); /* sum lengths^2 */
162
- // ..c ###c ... .. ...
163
- // .b. len=2+2 #b.. len=2+5 #bc len=1+2 bc len=1+1 b#a len=4+5
164
- // a.. spr=0 a... spr=1/10 a.. spr=1/4 a. spr=1 ##c spr=9/5
165
- //
166
- if ( len* sprod* sprod* sprod* sprod
167
- <minlen*maxsprod*maxsprod*maxsprod*maxsprod
168
- || maxsprod<0) /* Bad! ToDo! */
169
- { maxsprod=sprod; besti1=i1; besti2=i2; minlen=len; }
170
- }
171
- }
172
- if (box1->num_frames>0)
173
- for (i2=besti2; i2<box1->num_frame_vectors[ box1->num_frames-1 ]-1; i2++) {
174
- box1->frame_vector[i2][0]=box1->frame_vector[i2+1][0];
175
- box1->frame_vector[i2][1]=box1->frame_vector[i2+1][1];
176
- }
177
- for (i1=besti1; i1<box1->num_frames; i1++)
178
- box1->num_frame_vectors[i1]--;
179
- // fprintf(stderr,"\nDBG_reduce_vectors i= %d nv= %d sprod=%f len2=%d\n# ...",
180
- // besti2,box1->num_frame_vectors[ box1->num_frames-1 ],maxsprod,minlen);
181
- // out_x(box1);
182
- return 0;
183
- }
184
-
185
- /* add the contents of box2 to box1
186
- * especially add vectors of box2 to box1
187
- */
188
- int merge_boxes( struct box *box1, struct box *box2 ) {
189
- int i1, i2, i3, i4;
190
- struct box tmpbox, *bsmaller, *bbigger; /* for mixing and sorting */
191
- /* DEBUG, use valgrind to check uninitialized memory */
192
- #if 0
193
- fprintf(stderr,"\nDBG merge_boxes_input:"); out_x(box1); out_x(box2);
194
- #endif
195
- /* pair distance is to expendable, taking borders is easier */
196
- if ((box2->x1 - box2->x0)*(box2->y1 - box2->y0)
197
- >(box1->x1 - box1->x0)*(box1->y1 - box1->y0)) {
198
- bbigger=box2; bsmaller=box1; }
199
- else {
200
- bbigger=box1; bsmaller=box2; }
201
- /* ToDo: does not work if a third box is added */
202
- if (box2->y0>box1->y1 || box2->y1<box1->y0
203
- || box2->x0>box1->x1 || box2->x1<box1->x0) {
204
- box1->num_boxes += box2->num_boxes; /* num seperate objects 2=ij */
205
- } else {
206
- if (box2->num_boxes>box1->num_boxes) box1->num_boxes=box2->num_boxes;
207
- box1->num_subboxes += box2->num_subboxes+1; /* num holes 1=abdepq 2=B */
208
- }
209
- box1->dots += box2->dots; /* num i-dots */
210
- if ( box2->x0 < box1->x0 ) box1->x0 = box2->x0;
211
- if ( box2->x1 > box1->x1 ) box1->x1 = box2->x1;
212
- if ( box2->y0 < box1->y0 ) box1->y0 = box2->y0;
213
- if ( box2->y1 > box1->y1 ) box1->y1 = box2->y1;
214
- i1 = i2 = 0;
215
- if (bbigger->num_frames)
216
- i1 = bbigger->num_frame_vectors[ bbigger->num_frames - 1 ];
217
- if (bsmaller->num_frames)
218
- i2 = bsmaller->num_frame_vectors[ bsmaller->num_frames - 1 ];
219
- while (i1+i2 > MaxFrameVectors) {
220
- if (i1>i2) { reduce_vectors( bbigger, 1 ); i1--; }
221
- else { reduce_vectors( bsmaller, 1 ); i2--; }
222
- }
223
- /* if i1+i2>MaxFrameVectors simplify the vectorgraph */
224
- /* if sum num_frames>MaxNumFrames through shortest graph away and warn */
225
- /* first copy the bigger box */
226
- memcpy(&tmpbox, bbigger, sizeof(struct box));
227
- /* attach the smaller box */
228
- for (i4=i3=0; i3<bsmaller->num_frames; i3++) {
229
- if (tmpbox.num_frames>=MaxNumFrames) break;
230
-
231
- for (; i4<bsmaller->num_frame_vectors[i3]; i4++) {
232
- memcpy(tmpbox.frame_vector[i1],
233
- bsmaller->frame_vector[i4],2*sizeof(int));
234
- i1++;
235
- }
236
- tmpbox.num_frame_vectors[ tmpbox.num_frames ] = i1;
237
- tmpbox.frame_vol[ tmpbox.num_frames ] = bsmaller->frame_vol[ i3 ];
238
- tmpbox.frame_per[ tmpbox.num_frames ] = bsmaller->frame_per[ i3 ];
239
- tmpbox.num_frames++;
240
- if (tmpbox.num_frames>=MaxNumFrames) {
241
- if (JOB->cfg.verbose)
242
- fprintf(stderr,"\nDBG merge_boxes MaxNumFrames reached");
243
- break;
244
- }
245
- }
246
- /* copy tmpbox to destination */
247
- box1->num_frames = tmpbox.num_frames;
248
- memcpy(box1->num_frame_vectors,
249
- tmpbox.num_frame_vectors,sizeof(int)*MaxNumFrames);
250
- memcpy(box1->frame_vol,
251
- tmpbox.frame_vol,sizeof(int)*MaxNumFrames);
252
- memcpy(box1->frame_per,
253
- tmpbox.frame_per,sizeof(int)*MaxNumFrames);
254
- memcpy(box1->frame_vector,
255
- tmpbox.frame_vector,sizeof(int)*2*MaxFrameVectors);
256
- #if 0
257
- if (JOB->cfg.verbose)
258
- fprintf(stderr,"\nDBG merge_boxes_result:"); out_x(box1);
259
- #endif
260
- return 0;
261
- }
262
-
263
- /* used for division of glued chars
264
- * after a box is splitted into 2, where vectors are copied to both,
265
- * vectors outside the new box are cutted and thrown away,
266
- * later replaced by
267
- * - 1st remove outside vectors with outside neighbours (complete frames?)
268
- * add vector on outside vector with inside neighbours
269
- * care about connections through box between outside vectors
270
- * - 2nd reduce outside crossings (inclusive splitting frames if necessary)
271
- * depending on direction (rotation) of outside connections
272
- * - 3th shift outside vectors to crossing points
273
- * - split add this points, connect only in-out...out-in,
274
- * - cutting can result in more objects
275
- * ToDo:
276
- * dont connect --1---2--------3----4-- new-y1 (inside above not drawn)
277
- * \ \->>>>-/ / outside
278
- * \----<<<<-----/ old-y1
279
- * |======| subtractable?
280
- *
281
- * only connect --1---2--------3----4-- new-y1
282
- * \>>/ \>>>/ old-y1 outside
283
- * ToDo: what about cutting 2 frames (example: 2fold melted MN)
284
- * better restart framing algo?
285
- *
286
- * ToDo: new vol, per
287
- */
288
- int cut_box( struct box *box1) {
289
- int i1, i2, i3, i4, x, y, lx, ly, dbg=0;
290
- if (JOB->cfg.verbose) dbg=1; // debug level, enlarge to get more output
291
- if (dbg) fprintf(stderr,"\n cut box x= %3d %3d", box1->x0, box1->y0);
292
- /* check if complete frames are outside the box */
293
- for (i1=0; i1<box1->num_frames; i1++){
294
- if (dbg>2) fprintf(stderr,"\n checking frame %d outside", i1);
295
- i2 = ((i1)?box1->num_frame_vectors[ i1-1 ]:0); // this frame
296
- i3 = box1->num_frame_vectors[ i1 ]; // next frame
297
- for (i4=i2; i4 < i3; i4++) {
298
- x = box1->frame_vector[i4][0];
299
- y = box1->frame_vector[i4][1];
300
- /* break, if one vector is lying inside */
301
- if (x>=box1->x0 && x<=box1->x1 && y>=box1->y0 && y<=box1->y1) break;
302
- }
303
- if (i4==i3) { /* all vectors outside */
304
- if (dbg>1) fprintf(stderr,"\n remove frame %d",i1);
305
- /* replace all frames i1,i1+1,... by i1+1,i1+2,... */
306
- /* replace (x,y) pairs first */
307
- for (i4=i2; i4<box1->num_frame_vectors[ box1->num_frames-1 ]-(i3-i2);
308
- i4++) {
309
- box1->frame_vector[i4][0] = box1->frame_vector[i4+i3-i2][0];
310
- box1->frame_vector[i4][1] = box1->frame_vector[i4+i3-i2][1];
311
- }
312
- /* replace the num_frame_vectors */
313
- for (i4=i1; i4<box1->num_frames-1; i4++)
314
- box1->num_frame_vectors[ i4 ] =
315
- box1->num_frame_vectors[ i4+1 ]-(i3-i2);
316
- box1->num_frames--; i1--;
317
- }
318
- }
319
- /* remove vectors outside the box */
320
- i3=0;
321
- for (i1=0; i1<box1->num_frames; i1++){
322
- if (dbg>2) fprintf(stderr,"\n check cutting vectors on frame %d", i1);
323
- x = box1->frame_vector[0][0]; /* last x */
324
- y = box1->frame_vector[0][1]; /* last y */
325
- /* ToDo: start inside to get a closed object */
326
- if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) i3=1;
327
- for (i2=0; i2<box1->num_frame_vectors[ i1 ]; i2++) {
328
- lx = x; /* last x */
329
- ly = y; /* last y */
330
- x = box1->frame_vector[i2][0];
331
- y = box1->frame_vector[i2][1];
332
- // fprintf(stderr,"DBG LEV3 i2= %3d xy= %3d %3d",i2,x,y);
333
- /* check if outside */
334
- if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) {
335
- /* replace by nearest point at border, ToDo: better crossingpoint */
336
- if (i3==0) { /* wrong if it starts outside */
337
- if (x < box1->x0) x = box1->frame_vector[i2][0] = box1->x0;
338
- if (x > box1->x1) x = box1->frame_vector[i2][0] = box1->x1;
339
- if (y < box1->y0) y = box1->frame_vector[i2][1] = box1->y0;
340
- if (y > box1->y1) y = box1->frame_vector[i2][1] = box1->y1;
341
- } else {
342
- /* remove vector */
343
- if (dbg>1) fprintf(stderr,"\n remove vector[%d][%d] x= %2d %2d",i1,i2,x-box1->x0,y-box1->y0);
344
- for (i4=i2;i4<box1->num_frame_vectors[ box1->num_frames-1 ]-1;i4++) {
345
- box1->frame_vector[i4][0] = box1->frame_vector[i4+1][0];
346
- box1->frame_vector[i4][1] = box1->frame_vector[i4+1][1];
347
- }
348
- for (i4=i1; i4<box1->num_frames; i4++)
349
- box1->num_frame_vectors[ i4 ]--;
350
- i2--; /* next element is shiftet now, setting back the counter */
351
- }
352
- i3++;
353
- // fprintf(stderr," outside i3= %d\n",i3);
354
- continue;
355
- }
356
- // fprintf(stderr," inside i3= %d",i3);
357
- if (i3) { /* ToDo: better crossing point last vector and border */
358
- if (lx < box1->x0) lx = box1->x0;
359
- if (lx > box1->x1) lx = box1->x1;
360
- if (ly < box1->y0) ly = box1->y0;
361
- if (ly > box1->y1) ly = box1->y1;
362
- x = box1->frame_vector[i2][0] = lx;
363
- y = box1->frame_vector[i2][1] = ly;
364
- i3 = 0;
365
- }
366
- // fprintf(stderr," xy= %3d %3d\n",x,y);
367
- }
368
- }
369
- if (dbg>2) { fprintf(stderr,"\nDBG cut_box_result:"); out_x(box1); }
370
- return 0;
371
- }
372
-
@@ -1,462 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2009 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL address
20
- */
21
-
22
- #include <stdio.h>
23
- #include <stdlib.h>
24
- #include "gocr.h"
25
- #include "pnm.h"
26
- #include "pgm2asc.h"
27
- #include <string.h>
28
- #include <time.h>
29
-
30
- #define Blen 256
31
-
32
- // load boxes from database into boxlist (for faster access)
33
- // used as alternate engine, comparing chars with database
34
- int load_db(void) {
35
- FILE *f1;
36
- char s1[Blen+1],
37
- s2[Blen+1] = "./db/", /* ToDo: replace by constant! by configure */
38
- *s3;
39
- int i, j, ii, i2, line;
40
- struct box *box1;
41
- pix *pp;
42
-
43
- if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
44
- i2=strlen(s2);
45
- if (JOB->cfg.verbose)
46
- fprintf(stderr, "# load database %s %s ... ",s2,JOB->cfg.db_path);
47
-
48
- strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
49
- f1 = fopen(s2, "r");
50
- if (!f1) {
51
- fprintf(stderr, " DB %s not found\n",s2);
52
- return 1;
53
- }
54
-
55
- line = 0; /* line counter for better error report */
56
- for (ii = 0; !feof(f1); ii++) {
57
- /* bbg: should write a better input routine */
58
- if (!fgets(s1, Blen, f1)) break; line++;
59
- j = strlen(s1);
60
- /* remove carriage return sequences from line */
61
- while (j > 0 && (s1[j - 1] == '\r' || s1[j - 1] == '\n'))
62
- s1[--j] = 0;
63
- if (!j) continue; /* skip empty line */
64
- if (s1[0]=='#') continue; /* skip comments (v0.44) */
65
- /* copy file name */
66
- for (i = 0; i < j && i+i2 < Blen && strchr(" \t,;",s1[i]) == 0; i++)
67
- s2[i2 + i] = s1[i];
68
- s2[i2+i]=0;
69
- /* skip spaces */
70
- for (; i < j && strchr(" \t",s1[i]) != 0; i++);
71
- /* by now: read pix, fill box, goto next ??? */
72
- pp = (pix *)malloc(sizeof(pix));
73
- if( !pp ) fprintf(stderr,"malloc error in load_db pix\n");
74
-
75
- // if (JOB->cfg.verbose) fprintf(stderr,"\n# readpgm %s ",s2);
76
- if (readpgm(s2, pp, 0 * JOB->cfg.verbose)!=0) {
77
- fprintf(stderr,"\ndatabase error: readpgm %s\n", s2);
78
- exit(-1);
79
- }
80
-
81
- box1 = (struct box *)malloc_box(NULL);
82
- if(!box1) fprintf(stderr,"malloc error in load_db box1\n");
83
- box1->x0 = 0;
84
- box1->x1 = pp->x-1; // white border 1 pixel width
85
- box1->y0 = 0;
86
- box1->y1 = pp->y-1;
87
- box1->x = 1;
88
- box1->y = 1;
89
- box1->dots = 0;
90
- box1->c = 0;
91
- box1->modifier = 0; /* ToDo: obsolete */
92
- box1->tas[0]=NULL;
93
- box1->tac[0]=0;
94
- box1->wac[0]=100; /* really 100% sure? */
95
- box1->num_ac=1;
96
- if (s1[i]=='"'){ /* parse a string */
97
- j=strrchr(s1+i+1,'"')-(s1+i+1); /* we only look for first and last "" */
98
- if (j>=1) {
99
- s3=(char *)malloc(j+1);
100
- if (!s3) fprintf (stderr, "malloc error in load_db s3\n");
101
- if (s3) {
102
- memcpy(s3,s1+i+1,j);
103
- s3[j]=0;
104
- box1->tas[0]=s3;
105
- // fprintf(stderr,"\nstring=%s",s3);
106
- }
107
- } else { fprintf(stderr,"load_db: string parse error L%d\n",line); }
108
- } else {
109
- box1->tac[0] = box1->c = s1[i]; /* try to interpret as ASCII */
110
- /* we can live without hexcode in future if we use UTF8-strings */
111
- s3=s1+i;
112
- j=strtol( s1+i, &s3, 16); /* try to read 4 to 8 digit hex unicode */
113
- /* if its an hexcode, ASCII interpretation is overwritten */
114
- if( j && i+3<=Blen && s3-s1-i>3 ) box1->tac[0] = box1->c = j;
115
- // fprintf(stderr,"\nhexcode=%04x=%04x %d",(int)j,(int)box1->c,s3-s1-i);
116
- }
117
- box1->num = 0;
118
- box1->line = -1;
119
- box1->m1 = 0; /* ToDo: should be given too in the database! */
120
- box1->m2 = 0;
121
- box1->m3 = 0;
122
- box1->m4 = 0;
123
- box1->p = pp;
124
- list_app(&JOB->tmp.dblist, box1); // append to list
125
- #if 0
126
- out_x(box1);
127
- #endif
128
- }
129
- fclose(f1);
130
- if (JOB->cfg.verbose)
131
- fprintf(stderr, " %d chars loaded\n", ii);
132
- return 0;
133
- }
134
-
135
- // expand database from box/boxlist name=db_$utime.pbm
136
- // this is added in version v0.3.3
137
- int store_db(struct box *box1) {
138
- FILE *f1;
139
- char s2[Blen+1] = "./db/", s3[Blen+1];
140
- int i2, dx, dy;
141
- unsigned c_out;
142
- pix b; /* temporary mini page */
143
-
144
- if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
145
- i2=strlen(s2);
146
-
147
- /* add (first) char and time to the file name for better debugging */
148
-
149
- /* decide between 7bit ASCII and UTF8-char or string */
150
- c_out = ((box1->num_ac && box1->tas[0]) ?
151
- (unsigned char )box1->tas[0][0] /* char */ :
152
- box1->c /* wchar */);
153
- /* (unsigned int)(( char)0x80) = 0xffffff80 */
154
- /* (unsigned int)((unsigned char)0x80) = 0x00000080 */
155
-
156
- /* name generation can cause problems, if called twice within a second */
157
- sprintf(s3,"db_%04x_%08lx.pbm", c_out, (unsigned long)time(NULL));
158
- /* ToDo: the file name may be not unique */
159
-
160
- strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
161
- f1 = fopen(s2, "a");
162
- if (!f1) {
163
- fprintf(stderr, " could not access %s\n",s2);
164
- return 1;
165
- }
166
- strncpy(s2+i2,s3,strlen(s3)); s2[i2+strlen(s3)]=0;
167
- /* store image and infos about the char */
168
- /* ToDo: store the vector list instead of the pixelarray */
169
-
170
- if (JOB->cfg.verbose)
171
- fprintf(stderr, "store_db: add file %s to database (nac=%d c=%04x)"
172
- "\n#",s3, box1->num_ac, c_out);
173
-
174
- dx=box1->x1-box1->x0+1;
175
- dy=box1->y1-box1->y0+1;
176
- b.p = (unsigned char *) malloc( dx * dy );
177
- if( !b.p ){
178
- fprintf( stderr, "\nFATAL: malloc failed, skip store_db" );
179
- return 2;
180
- }
181
- if (copybox(box1->p, box1->x0, box1->y0, dx, dy, &b, dx * dy))
182
- return -1;
183
-
184
- writepbm(s2,&b); /* What is to do on error? */
185
- free(b.p);
186
-
187
- /* store the database line */
188
- /* some infos about box1->m1,..,m4 should added (base line, high etc.) */
189
- if (box1->num_ac && box1->tas[0]) {
190
- fprintf(f1, "%s \"%s\"\n",s3,box1->tas[0]);
191
- /* ToDo: what if tas contains '"'? */
192
- } else {
193
- if( (box1->c >= '0' && box1->c <= '9')
194
- || (box1->c >= 'A' && box1->c <= 'Z')
195
- || (box1->c >= 'a' && box1->c <= 'z') )
196
- fprintf(f1, "%s %c\n",s3,(char)box1->c);
197
- else {
198
- if (((box1->c)>>16)>>16)
199
- fprintf(f1, "%s %08x\n",s3,(unsigned int)box1->c);
200
- else
201
- fprintf(f1, "%s %04x\n",s3,(unsigned int)box1->c);
202
- }
203
- }
204
- fclose(f1);
205
- return 0;
206
- }
207
-
208
- /* function is only for user prompt on console to identify chars
209
- it prints out a part of pixmap b at point x0,y0 to stderr
210
- using dots .,; if no pixel, and @xoO for pixels
211
- */
212
- void out_env(struct box *px ){
213
- int x0,y0,x1,y1,dx,dy,x,y,x2,y2,yy0,tx,ty,i,cs;
214
- char c1, c2; pix *b;
215
- cs=JOB->cfg.cs;
216
- yy0=px->y0;
217
- { /* overwrite rest of arguments */
218
- b=px->p;
219
- x0=px->x0; x1=px->x1; dx=x1-x0+1;
220
- y0=px->y0; y1=px->y1; dy=y1-y0+1;
221
- y0-=2; y1+=2;
222
- if (px->m4 && y0>px->m1) y0=px->m1;
223
- if (px->m4 && y1<px->m4) y1=px->m4;
224
- if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
225
- if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
226
- if (x1-x0+1<62) { x0-=5; x1+=5; }
227
- if (y1-y0+1<10) { y0-= 4; y1+= 4; } /* fragment? */
228
- if (x0<0) x0=0; if (x1>=b->x) x1=b->x-1;
229
- if (y0<0) y0=0; if (y1>=b->y) y1=b->y-1;
230
- dx=x1-x0+1;
231
- dy=y1-y0+1; yy0=y0;
232
- fprintf(stderr,"\n# show box + environment");
233
- fprintf(stderr,"\n# show box x= %4d %4d d= %3d %3d r= %d %d",
234
- px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
235
- px->x - px->x0, px->y - px->y0);
236
- if (px->num_ac){ /* output table of chars and its probabilities */
237
- fprintf(stderr,"\n# list box char: ");
238
- for(i=0;i<px->num_ac && i<NumAlt;i++)
239
- /* output the (xml-)string (picture position, barcodes, glyphs, ...) */
240
- if (px->tas[i])
241
- fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
242
- else
243
- fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
244
- }
245
- fprintf(stderr,"\n");
246
- if (px->dots && px->m2 && px->m1<y0) { yy0=px->m1; dy=px->y1-yy0+1; }
247
- }
248
- tx=dx/80+1;
249
- ty=dy/40+1; // step, usually 1, but greater on large maps
250
- fprintf(stderr,"# show pattern x= %4d %4d d= %3d %3d t= %d %d\n",
251
- x0,y0,dx,dy,tx,ty);
252
- if (dx>0)
253
- for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
254
-
255
- /* image is the boxframe + environment in the original bitmap */
256
- for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
257
- c1='.';
258
- for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
259
- for(x2=x;x2<x+tx && x2<x0+dx;x2++)
260
- { if((getpixel(b,x2,y2)<cs)) c1='#'; }
261
- // show pixels outside the box thinner/weaker
262
- if (x+tx-1 < px->x0 || x > px->x1
263
- || y+ty-1 < px->y0 || y > px->y1) c1=((c1=='#')?'O':',');
264
- fprintf(stderr,"%c", c1 );
265
- }
266
-
267
- c1=c2=' ';
268
- /* mark lines with < */
269
- if (px) if (y==px->m1 || y==px->m2 || y==px->m3 || y==px->m4) c1='<';
270
- if (y==px->y0 || y==px->y1) c2='-'; /* boxmarks */
271
- fprintf(stderr,"%c%c\n",c1,c2);
272
- }
273
- }
274
-
275
-
276
- /*
277
- // second variant, for database (with slightly other behaviour)
278
- // new variant
279
- // look at the environment of the pixel too (contrast etc.)
280
- // detailed analysis only of diff pixels!
281
- //
282
- // 100% * distance, 0 is best fit
283
- // = similarity of 2 chars for recognition of noisy chars
284
- // weigth of pixels with only one same neighbour set to 0
285
- // look at contours too!
286
- ToDo: especially on small boxes distance should only be 0 if
287
- characters are 100% identical!
288
- */
289
- // #define DEBUG 2
290
- int distance2( pix *p1, struct box *box1,
291
- pix *p2, struct box *box2, int cs){
292
- int rc=0,x,y,v1,v2,i1,i2,rgood=0,rbad=0,
293
- x1,y1,x2,y2,dx,dy,dx1,dy1,dx2,dy2,tx,ty;
294
- #if DEBUG == 2
295
- if(JOB->cfg.verbose)
296
- fprintf(stderr," DEBUG: distance2\n");
297
- #endif
298
- x1=box1->x0;y1=box1->y0;x2=box2->x0;y2=box2->y0;
299
- dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1; dx=((dx1>dx2)?dx1:dx2);dx=dx1;
300
- dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1; dy=((dy1>dy2)?dy1:dy2);dy=dy1;
301
- if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) rbad++; // how to weight?
302
- // compare relations to baseline and upper line
303
- if(box1->m4>0 && box2->m4>0){ // used ???
304
- if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
305
- if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
306
- }
307
- tx=dx/16; if(dx<17)tx=1; // raster
308
- ty=dy/32; if(dy<33)ty=1;
309
- // compare pixels
310
- for( y=0;y<dy;y+=ty )
311
- for( x=0;x<dx;x+=tx ) { // try global shift too ???
312
- v1=((getpixel(p1,x1+x*dx1/dx,y1+y*dy1/dy)<cs)?1:0); i1=8; // better gray?
313
- v2=((getpixel(p2,x2+x*dx2/dx,y2+y*dy2/dy)<cs)?1:0); i2=8; // better gray?
314
- if(v1==v2) { rgood+=16; continue; } // all things are right!
315
- // what about different pixel???
316
- // test overlapp of surounding pixels ???
317
- v1=1; rbad+=4;
318
- v1=-1;
319
- for(i1=-1;i1<2;i1++)
320
- for(i2=-1;i2<2;i2++)if(i1!=0 || i2!=0){
321
- if( ((getpixel(p1,x1+x*dx1/dx+i1*(1+dx1/32),y1+y*dy1/dy+i2*(1+dy1/32))<cs)?1:0)
322
- !=((getpixel(p2,x2+x*dx2/dx+i1*(1+dx2/32),y2+y*dy2/dy+i2*(1+dy2/32))<cs)?1:0) ) v1++;
323
- }
324
- if(v1>0)
325
- rbad+=16*v1;
326
- }
327
- if(rgood+rbad) rc= 100*rbad/(rgood+rbad); else rc=99;
328
- /* if width/high is not correct add badness */
329
- rc += ( abs(dx1*dy2-dx2*dy1) * 10 ) / (dy1*dy2);
330
- if (rc>100) rc=100;
331
- if(/* rc<10 && */ JOB->cfg.verbose /* &1024 */){
332
- #if DEBUG == 2
333
- fprintf(stderr," distance2 rc=%d rgood=%d rbad=%d\n",rc,rgood,rbad);
334
- // out_b(NULL,p1,box1->x0,box1->y0,box1->x1-box1->x0+1,
335
- // box1->y1-box1->y0+1,cs);
336
- // out_b(NULL,p2,box2->x0,box2->y0,box2->x1-box2->x0+1,
337
- // box2->y1-box2->y0+1,cs);
338
- out_x(box1);
339
- out_x(box2);
340
- #endif
341
- }
342
- return rc;
343
- }
344
-
345
- wchar_t ocr_db(struct box *box1) {
346
- int dd = 1000, dist = 1000;
347
- wchar_t c = UNKNOWN;
348
- unsigned char buf[200]; /* Oct08 JS: add unsigned to avoid UTF problems */
349
- Box *box2, *box3;
350
-
351
- if (!list_empty(&JOB->tmp.dblist)){
352
- box3 = (Box *)list_get_header(&JOB->tmp.dblist);
353
- if(JOB->cfg.verbose)
354
- fprintf(stderr,"\n#DEBUG: ocr_db (%d,%d) ",box1->x0, box1->y0);
355
-
356
- for_each_data(&JOB->tmp.dblist) {
357
- box2 = (Box *)list_get_current(&JOB->tmp.dblist);
358
- /* do preselect!!! distance() slowly */
359
- dd = distance2( box2->p, box2, box1->p, box1, JOB->cfg.cs);
360
- if (dd <= dist) { /* new best fit */
361
- dist = dd;
362
- box3 = box2; /* box3 is a pointer and not copied box2 */
363
-
364
- if (dist<100 && 100-dist >= JOB->cfg.certainty) {
365
- /* some deviation of the pattern is tolerated */
366
- int i, wa;
367
- for (i=0;i<box3->num_ac;i++) {
368
- wa = (100-dist)*box3->wac[i]/100; /* weight *= (100-dist) */
369
- if (box3->tas[i]) setas(box1,box3->tas[i],wa);
370
- else setac(box1,box3->tac[i],wa);
371
- }
372
- if (box3->num_ac) c=box3->tac[0]; /* 0 for strings (!UNKNOWN) */
373
- if (JOB->cfg.verbose)
374
- fprintf(stderr, " dist=%4d c= %c 0x%02x %s wc= %3d", dist,
375
- ((box3->c>32 && box3->c<127) ? (char) box3->c : '.'),
376
- (int)box3->c, ((box3->tas[0])?box3->tas[0]:""), box3->wac[0]);
377
- }
378
- if (dd<=0 && ((box3->num_ac && box3->tas[0]) || box3->c >= 128
379
- || !strchr ("l1|I0O", box3->c)))
380
- break; /* speedup if found */
381
- }
382
- } end_for_each(&JOB->tmp.dblist);
383
-
384
- }
385
-
386
- if( (JOB->cfg.mode&128) != 0 && c == UNKNOWN ) { /* prompt the user */
387
- /* should the output go to stderr or special pipe??? */
388
- int utf8_ok=0; /* trigger this flag if input is ok */
389
- int i, endchar; /* index */
390
- out_env(box1); /* old: out_x(box1); */
391
- fprintf(stderr,"The above pattern was not recognized.\n"
392
- "Enter UTF8 char or string for above pattern. Leave empty if unsure.\n"
393
- "Press RET at the end (ALT+RET to store into RAM only) : "
394
- ); /* ToDo: empty + alt-return (0x1b 0x0a) for help? ^a for skip all */
395
- /* UTF-8 (man 7 utf-8):
396
- * 7bit = 0xxxxxxx (0000-007F)
397
- * 11bit = 110xxxxx 10xxxxxx (0080-07FF)
398
- * 16bit = 1110xxxx 10xxxxxx 10xxxxxx (0800-FFFF)
399
- * 21bit = 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
400
- * 26bit = 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
401
- * 31bit = 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
402
- */
403
- buf[0]=0;
404
- /* shift/ctrl/altgr-enter acts like enter or ^j or ^m,
405
- * alt-enter returns 0x1b 0x0a and returns from fgets()
406
- * ^d (EOF) returns (nil) from fgets()
407
- * x+(2*)ctrl-d returns from fgets() without returning a 0x0a
408
- * if not UTF-input-mode, we are in trouble?
409
- * ^a=0x01, ^b=0x02, ^e=05, ..., ToDo: meaning of no-input or <=space
410
- */
411
- fgets((char *)buf,200,stdin); /* including \n=0x0a */
412
- dd=strlen((char *)buf);
413
- /* output hexcode if verbose set */
414
- if (JOB->cfg.verbose) {
415
- fprintf(stderr, "\n# fgets [%d]:", dd);
416
- for(i=0; i<dd; i++)
417
- fprintf(stderr, " %02x", (unsigned)((unsigned char)buf[i]));
418
- fprintf(stderr, "\n#");
419
- }
420
- /* we dont accept chars which could destroy database file */
421
- for (i=0; i<dd; i++) if (buf[i]<32) break; /* need unsigned char here */
422
- endchar=buf[i]; /* last char is 0x0a (ret) 0x00 (EOF) or 0x1b (alt+ret) */
423
- if (endchar==0x01) { i=0;JOB->cfg.mode&=~128; } /* skip all */
424
- buf[dd=i]=0; /* replace final 0x0a or other special codes */
425
- if (dd==1 && !(buf[0]&128)) { c=buf[0]; utf8_ok=1; } /* single char */
426
- if (dd>1 && dd<7) { /* try to decode single wide char (utf8) */
427
- int u0, u1; /* define UTF8-start sequences, u0=0bits u1=1bits */
428
- u0= 1<<(7-dd); /* compute start byte from UTF8-length */
429
- u1=255&~((1<<(8-dd))-1);
430
- /* count number of following 10xxxxxx bytes to i */
431
- for (i=1;i<dd;i++) if ((buf[i]&0xc0)!=0x80) break; /* 10xxxxxx */
432
- if (i==dd && (buf[0]&(u0|u1))==u1) { utf8_ok=1;
433
- c=buf[0]&(u0-1); /* 11..0x.. */
434
- for (i=1;i<dd;i++) { c<<=6; c|=buf[i]&0x3F; } /* 10xxxxxx */
435
- }
436
- }
437
- if (dd>0){ /* ToDo: skip space and tab too? */
438
- if (utf8_ok==1) { setac(box1, c, 100); } /* store single wchar */
439
- if (utf8_ok==0) { /* store a string of chars (UTF8-string) */
440
- c='_'; /* what should we do with c? probably a bad idea? */
441
- setas(box1, (char *)buf, 100);
442
- }
443
- /* decide between
444
- * 0) just help gocr to find the results and (dont remember, 0x01)
445
- * 1) help and remember in the same run (store to memory, 0x1b)
446
- * 2) expand the database (dont store ugly chars to the database!)
447
- */
448
- if (endchar!=0x01){ /* ^a before hit return */
449
- /* is there a reason to dont store to memory? */
450
- list_app(&JOB->tmp.dblist, box1); /* append to list for 1+2 */
451
- }
452
- if (endchar!=0x01 && endchar!=0x1b){
453
- store_db(box1); /* store to disk for 2 */
454
- }
455
- if (JOB->cfg.verbose)
456
- fprintf(stderr, " got char= %c 16bit= 0x%04x string= \"%s\"\n",
457
- ((c>32 && c<127)?(char)c:'.'), (int)c, buf);
458
- }
459
- }
460
-
461
- return c;
462
- }