isbn 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. data/.gitignore +4 -0
  2. data/README +9 -0
  3. data/Rakefile +13 -0
  4. data/VERSION +1 -0
  5. data/isbn.gemspec +329 -0
  6. data/lib/isbn.rb +90 -0
  7. data/src/gocr-0.48/.cvsignore +6 -0
  8. data/src/gocr-0.48/AUTHORS +7 -0
  9. data/src/gocr-0.48/BUGS +55 -0
  10. data/src/gocr-0.48/CREDITS +17 -0
  11. data/src/gocr-0.48/HISTORY +243 -0
  12. data/src/gocr-0.48/INSTALL +83 -0
  13. data/src/gocr-0.48/Makefile +193 -0
  14. data/src/gocr-0.48/Makefile.in +193 -0
  15. data/src/gocr-0.48/README +165 -0
  16. data/src/gocr-0.48/READMEde.txt +80 -0
  17. data/src/gocr-0.48/REMARK.txt +18 -0
  18. data/src/gocr-0.48/REVIEW +538 -0
  19. data/src/gocr-0.48/TODO +65 -0
  20. data/src/gocr-0.48/bin/.cvsignore +2 -0
  21. data/src/gocr-0.48/bin/create_db +38 -0
  22. data/src/gocr-0.48/bin/gocr.tcl +527 -0
  23. data/src/gocr-0.48/bin/gocr_chk.sh +44 -0
  24. data/src/gocr-0.48/configure +4689 -0
  25. data/src/gocr-0.48/configure.in +71 -0
  26. data/src/gocr-0.48/doc/.#Makefile.1.6 +39 -0
  27. data/src/gocr-0.48/doc/.cvsignore +2 -0
  28. data/src/gocr-0.48/doc/Makefile +39 -0
  29. data/src/gocr-0.48/doc/Makefile.in +39 -0
  30. data/src/gocr-0.48/doc/example.dtd +53 -0
  31. data/src/gocr-0.48/doc/example.xml +21 -0
  32. data/src/gocr-0.48/doc/examples.txt +67 -0
  33. data/src/gocr-0.48/doc/gocr.html +578 -0
  34. data/src/gocr-0.48/doc/unicode.txt +57 -0
  35. data/src/gocr-0.48/examples/.#Makefile.1.22 +166 -0
  36. data/src/gocr-0.48/examples/4x6.png +0 -0
  37. data/src/gocr-0.48/examples/4x6.txt +2 -0
  38. data/src/gocr-0.48/examples/5x7.png +0 -0
  39. data/src/gocr-0.48/examples/5x7.png.txt +2 -0
  40. data/src/gocr-0.48/examples/5x8.png +0 -0
  41. data/src/gocr-0.48/examples/5x8.png.txt +2 -0
  42. data/src/gocr-0.48/examples/Makefile +166 -0
  43. data/src/gocr-0.48/examples/color.fig +20 -0
  44. data/src/gocr-0.48/examples/ex.fig +16 -0
  45. data/src/gocr-0.48/examples/font.tex +22 -0
  46. data/src/gocr-0.48/examples/font1.tex +46 -0
  47. data/src/gocr-0.48/examples/font2.fig +27 -0
  48. data/src/gocr-0.48/examples/font_nw.tex +24 -0
  49. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  50. data/src/gocr-0.48/examples/handwrt1.txt +10 -0
  51. data/src/gocr-0.48/examples/inverse.fig +20 -0
  52. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  53. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +4 -0
  55. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  56. data/src/gocr-0.48/examples/ocr-a.txt +6 -0
  57. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  58. data/src/gocr-0.48/examples/ocr-b.png.txt +4 -0
  59. data/src/gocr-0.48/examples/polish.tex +28 -0
  60. data/src/gocr-0.48/examples/rotate45.fig +14 -0
  61. data/src/gocr-0.48/examples/score +36 -0
  62. data/src/gocr-0.48/examples/text.tex +28 -0
  63. data/src/gocr-0.48/gocr.spec +143 -0
  64. data/src/gocr-0.48/gpl.html +537 -0
  65. data/src/gocr-0.48/include/.cvsignore +2 -0
  66. data/src/gocr-0.48/include/config.h +36 -0
  67. data/src/gocr-0.48/include/config.h.in +36 -0
  68. data/src/gocr-0.48/include/version.h +2 -0
  69. data/src/gocr-0.48/install-sh +3 -0
  70. data/src/gocr-0.48/make.bat +57 -0
  71. data/src/gocr-0.48/man/.cvsignore +2 -0
  72. data/src/gocr-0.48/man/Makefile +29 -0
  73. data/src/gocr-0.48/man/Makefile.in +29 -0
  74. data/src/gocr-0.48/man/man1/gocr.1 +166 -0
  75. data/src/gocr-0.48/src/.cvsignore +4 -0
  76. data/src/gocr-0.48/src/Makefile +132 -0
  77. data/src/gocr-0.48/src/Makefile.in +132 -0
  78. data/src/gocr-0.48/src/amiga.h +31 -0
  79. data/src/gocr-0.48/src/barcode.c +846 -0
  80. data/src/gocr-0.48/src/barcode.c.orig +593 -0
  81. data/src/gocr-0.48/src/barcode.h +11 -0
  82. data/src/gocr-0.48/src/box.c +372 -0
  83. data/src/gocr-0.48/src/database.c +462 -0
  84. data/src/gocr-0.48/src/detect.c +943 -0
  85. data/src/gocr-0.48/src/gocr.c +373 -0
  86. data/src/gocr-0.48/src/gocr.h +288 -0
  87. data/src/gocr-0.48/src/jconv.c +168 -0
  88. data/src/gocr-0.48/src/job.c +84 -0
  89. data/src/gocr-0.48/src/lines.c +350 -0
  90. data/src/gocr-0.48/src/list.c +334 -0
  91. data/src/gocr-0.48/src/list.h +90 -0
  92. data/src/gocr-0.48/src/ocr0.c +6756 -0
  93. data/src/gocr-0.48/src/ocr0.h +63 -0
  94. data/src/gocr-0.48/src/ocr0n.c +1475 -0
  95. data/src/gocr-0.48/src/ocr1.c +85 -0
  96. data/src/gocr-0.48/src/ocr1.h +3 -0
  97. data/src/gocr-0.48/src/otsu.c +289 -0
  98. data/src/gocr-0.48/src/otsu.h +23 -0
  99. data/src/gocr-0.48/src/output.c +289 -0
  100. data/src/gocr-0.48/src/output.h +37 -0
  101. data/src/gocr-0.48/src/pcx.c +153 -0
  102. data/src/gocr-0.48/src/pcx.h +9 -0
  103. data/src/gocr-0.48/src/pgm2asc.c +2893 -0
  104. data/src/gocr-0.48/src/pgm2asc.h +105 -0
  105. data/src/gocr-0.48/src/pixel.c +537 -0
  106. data/src/gocr-0.48/src/pnm.c +533 -0
  107. data/src/gocr-0.48/src/pnm.h +35 -0
  108. data/src/gocr-0.48/src/progress.c +87 -0
  109. data/src/gocr-0.48/src/progress.h +42 -0
  110. data/src/gocr-0.48/src/remove.c +703 -0
  111. data/src/gocr-0.48/src/tga.c +87 -0
  112. data/src/gocr-0.48/src/tga.h +6 -0
  113. data/src/gocr-0.48/src/unicode.c +1314 -0
  114. data/src/gocr-0.48/src/unicode.h +1257 -0
  115. data/src/jpeg-7/Makefile.am +133 -0
  116. data/src/jpeg-7/Makefile.in +1089 -0
  117. data/src/jpeg-7/README +322 -0
  118. data/src/jpeg-7/aclocal.m4 +8990 -0
  119. data/src/jpeg-7/ansi2knr.1 +36 -0
  120. data/src/jpeg-7/ansi2knr.c +739 -0
  121. data/src/jpeg-7/cderror.h +132 -0
  122. data/src/jpeg-7/cdjpeg.c +181 -0
  123. data/src/jpeg-7/cdjpeg.h +187 -0
  124. data/src/jpeg-7/change.log +270 -0
  125. data/src/jpeg-7/cjpeg.1 +325 -0
  126. data/src/jpeg-7/cjpeg.c +616 -0
  127. data/src/jpeg-7/ckconfig.c +402 -0
  128. data/src/jpeg-7/coderules.txt +118 -0
  129. data/src/jpeg-7/config.guess +1561 -0
  130. data/src/jpeg-7/config.sub +1686 -0
  131. data/src/jpeg-7/configure +17139 -0
  132. data/src/jpeg-7/configure.ac +317 -0
  133. data/src/jpeg-7/depcomp +630 -0
  134. data/src/jpeg-7/djpeg.1 +251 -0
  135. data/src/jpeg-7/djpeg.c +617 -0
  136. data/src/jpeg-7/example.c +433 -0
  137. data/src/jpeg-7/filelist.txt +215 -0
  138. data/src/jpeg-7/install-sh +520 -0
  139. data/src/jpeg-7/install.txt +1097 -0
  140. data/src/jpeg-7/jaricom.c +148 -0
  141. data/src/jpeg-7/jcapimin.c +282 -0
  142. data/src/jpeg-7/jcapistd.c +161 -0
  143. data/src/jpeg-7/jcarith.c +921 -0
  144. data/src/jpeg-7/jccoefct.c +453 -0
  145. data/src/jpeg-7/jccolor.c +459 -0
  146. data/src/jpeg-7/jcdctmgr.c +482 -0
  147. data/src/jpeg-7/jchuff.c +1612 -0
  148. data/src/jpeg-7/jcinit.c +65 -0
  149. data/src/jpeg-7/jcmainct.c +293 -0
  150. data/src/jpeg-7/jcmarker.c +667 -0
  151. data/src/jpeg-7/jcmaster.c +770 -0
  152. data/src/jpeg-7/jcomapi.c +106 -0
  153. data/src/jpeg-7/jconfig.bcc +48 -0
  154. data/src/jpeg-7/jconfig.cfg +45 -0
  155. data/src/jpeg-7/jconfig.dj +38 -0
  156. data/src/jpeg-7/jconfig.mac +43 -0
  157. data/src/jpeg-7/jconfig.manx +43 -0
  158. data/src/jpeg-7/jconfig.mc6 +52 -0
  159. data/src/jpeg-7/jconfig.sas +43 -0
  160. data/src/jpeg-7/jconfig.st +42 -0
  161. data/src/jpeg-7/jconfig.txt +155 -0
  162. data/src/jpeg-7/jconfig.vc +45 -0
  163. data/src/jpeg-7/jconfig.vms +37 -0
  164. data/src/jpeg-7/jconfig.wat +38 -0
  165. data/src/jpeg-7/jcparam.c +632 -0
  166. data/src/jpeg-7/jcprepct.c +358 -0
  167. data/src/jpeg-7/jcsample.c +545 -0
  168. data/src/jpeg-7/jctrans.c +381 -0
  169. data/src/jpeg-7/jdapimin.c +396 -0
  170. data/src/jpeg-7/jdapistd.c +275 -0
  171. data/src/jpeg-7/jdarith.c +762 -0
  172. data/src/jpeg-7/jdatadst.c +151 -0
  173. data/src/jpeg-7/jdatasrc.c +212 -0
  174. data/src/jpeg-7/jdcoefct.c +736 -0
  175. data/src/jpeg-7/jdcolor.c +396 -0
  176. data/src/jpeg-7/jdct.h +393 -0
  177. data/src/jpeg-7/jddctmgr.c +382 -0
  178. data/src/jpeg-7/jdhuff.c +1309 -0
  179. data/src/jpeg-7/jdinput.c +384 -0
  180. data/src/jpeg-7/jdmainct.c +512 -0
  181. data/src/jpeg-7/jdmarker.c +1360 -0
  182. data/src/jpeg-7/jdmaster.c +663 -0
  183. data/src/jpeg-7/jdmerge.c +400 -0
  184. data/src/jpeg-7/jdpostct.c +290 -0
  185. data/src/jpeg-7/jdsample.c +361 -0
  186. data/src/jpeg-7/jdtrans.c +136 -0
  187. data/src/jpeg-7/jerror.c +252 -0
  188. data/src/jpeg-7/jerror.h +304 -0
  189. data/src/jpeg-7/jfdctflt.c +174 -0
  190. data/src/jpeg-7/jfdctfst.c +230 -0
  191. data/src/jpeg-7/jfdctint.c +4348 -0
  192. data/src/jpeg-7/jidctflt.c +242 -0
  193. data/src/jpeg-7/jidctfst.c +368 -0
  194. data/src/jpeg-7/jidctint.c +5137 -0
  195. data/src/jpeg-7/jinclude.h +91 -0
  196. data/src/jpeg-7/jmemansi.c +167 -0
  197. data/src/jpeg-7/jmemdos.c +638 -0
  198. data/src/jpeg-7/jmemdosa.asm +379 -0
  199. data/src/jpeg-7/jmemmac.c +289 -0
  200. data/src/jpeg-7/jmemmgr.c +1118 -0
  201. data/src/jpeg-7/jmemname.c +276 -0
  202. data/src/jpeg-7/jmemnobs.c +109 -0
  203. data/src/jpeg-7/jmemsys.h +198 -0
  204. data/src/jpeg-7/jmorecfg.h +369 -0
  205. data/src/jpeg-7/jpegint.h +395 -0
  206. data/src/jpeg-7/jpeglib.h +1135 -0
  207. data/src/jpeg-7/jpegtran.1 +272 -0
  208. data/src/jpeg-7/jpegtran.c +546 -0
  209. data/src/jpeg-7/jquant1.c +856 -0
  210. data/src/jpeg-7/jquant2.c +1310 -0
  211. data/src/jpeg-7/jutils.c +179 -0
  212. data/src/jpeg-7/jversion.h +14 -0
  213. data/src/jpeg-7/libjpeg.map +4 -0
  214. data/src/jpeg-7/libjpeg.txt +3067 -0
  215. data/src/jpeg-7/ltmain.sh +8406 -0
  216. data/src/jpeg-7/makcjpeg.st +36 -0
  217. data/src/jpeg-7/makdjpeg.st +36 -0
  218. data/src/jpeg-7/makeadsw.vc6 +77 -0
  219. data/src/jpeg-7/makeasln.vc9 +33 -0
  220. data/src/jpeg-7/makecdep.vc6 +82 -0
  221. data/src/jpeg-7/makecdsp.vc6 +130 -0
  222. data/src/jpeg-7/makecmak.vc6 +159 -0
  223. data/src/jpeg-7/makecvcp.vc9 +186 -0
  224. data/src/jpeg-7/makeddep.vc6 +82 -0
  225. data/src/jpeg-7/makeddsp.vc6 +130 -0
  226. data/src/jpeg-7/makedmak.vc6 +159 -0
  227. data/src/jpeg-7/makedvcp.vc9 +186 -0
  228. data/src/jpeg-7/makefile.ansi +220 -0
  229. data/src/jpeg-7/makefile.bcc +291 -0
  230. data/src/jpeg-7/makefile.dj +226 -0
  231. data/src/jpeg-7/makefile.manx +220 -0
  232. data/src/jpeg-7/makefile.mc6 +255 -0
  233. data/src/jpeg-7/makefile.mms +224 -0
  234. data/src/jpeg-7/makefile.sas +258 -0
  235. data/src/jpeg-7/makefile.unix +234 -0
  236. data/src/jpeg-7/makefile.vc +217 -0
  237. data/src/jpeg-7/makefile.vms +142 -0
  238. data/src/jpeg-7/makefile.wat +239 -0
  239. data/src/jpeg-7/makejdep.vc6 +423 -0
  240. data/src/jpeg-7/makejdsp.vc6 +285 -0
  241. data/src/jpeg-7/makejdsw.vc6 +29 -0
  242. data/src/jpeg-7/makejmak.vc6 +425 -0
  243. data/src/jpeg-7/makejsln.vc9 +17 -0
  244. data/src/jpeg-7/makejvcp.vc9 +328 -0
  245. data/src/jpeg-7/makeproj.mac +213 -0
  246. data/src/jpeg-7/makerdep.vc6 +6 -0
  247. data/src/jpeg-7/makerdsp.vc6 +78 -0
  248. data/src/jpeg-7/makermak.vc6 +110 -0
  249. data/src/jpeg-7/makervcp.vc9 +133 -0
  250. data/src/jpeg-7/maketdep.vc6 +43 -0
  251. data/src/jpeg-7/maketdsp.vc6 +122 -0
  252. data/src/jpeg-7/maketmak.vc6 +131 -0
  253. data/src/jpeg-7/maketvcp.vc9 +178 -0
  254. data/src/jpeg-7/makewdep.vc6 +6 -0
  255. data/src/jpeg-7/makewdsp.vc6 +78 -0
  256. data/src/jpeg-7/makewmak.vc6 +110 -0
  257. data/src/jpeg-7/makewvcp.vc9 +133 -0
  258. data/src/jpeg-7/makljpeg.st +68 -0
  259. data/src/jpeg-7/maktjpeg.st +30 -0
  260. data/src/jpeg-7/makvms.opt +4 -0
  261. data/src/jpeg-7/missing +376 -0
  262. data/src/jpeg-7/rdbmp.c +439 -0
  263. data/src/jpeg-7/rdcolmap.c +253 -0
  264. data/src/jpeg-7/rdgif.c +38 -0
  265. data/src/jpeg-7/rdjpgcom.1 +63 -0
  266. data/src/jpeg-7/rdjpgcom.c +515 -0
  267. data/src/jpeg-7/rdppm.c +459 -0
  268. data/src/jpeg-7/rdrle.c +387 -0
  269. data/src/jpeg-7/rdswitch.c +365 -0
  270. data/src/jpeg-7/rdtarga.c +500 -0
  271. data/src/jpeg-7/structure.txt +945 -0
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +4 -0
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +1533 -0
  279. data/src/jpeg-7/transupp.h +205 -0
  280. data/src/jpeg-7/usage.txt +605 -0
  281. data/src/jpeg-7/wizard.txt +211 -0
  282. data/src/jpeg-7/wrbmp.c +442 -0
  283. data/src/jpeg-7/wrgif.c +399 -0
  284. data/src/jpeg-7/wrjpgcom.1 +103 -0
  285. data/src/jpeg-7/wrjpgcom.c +583 -0
  286. data/src/jpeg-7/wrppm.c +269 -0
  287. data/src/jpeg-7/wrrle.c +305 -0
  288. data/src/jpeg-7/wrtarga.c +253 -0
  289. data/test/isbn_test.rb +7 -0
  290. data/test/test_helper.rb +7 -0
  291. metadata +345 -0
@@ -0,0 +1,11 @@
1
+ #ifndef _BARCODE_H
2
+ #define _BARCODE_H
3
+ #include "pnm.h"
4
+
5
+ /*
6
+ detect barcode and add a string to the box (obj-pointer)
7
+ */
8
+
9
+ int detect_barcode(job_t *job);
10
+
11
+ #endif
@@ -0,0 +1,372 @@
1
+ /*
2
+ This is an Optical-Character-Recognition program
3
+ Copyright (C) 2000-2009 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL address
20
+
21
+ */
22
+
23
+ #include <stdio.h>
24
+ #include <stdlib.h>
25
+ #include <assert.h>
26
+ #include <string.h>
27
+ /* do we need #include <math.h>? conflicts with INFINITY in unicode.h */
28
+ #include "gocr.h"
29
+ #include "pgm2asc.h"
30
+
31
+ /* for sorting letters by position on the image
32
+ / ToDo: - use function same line like this or include lines.m1 etc. */
33
+ int box_gt(struct box *box1, struct box *box2) {
34
+ // box1 after box2 ?
35
+ if (box1->line > box2->line)
36
+ return 1;
37
+ if (box1->line < box2->line)
38
+ return 0;
39
+ if (box1->x0 > box2->x1) // before
40
+ return 1;
41
+ if (box1->x1 < box2->x0) // before
42
+ return 0;
43
+ if (box1->x0 > box2->x0) // before, overlapping!
44
+ return 1;
45
+
46
+ return 0;
47
+ }
48
+
49
+ /* --- copy part of pix p into new pix b ---- len=10000
50
+ * Returns: 0 on success, 1 on error.
51
+ * naming it as copybox isnt very clever, because it dont have to do with the
52
+ * char boxes (struct box)
53
+ */
54
+ int copybox (pix * p, int x0, int y0, int dx, int dy, pix * b, int len) {
55
+ int x, y;
56
+
57
+ /* test boundaries */
58
+ if (b->p == NULL || dx < 0 || dy < 0 || dx * dy > len) {
59
+ fprintf(stderr, " error-copybox x=%5d %5d d=%5d %5d\n", x0, y0, dx, dy);
60
+ return 1;
61
+ }
62
+
63
+ b->x = dx;
64
+ b->y = dy;
65
+ b->bpp = 1;
66
+ #ifdef FASTER_INCOMPLETE
67
+ for (y = 0; y < dy; y++)
68
+ memcpy(&pixel_atp(b, 0, y), &pixel_atp(p, x0, y + y0 ), dx);
69
+ // and unmark pixels
70
+ #else
71
+ for (y = 0; y < dy; y++)
72
+ for (x = 0; x < dx; x++)
73
+ pixel_atp(b, x, y) = getpixel(p, x + x0, y + y0);
74
+ #endif
75
+
76
+ return 0;
77
+ }
78
+
79
+ /* reset table of alternative chars (and free memory) */
80
+ int reset_box_ac(struct box *box){
81
+ int i;
82
+ for (i=0; i<box->num_ac; i++)
83
+ if (box->tas[i]) {
84
+ /* fprintf(stderr,"DBG free_s[%d] %p %s\n",i,box->tas[i],box->tas[i]); */
85
+ free(box->tas[i]);
86
+ box->tas[i]=0; /* prevent double freeing */
87
+ }
88
+ box->num_ac=0; /* mark as freed */
89
+ return 0;
90
+ }
91
+
92
+ /* ini or copy a box: get memory for box and initialize the memory */
93
+ struct box *malloc_box (struct box *inibox) {
94
+ struct box *buf;
95
+ int i;
96
+
97
+ buf = (struct box *) malloc(sizeof(struct box));
98
+ if (!buf)
99
+ return NULL;
100
+ if (inibox) {
101
+ memcpy(buf, inibox, sizeof(struct box));
102
+ /* only pointer are copied, we want to copy the contents too */
103
+ for (i=0;i<inibox->num_ac;i++) {
104
+ if (inibox->tas[i]) {
105
+ buf->tas[i]=(char *)malloc(strlen(inibox->tas[i])+1);
106
+ memcpy(buf->tas[i], inibox->tas[i], strlen(inibox->tas[i])+1);
107
+ }
108
+ }
109
+ }
110
+ else { /* ToDo: init it */
111
+ buf->num_ac=0;
112
+ buf->num_frames=0;
113
+ }
114
+ /* fprintf(stderr,"\nDBG ini_box %p",buf); */
115
+ return buf;
116
+ }
117
+
118
/* Release a box: first the table of alternative chars, then the box
 * itself. A NULL pointer is accepted and ignored. Always returns 0.
 */
int free_box (struct box *box) {
  if (box != NULL) {
    /* fprintf(stderr,"DBG free_box %p\n",box); out_x(box); */
    reset_box_ac(box);  /* strings of the alternative char table */
    free(box);          /* the box memory */
  }
  return 0;
}
126
+
127
+ /* simplify the vectorgraph,
128
+ * but what is the best way?
129
+ * a) melting two neighbouring vectors with nearly same direction?
130
+ * (nearest angle to pi)
131
+ * b) melting three neigbours with smallest area?
132
+ * ToDo:
133
+ * mode = 0 - only lossless
134
+ * mode = 1 - reduce one vector, smallest possible loss
135
+ * mode = 2 - remove jitter (todo, or somewhere else)
136
+ * ToDo: include also loop around (last - first element)
137
+ * ToDo: reduce by 10..50%
138
+ */
139
+ int reduce_vectors ( struct box *box1, int mode ) {
140
+ int i1, i2, nx, ny, mx, my, len,
141
+ minlen=1024, /* minlength of to neighbouring vectors */
142
+ besti1=0, /* frame for best reduction */
143
+ besti2=2; /* vector replacing its predecessor */
144
+ double sprod, maxsprod=-1;
145
+ if (mode!=1) fprintf(stderr,"ERR not supported yet, ToDo\n");
146
+ for (i2=1,i1=0; i1<box1->num_frames; i1++) { /* every frame */
147
+ for (;i2<box1->num_frame_vectors[i1]-1; i2++) { /* every vector */
148
+ /* predecessor n */
149
+ nx = box1->frame_vector[i2-0][0] - box1->frame_vector[i2-1][0];
150
+ ny = box1->frame_vector[i2-0][1] - box1->frame_vector[i2-1][1];
151
+ /* successor m */
152
+ mx = box1->frame_vector[i2+1][0] - box1->frame_vector[i2-0][0];
153
+ my = box1->frame_vector[i2+1][1] - box1->frame_vector[i2-0][1];
154
+ /* angle is w = a*b/(|a|*|b|) = 1 means parallel */
155
+ /* normalized: minimize w^2 = (a*b/(|a|*|b|)-1)^2 */
156
+ /* -1=90grd, 0=0grd, -2=180grd */
157
+ sprod = /* fabs */(abs(nx*mx+ny*my)*(nx*mx+ny*my)
158
+ /(1.*(nx*nx+ny*ny)*(mx*mx+my*my))-1);
159
+ /* we dont include math.h because INFINITY conflicts to unicode,h */
160
+ if (sprod<0) sprod=-sprod;
161
+ len = (mx*mx+my*my)*(nx*nx+ny*ny); /* sum lengths^2 */
162
+ // ..c ###c ... .. ...
163
+ // .b. len=2+2 #b.. len=2+5 #bc len=1+2 bc len=1+1 b#a len=4+5
164
+ // a.. spr=0 a... spr=1/10 a.. spr=1/4 a. spr=1 ##c spr=9/5
165
+ //
166
+ if ( len* sprod* sprod* sprod* sprod
167
+ <minlen*maxsprod*maxsprod*maxsprod*maxsprod
168
+ || maxsprod<0) /* Bad! ToDo! */
169
+ { maxsprod=sprod; besti1=i1; besti2=i2; minlen=len; }
170
+ }
171
+ }
172
+ if (box1->num_frames>0)
173
+ for (i2=besti2; i2<box1->num_frame_vectors[ box1->num_frames-1 ]-1; i2++) {
174
+ box1->frame_vector[i2][0]=box1->frame_vector[i2+1][0];
175
+ box1->frame_vector[i2][1]=box1->frame_vector[i2+1][1];
176
+ }
177
+ for (i1=besti1; i1<box1->num_frames; i1++)
178
+ box1->num_frame_vectors[i1]--;
179
+ // fprintf(stderr,"\nDBG_reduce_vectors i= %d nv= %d sprod=%f len2=%d\n# ...",
180
+ // besti2,box1->num_frame_vectors[ box1->num_frames-1 ],maxsprod,minlen);
181
+ // out_x(box1);
182
+ return 0;
183
+ }
184
+
185
+ /* add the contents of box2 to box1
186
+ * especially add vectors of box2 to box1
187
+ */
188
+ int merge_boxes( struct box *box1, struct box *box2 ) {
189
+ int i1, i2, i3, i4;
190
+ struct box tmpbox, *bsmaller, *bbigger; /* for mixing and sorting */
191
+ /* DEBUG, use valgrind to check uninitialized memory */
192
+ #if 0
193
+ fprintf(stderr,"\nDBG merge_boxes_input:"); out_x(box1); out_x(box2);
194
+ #endif
195
+ /* pair distance is to expendable, taking borders is easier */
196
+ if ((box2->x1 - box2->x0)*(box2->y1 - box2->y0)
197
+ >(box1->x1 - box1->x0)*(box1->y1 - box1->y0)) {
198
+ bbigger=box2; bsmaller=box1; }
199
+ else {
200
+ bbigger=box1; bsmaller=box2; }
201
+ /* ToDo: does not work if a third box is added */
202
+ if (box2->y0>box1->y1 || box2->y1<box1->y0
203
+ || box2->x0>box1->x1 || box2->x1<box1->x0) {
204
+ box1->num_boxes += box2->num_boxes; /* num seperate objects 2=ij */
205
+ } else {
206
+ if (box2->num_boxes>box1->num_boxes) box1->num_boxes=box2->num_boxes;
207
+ box1->num_subboxes += box2->num_subboxes+1; /* num holes 1=abdepq 2=B */
208
+ }
209
+ box1->dots += box2->dots; /* num i-dots */
210
+ if ( box2->x0 < box1->x0 ) box1->x0 = box2->x0;
211
+ if ( box2->x1 > box1->x1 ) box1->x1 = box2->x1;
212
+ if ( box2->y0 < box1->y0 ) box1->y0 = box2->y0;
213
+ if ( box2->y1 > box1->y1 ) box1->y1 = box2->y1;
214
+ i1 = i2 = 0;
215
+ if (bbigger->num_frames)
216
+ i1 = bbigger->num_frame_vectors[ bbigger->num_frames - 1 ];
217
+ if (bsmaller->num_frames)
218
+ i2 = bsmaller->num_frame_vectors[ bsmaller->num_frames - 1 ];
219
+ while (i1+i2 > MaxFrameVectors) {
220
+ if (i1>i2) { reduce_vectors( bbigger, 1 ); i1--; }
221
+ else { reduce_vectors( bsmaller, 1 ); i2--; }
222
+ }
223
+ /* if i1+i2>MaxFrameVectors simplify the vectorgraph */
224
+ /* if sum num_frames>MaxNumFrames through shortest graph away and warn */
225
+ /* first copy the bigger box */
226
+ memcpy(&tmpbox, bbigger, sizeof(struct box));
227
+ /* attach the smaller box */
228
+ for (i4=i3=0; i3<bsmaller->num_frames; i3++) {
229
+ if (tmpbox.num_frames>=MaxNumFrames) break;
230
+
231
+ for (; i4<bsmaller->num_frame_vectors[i3]; i4++) {
232
+ memcpy(tmpbox.frame_vector[i1],
233
+ bsmaller->frame_vector[i4],2*sizeof(int));
234
+ i1++;
235
+ }
236
+ tmpbox.num_frame_vectors[ tmpbox.num_frames ] = i1;
237
+ tmpbox.frame_vol[ tmpbox.num_frames ] = bsmaller->frame_vol[ i3 ];
238
+ tmpbox.frame_per[ tmpbox.num_frames ] = bsmaller->frame_per[ i3 ];
239
+ tmpbox.num_frames++;
240
+ if (tmpbox.num_frames>=MaxNumFrames) {
241
+ if (JOB->cfg.verbose)
242
+ fprintf(stderr,"\nDBG merge_boxes MaxNumFrames reached");
243
+ break;
244
+ }
245
+ }
246
+ /* copy tmpbox to destination */
247
+ box1->num_frames = tmpbox.num_frames;
248
+ memcpy(box1->num_frame_vectors,
249
+ tmpbox.num_frame_vectors,sizeof(int)*MaxNumFrames);
250
+ memcpy(box1->frame_vol,
251
+ tmpbox.frame_vol,sizeof(int)*MaxNumFrames);
252
+ memcpy(box1->frame_per,
253
+ tmpbox.frame_per,sizeof(int)*MaxNumFrames);
254
+ memcpy(box1->frame_vector,
255
+ tmpbox.frame_vector,sizeof(int)*2*MaxFrameVectors);
256
+ #if 0
257
+ if (JOB->cfg.verbose)
258
+ fprintf(stderr,"\nDBG merge_boxes_result:"); out_x(box1);
259
+ #endif
260
+ return 0;
261
+ }
262
+
263
+ /* used for division of glued chars
264
+ * after a box is splitted into 2, where vectors are copied to both,
265
+ * vectors outside the new box are cutted and thrown away,
266
+ * later replaced by
267
+ * - 1st remove outside vectors with outside neighbours (complete frames?)
268
+ * add vector on outside vector with inside neighbours
269
+ * care about connections through box between outside vectors
270
+ * - 2nd reduce outside crossings (inclusive splitting frames if necessary)
271
+ * depending on direction (rotation) of outside connections
272
+ * - 3th shift outside vectors to crossing points
273
+ * - split add this points, connect only in-out...out-in,
274
+ * - cutting can result in more objects
275
+ * ToDo:
276
+ * dont connect --1---2--------3----4-- new-y1 (inside above not drawn)
277
+ * \ \->>>>-/ / outside
278
+ * \----<<<<-----/ old-y1
279
+ * |======| subtractable?
280
+ *
281
+ * only connect --1---2--------3----4-- new-y1
282
+ * \>>/ \>>>/ old-y1 outside
283
+ * ToDo: what about cutting 2 frames (example: 2fold melted MN)
284
+ * better restart framing algo?
285
+ *
286
+ * ToDo: new vol, per
287
+ */
288
+ int cut_box( struct box *box1) {
289
+ int i1, i2, i3, i4, x, y, lx, ly, dbg=0;
290
+ if (JOB->cfg.verbose) dbg=1; // debug level, enlarge to get more output
291
+ if (dbg) fprintf(stderr,"\n cut box x= %3d %3d", box1->x0, box1->y0);
292
+ /* check if complete frames are outside the box */
293
+ for (i1=0; i1<box1->num_frames; i1++){
294
+ if (dbg>2) fprintf(stderr,"\n checking frame %d outside", i1);
295
+ i2 = ((i1)?box1->num_frame_vectors[ i1-1 ]:0); // this frame
296
+ i3 = box1->num_frame_vectors[ i1 ]; // next frame
297
+ for (i4=i2; i4 < i3; i4++) {
298
+ x = box1->frame_vector[i4][0];
299
+ y = box1->frame_vector[i4][1];
300
+ /* break, if one vector is lying inside */
301
+ if (x>=box1->x0 && x<=box1->x1 && y>=box1->y0 && y<=box1->y1) break;
302
+ }
303
+ if (i4==i3) { /* all vectors outside */
304
+ if (dbg>1) fprintf(stderr,"\n remove frame %d",i1);
305
+ /* replace all frames i1,i1+1,... by i1+1,i1+2,... */
306
+ /* replace (x,y) pairs first */
307
+ for (i4=i2; i4<box1->num_frame_vectors[ box1->num_frames-1 ]-(i3-i2);
308
+ i4++) {
309
+ box1->frame_vector[i4][0] = box1->frame_vector[i4+i3-i2][0];
310
+ box1->frame_vector[i4][1] = box1->frame_vector[i4+i3-i2][1];
311
+ }
312
+ /* replace the num_frame_vectors */
313
+ for (i4=i1; i4<box1->num_frames-1; i4++)
314
+ box1->num_frame_vectors[ i4 ] =
315
+ box1->num_frame_vectors[ i4+1 ]-(i3-i2);
316
+ box1->num_frames--; i1--;
317
+ }
318
+ }
319
+ /* remove vectors outside the box */
320
+ i3=0;
321
+ for (i1=0; i1<box1->num_frames; i1++){
322
+ if (dbg>2) fprintf(stderr,"\n check cutting vectors on frame %d", i1);
323
+ x = box1->frame_vector[0][0]; /* last x */
324
+ y = box1->frame_vector[0][1]; /* last y */
325
+ /* ToDo: start inside to get a closed object */
326
+ if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) i3=1;
327
+ for (i2=0; i2<box1->num_frame_vectors[ i1 ]; i2++) {
328
+ lx = x; /* last x */
329
+ ly = y; /* last y */
330
+ x = box1->frame_vector[i2][0];
331
+ y = box1->frame_vector[i2][1];
332
+ // fprintf(stderr,"DBG LEV3 i2= %3d xy= %3d %3d",i2,x,y);
333
+ /* check if outside */
334
+ if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) {
335
+ /* replace by nearest point at border, ToDo: better crossingpoint */
336
+ if (i3==0) { /* wrong if it starts outside */
337
+ if (x < box1->x0) x = box1->frame_vector[i2][0] = box1->x0;
338
+ if (x > box1->x1) x = box1->frame_vector[i2][0] = box1->x1;
339
+ if (y < box1->y0) y = box1->frame_vector[i2][1] = box1->y0;
340
+ if (y > box1->y1) y = box1->frame_vector[i2][1] = box1->y1;
341
+ } else {
342
+ /* remove vector */
343
+ if (dbg>1) fprintf(stderr,"\n remove vector[%d][%d] x= %2d %2d",i1,i2,x-box1->x0,y-box1->y0);
344
+ for (i4=i2;i4<box1->num_frame_vectors[ box1->num_frames-1 ]-1;i4++) {
345
+ box1->frame_vector[i4][0] = box1->frame_vector[i4+1][0];
346
+ box1->frame_vector[i4][1] = box1->frame_vector[i4+1][1];
347
+ }
348
+ for (i4=i1; i4<box1->num_frames; i4++)
349
+ box1->num_frame_vectors[ i4 ]--;
350
+ i2--; /* next element is shiftet now, setting back the counter */
351
+ }
352
+ i3++;
353
+ // fprintf(stderr," outside i3= %d\n",i3);
354
+ continue;
355
+ }
356
+ // fprintf(stderr," inside i3= %d",i3);
357
+ if (i3) { /* ToDo: better crossing point last vector and border */
358
+ if (lx < box1->x0) lx = box1->x0;
359
+ if (lx > box1->x1) lx = box1->x1;
360
+ if (ly < box1->y0) ly = box1->y0;
361
+ if (ly > box1->y1) ly = box1->y1;
362
+ x = box1->frame_vector[i2][0] = lx;
363
+ y = box1->frame_vector[i2][1] = ly;
364
+ i3 = 0;
365
+ }
366
+ // fprintf(stderr," xy= %3d %3d\n",x,y);
367
+ }
368
+ }
369
+ if (dbg>2) { fprintf(stderr,"\nDBG cut_box_result:"); out_x(box1); }
370
+ return 0;
371
+ }
372
+
@@ -0,0 +1,462 @@
1
+ /*
2
+ This is a Optical-Character-Recognition program
3
+ Copyright (C) 2000-2009 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL address
20
+ */
21
+
22
+ #include <stdio.h>
23
+ #include <stdlib.h>
24
+ #include "gocr.h"
25
+ #include "pnm.h"
26
+ #include "pgm2asc.h"
27
+ #include <string.h>
28
+ #include <time.h>
29
+
30
+ #define Blen 256
31
+
32
+ // load boxes from database into boxlist (for faster access)
33
+ // used as alternate engine, comparing chars with database
34
+ int load_db(void) {
35
+ FILE *f1;
36
+ char s1[Blen+1],
37
+ s2[Blen+1] = "./db/", /* ToDo: replace by constant! by configure */
38
+ *s3;
39
+ int i, j, ii, i2, line;
40
+ struct box *box1;
41
+ pix *pp;
42
+
43
+ if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
44
+ i2=strlen(s2);
45
+ if (JOB->cfg.verbose)
46
+ fprintf(stderr, "# load database %s %s ... ",s2,JOB->cfg.db_path);
47
+
48
+ strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
49
+ f1 = fopen(s2, "r");
50
+ if (!f1) {
51
+ fprintf(stderr, " DB %s not found\n",s2);
52
+ return 1;
53
+ }
54
+
55
+ line = 0; /* line counter for better error report */
56
+ for (ii = 0; !feof(f1); ii++) {
57
+ /* bbg: should write a better input routine */
58
+ if (!fgets(s1, Blen, f1)) break; line++;
59
+ j = strlen(s1);
60
+ /* remove carriage return sequences from line */
61
+ while (j > 0 && (s1[j - 1] == '\r' || s1[j - 1] == '\n'))
62
+ s1[--j] = 0;
63
+ if (!j) continue; /* skip empty line */
64
+ if (s1[0]=='#') continue; /* skip comments (v0.44) */
65
+ /* copy file name */
66
+ for (i = 0; i < j && i+i2 < Blen && strchr(" \t,;",s1[i]) == 0; i++)
67
+ s2[i2 + i] = s1[i];
68
+ s2[i2+i]=0;
69
+ /* skip spaces */
70
+ for (; i < j && strchr(" \t",s1[i]) != 0; i++);
71
+ /* by now: read pix, fill box, goto next ??? */
72
+ pp = (pix *)malloc(sizeof(pix));
73
+ if( !pp ) fprintf(stderr,"malloc error in load_db pix\n");
74
+
75
+ // if (JOB->cfg.verbose) fprintf(stderr,"\n# readpgm %s ",s2);
76
+ if (readpgm(s2, pp, 0 * JOB->cfg.verbose)!=0) {
77
+ fprintf(stderr,"\ndatabase error: readpgm %s\n", s2);
78
+ exit(-1);
79
+ }
80
+
81
+ box1 = (struct box *)malloc_box(NULL);
82
+ if(!box1) fprintf(stderr,"malloc error in load_db box1\n");
83
+ box1->x0 = 0;
84
+ box1->x1 = pp->x-1; // white border 1 pixel width
85
+ box1->y0 = 0;
86
+ box1->y1 = pp->y-1;
87
+ box1->x = 1;
88
+ box1->y = 1;
89
+ box1->dots = 0;
90
+ box1->c = 0;
91
+ box1->modifier = 0; /* ToDo: obsolete */
92
+ box1->tas[0]=NULL;
93
+ box1->tac[0]=0;
94
+ box1->wac[0]=100; /* really 100% sure? */
95
+ box1->num_ac=1;
96
+ if (s1[i]=='"'){ /* parse a string */
97
+ j=strrchr(s1+i+1,'"')-(s1+i+1); /* we only look for first and last "" */
98
+ if (j>=1) {
99
+ s3=(char *)malloc(j+1);
100
+ if (!s3) fprintf (stderr, "malloc error in load_db s3\n");
101
+ if (s3) {
102
+ memcpy(s3,s1+i+1,j);
103
+ s3[j]=0;
104
+ box1->tas[0]=s3;
105
+ // fprintf(stderr,"\nstring=%s",s3);
106
+ }
107
+ } else { fprintf(stderr,"load_db: string parse error L%d\n",line); }
108
+ } else {
109
+ box1->tac[0] = box1->c = s1[i]; /* try to interpret as ASCII */
110
+ /* we can live without hexcode in future if we use UTF8-strings */
111
+ s3=s1+i;
112
+ j=strtol( s1+i, &s3, 16); /* try to read 4 to 8 digit hex unicode */
113
+ /* if its an hexcode, ASCII interpretation is overwritten */
114
+ if( j && i+3<=Blen && s3-s1-i>3 ) box1->tac[0] = box1->c = j;
115
+ // fprintf(stderr,"\nhexcode=%04x=%04x %d",(int)j,(int)box1->c,s3-s1-i);
116
+ }
117
+ box1->num = 0;
118
+ box1->line = -1;
119
+ box1->m1 = 0; /* ToDo: should be given too in the database! */
120
+ box1->m2 = 0;
121
+ box1->m3 = 0;
122
+ box1->m4 = 0;
123
+ box1->p = pp;
124
+ list_app(&JOB->tmp.dblist, box1); // append to list
125
+ #if 0
126
+ out_x(box1);
127
+ #endif
128
+ }
129
+ fclose(f1);
130
+ if (JOB->cfg.verbose)
131
+ fprintf(stderr, " %d chars loaded\n", ii);
132
+ return 0;
133
+ }
134
+
135
+ // expand database from box/boxlist name=db_$utime.pbm
136
+ // this is added in version v0.3.3
137
+ int store_db(struct box *box1) {
138
+ FILE *f1;
139
+ char s2[Blen+1] = "./db/", s3[Blen+1];
140
+ int i2, dx, dy;
141
+ unsigned c_out;
142
+ pix b; /* temporary mini page */
143
+
144
+ if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
145
+ i2=strlen(s2);
146
+
147
+ /* add (first) char and time to the file name for better debugging */
148
+
149
+ /* decide between 7bit ASCII and UTF8-char or string */
150
+ c_out = ((box1->num_ac && box1->tas[0]) ?
151
+ (unsigned char )box1->tas[0][0] /* char */ :
152
+ box1->c /* wchar */);
153
+ /* (unsigned int)(( char)0x80) = 0xffffff80 */
154
+ /* (unsigned int)((unsigned char)0x80) = 0x00000080 */
155
+
156
+ /* name generation can cause problems, if called twice within a second */
157
+ sprintf(s3,"db_%04x_%08lx.pbm", c_out, (unsigned long)time(NULL));
158
+ /* ToDo: the file name may be not unique */
159
+
160
+ strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
161
+ f1 = fopen(s2, "a");
162
+ if (!f1) {
163
+ fprintf(stderr, " could not access %s\n",s2);
164
+ return 1;
165
+ }
166
+ strncpy(s2+i2,s3,strlen(s3)); s2[i2+strlen(s3)]=0;
167
+ /* store image and infos about the char */
168
+ /* ToDo: store the vector list instead of the pixelarray */
169
+
170
+ if (JOB->cfg.verbose)
171
+ fprintf(stderr, "store_db: add file %s to database (nac=%d c=%04x)"
172
+ "\n#",s3, box1->num_ac, c_out);
173
+
174
+ dx=box1->x1-box1->x0+1;
175
+ dy=box1->y1-box1->y0+1;
176
+ b.p = (unsigned char *) malloc( dx * dy );
177
+ if( !b.p ){
178
+ fprintf( stderr, "\nFATAL: malloc failed, skip store_db" );
179
+ return 2;
180
+ }
181
+ if (copybox(box1->p, box1->x0, box1->y0, dx, dy, &b, dx * dy))
182
+ return -1;
183
+
184
+ writepbm(s2,&b); /* What is to do on error? */
185
+ free(b.p);
186
+
187
+ /* store the database line */
188
+ /* some infos about box1->m1,..,m4 should added (base line, high etc.) */
189
+ if (box1->num_ac && box1->tas[0]) {
190
+ fprintf(f1, "%s \"%s\"\n",s3,box1->tas[0]);
191
+ /* ToDo: what if tas contains '"'? */
192
+ } else {
193
+ if( (box1->c >= '0' && box1->c <= '9')
194
+ || (box1->c >= 'A' && box1->c <= 'Z')
195
+ || (box1->c >= 'a' && box1->c <= 'z') )
196
+ fprintf(f1, "%s %c\n",s3,(char)box1->c);
197
+ else {
198
+ if (((box1->c)>>16)>>16)
199
+ fprintf(f1, "%s %08x\n",s3,(unsigned int)box1->c);
200
+ else
201
+ fprintf(f1, "%s %04x\n",s3,(unsigned int)box1->c);
202
+ }
203
+ }
204
+ fclose(f1);
205
+ return 0;
206
+ }
207
+
208
+ /* function is only for user prompt on console to identify chars
209
+ it prints out a part of pixmap b at point x0,y0 to stderr
210
+ using dots .,; if no pixel, and @xoO for pixels
211
+ */
212
+ void out_env(struct box *px ){
213
+ int x0,y0,x1,y1,dx,dy,x,y,x2,y2,yy0,tx,ty,i,cs;
214
+ char c1, c2; pix *b;
215
+ cs=JOB->cfg.cs;
216
+ yy0=px->y0;
217
+ { /* overwrite rest of arguments */
218
+ b=px->p;
219
+ x0=px->x0; x1=px->x1; dx=x1-x0+1;
220
+ y0=px->y0; y1=px->y1; dy=y1-y0+1;
221
+ y0-=2; y1+=2;
222
+ if (px->m4 && y0>px->m1) y0=px->m1;
223
+ if (px->m4 && y1<px->m4) y1=px->m4;
224
+ if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
225
+ if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
226
+ if (x1-x0+1<62) { x0-=5; x1+=5; }
227
+ if (y1-y0+1<10) { y0-= 4; y1+= 4; } /* fragment? */
228
+ if (x0<0) x0=0; if (x1>=b->x) x1=b->x-1;
229
+ if (y0<0) y0=0; if (y1>=b->y) y1=b->y-1;
230
+ dx=x1-x0+1;
231
+ dy=y1-y0+1; yy0=y0;
232
+ fprintf(stderr,"\n# show box + environment");
233
+ fprintf(stderr,"\n# show box x= %4d %4d d= %3d %3d r= %d %d",
234
+ px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
235
+ px->x - px->x0, px->y - px->y0);
236
+ if (px->num_ac){ /* output table of chars and its probabilities */
237
+ fprintf(stderr,"\n# list box char: ");
238
+ for(i=0;i<px->num_ac && i<NumAlt;i++)
239
+ /* output the (xml-)string (picture position, barcodes, glyphs, ...) */
240
+ if (px->tas[i])
241
+ fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
242
+ else
243
+ fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
244
+ }
245
+ fprintf(stderr,"\n");
246
+ if (px->dots && px->m2 && px->m1<y0) { yy0=px->m1; dy=px->y1-yy0+1; }
247
+ }
248
+ tx=dx/80+1;
249
+ ty=dy/40+1; // step, usually 1, but greater on large maps
250
+ fprintf(stderr,"# show pattern x= %4d %4d d= %3d %3d t= %d %d\n",
251
+ x0,y0,dx,dy,tx,ty);
252
+ if (dx>0)
253
+ for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
254
+
255
+ /* image is the boxframe + environment in the original bitmap */
256
+ for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
257
+ c1='.';
258
+ for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
259
+ for(x2=x;x2<x+tx && x2<x0+dx;x2++)
260
+ { if((getpixel(b,x2,y2)<cs)) c1='#'; }
261
+ // show pixels outside the box thinner/weaker
262
+ if (x+tx-1 < px->x0 || x > px->x1
263
+ || y+ty-1 < px->y0 || y > px->y1) c1=((c1=='#')?'O':',');
264
+ fprintf(stderr,"%c", c1 );
265
+ }
266
+
267
+ c1=c2=' ';
268
+ /* mark lines with < */
269
+ if (px) if (y==px->m1 || y==px->m2 || y==px->m3 || y==px->m4) c1='<';
270
+ if (y==px->y0 || y==px->y1) c2='-'; /* boxmarks */
271
+ fprintf(stderr,"%c%c\n",c1,c2);
272
+ }
273
+ }
274
+
275
+
276
+ /*
277
+ // second variant, for database (with slightly other behaviour)
278
+ // new variant
279
+ // look at the environment of the pixel too (contrast etc.)
280
+ // detailed analysis only of diff pixels!
281
+ //
282
+ // 100% * distance, 0 is best fit
283
+ // = similarity of 2 chars for recognition of noisy chars
284
+ // weigth of pixels with only one same neighbour set to 0
285
+ // look at contours too!
286
+ ToDo: especially on small boxes distance should only be 0 if
287
+ characters are 100% identical!
288
+ */
289
+ // #define DEBUG 2
290
+ int distance2( pix *p1, struct box *box1,
291
+ pix *p2, struct box *box2, int cs){
292
+ int rc=0,x,y,v1,v2,i1,i2,rgood=0,rbad=0,
293
+ x1,y1,x2,y2,dx,dy,dx1,dy1,dx2,dy2,tx,ty;
294
+ #if DEBUG == 2
295
+ if(JOB->cfg.verbose)
296
+ fprintf(stderr," DEBUG: distance2\n");
297
+ #endif
298
+ x1=box1->x0;y1=box1->y0;x2=box2->x0;y2=box2->y0;
299
+ dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1; dx=((dx1>dx2)?dx1:dx2);dx=dx1;
300
+ dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1; dy=((dy1>dy2)?dy1:dy2);dy=dy1;
301
+ if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) rbad++; // how to weight?
302
+ // compare relations to baseline and upper line
303
+ if(box1->m4>0 && box2->m4>0){ // used ???
304
+ if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
305
+ if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
306
+ }
307
+ tx=dx/16; if(dx<17)tx=1; // raster
308
+ ty=dy/32; if(dy<33)ty=1;
309
+ // compare pixels
310
+ for( y=0;y<dy;y+=ty )
311
+ for( x=0;x<dx;x+=tx ) { // try global shift too ???
312
+ v1=((getpixel(p1,x1+x*dx1/dx,y1+y*dy1/dy)<cs)?1:0); i1=8; // better gray?
313
+ v2=((getpixel(p2,x2+x*dx2/dx,y2+y*dy2/dy)<cs)?1:0); i2=8; // better gray?
314
+ if(v1==v2) { rgood+=16; continue; } // all things are right!
315
+ // what about different pixel???
316
+ // test overlapp of surounding pixels ???
317
+ v1=1; rbad+=4;
318
+ v1=-1;
319
+ for(i1=-1;i1<2;i1++)
320
+ for(i2=-1;i2<2;i2++)if(i1!=0 || i2!=0){
321
+ if( ((getpixel(p1,x1+x*dx1/dx+i1*(1+dx1/32),y1+y*dy1/dy+i2*(1+dy1/32))<cs)?1:0)
322
+ !=((getpixel(p2,x2+x*dx2/dx+i1*(1+dx2/32),y2+y*dy2/dy+i2*(1+dy2/32))<cs)?1:0) ) v1++;
323
+ }
324
+ if(v1>0)
325
+ rbad+=16*v1;
326
+ }
327
+ if(rgood+rbad) rc= 100*rbad/(rgood+rbad); else rc=99;
328
+ /* if width/high is not correct add badness */
329
+ rc += ( abs(dx1*dy2-dx2*dy1) * 10 ) / (dy1*dy2);
330
+ if (rc>100) rc=100;
331
+ if(/* rc<10 && */ JOB->cfg.verbose /* &1024 */){
332
+ #if DEBUG == 2
333
+ fprintf(stderr," distance2 rc=%d rgood=%d rbad=%d\n",rc,rgood,rbad);
334
+ // out_b(NULL,p1,box1->x0,box1->y0,box1->x1-box1->x0+1,
335
+ // box1->y1-box1->y0+1,cs);
336
+ // out_b(NULL,p2,box2->x0,box2->y0,box2->x1-box2->x0+1,
337
+ // box2->y1-box2->y0+1,cs);
338
+ out_x(box1);
339
+ out_x(box2);
340
+ #endif
341
+ }
342
+ return rc;
343
+ }
344
+
345
+ wchar_t ocr_db(struct box *box1) {
346
+ int dd = 1000, dist = 1000;
347
+ wchar_t c = UNKNOWN;
348
+ unsigned char buf[200]; /* Oct08 JS: add unsigned to avoid UTF problems */
349
+ Box *box2, *box3;
350
+
351
+ if (!list_empty(&JOB->tmp.dblist)){
352
+ box3 = (Box *)list_get_header(&JOB->tmp.dblist);
353
+ if(JOB->cfg.verbose)
354
+ fprintf(stderr,"\n#DEBUG: ocr_db (%d,%d) ",box1->x0, box1->y0);
355
+
356
+ for_each_data(&JOB->tmp.dblist) {
357
+ box2 = (Box *)list_get_current(&JOB->tmp.dblist);
358
+ /* do preselect!!! distance() slowly */
359
+ dd = distance2( box2->p, box2, box1->p, box1, JOB->cfg.cs);
360
+ if (dd <= dist) { /* new best fit */
361
+ dist = dd;
362
+ box3 = box2; /* box3 is a pointer and not copied box2 */
363
+
364
+ if (dist<100 && 100-dist >= JOB->cfg.certainty) {
365
+ /* some deviation of the pattern is tolerated */
366
+ int i, wa;
367
+ for (i=0;i<box3->num_ac;i++) {
368
+ wa = (100-dist)*box3->wac[i]/100; /* weight *= (100-dist) */
369
+ if (box3->tas[i]) setas(box1,box3->tas[i],wa);
370
+ else setac(box1,box3->tac[i],wa);
371
+ }
372
+ if (box3->num_ac) c=box3->tac[0]; /* 0 for strings (!UNKNOWN) */
373
+ if (JOB->cfg.verbose)
374
+ fprintf(stderr, " dist=%4d c= %c 0x%02x %s wc= %3d", dist,
375
+ ((box3->c>32 && box3->c<127) ? (char) box3->c : '.'),
376
+ (int)box3->c, ((box3->tas[0])?box3->tas[0]:""), box3->wac[0]);
377
+ }
378
+ if (dd<=0 && ((box3->num_ac && box3->tas[0]) || box3->c >= 128
379
+ || !strchr ("l1|I0O", box3->c)))
380
+ break; /* speedup if found */
381
+ }
382
+ } end_for_each(&JOB->tmp.dblist);
383
+
384
+ }
385
+
386
+ if( (JOB->cfg.mode&128) != 0 && c == UNKNOWN ) { /* prompt the user */
387
+ /* should the output go to stderr or special pipe??? */
388
+ int utf8_ok=0; /* trigger this flag if input is ok */
389
+ int i, endchar; /* index */
390
+ out_env(box1); /* old: out_x(box1); */
391
+ fprintf(stderr,"The above pattern was not recognized.\n"
392
+ "Enter UTF8 char or string for above pattern. Leave empty if unsure.\n"
393
+ "Press RET at the end (ALT+RET to store into RAM only) : "
394
+ ); /* ToDo: empty + alt-return (0x1b 0x0a) for help? ^a for skip all */
395
+ /* UTF-8 (man 7 utf-8):
396
+ * 7bit = 0xxxxxxx (0000-007F)
397
+ * 11bit = 110xxxxx 10xxxxxx (0080-07FF)
398
+ * 16bit = 1110xxxx 10xxxxxx 10xxxxxx (0800-FFFF)
399
+ * 21bit = 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
400
+ * 26bit = 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
401
+ * 31bit = 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
402
+ */
403
+ buf[0]=0;
404
+ /* shift/ctrl/altgr-enter acts like enter or ^j or ^m,
405
+ * alt-enter returns 0x1b 0x0a and returns from fgets()
406
+ * ^d (EOF) returns (nil) from fgets()
407
+ * x+(2*)ctrl-d returns from fgets() without returning a 0x0a
408
+ * if not UTF-input-mode, we are in trouble?
409
+ * ^a=0x01, ^b=0x02, ^e=05, ..., ToDo: meaning of no-input or <=space
410
+ */
411
+ fgets((char *)buf,200,stdin); /* including \n=0x0a */
412
+ dd=strlen((char *)buf);
413
+ /* output hexcode if verbose set */
414
+ if (JOB->cfg.verbose) {
415
+ fprintf(stderr, "\n# fgets [%d]:", dd);
416
+ for(i=0; i<dd; i++)
417
+ fprintf(stderr, " %02x", (unsigned)((unsigned char)buf[i]));
418
+ fprintf(stderr, "\n#");
419
+ }
420
+ /* we dont accept chars which could destroy database file */
421
+ for (i=0; i<dd; i++) if (buf[i]<32) break; /* need unsigned char here */
422
+ endchar=buf[i]; /* last char is 0x0a (ret) 0x00 (EOF) or 0x1b (alt+ret) */
423
+ if (endchar==0x01) { i=0;JOB->cfg.mode&=~128; } /* skip all */
424
+ buf[dd=i]=0; /* replace final 0x0a or other special codes */
425
+ if (dd==1 && !(buf[0]&128)) { c=buf[0]; utf8_ok=1; } /* single char */
426
+ if (dd>1 && dd<7) { /* try to decode single wide char (utf8) */
427
+ int u0, u1; /* define UTF8-start sequences, u0=0bits u1=1bits */
428
+ u0= 1<<(7-dd); /* compute start byte from UTF8-length */
429
+ u1=255&~((1<<(8-dd))-1);
430
+ /* count number of following 10xxxxxx bytes to i */
431
+ for (i=1;i<dd;i++) if ((buf[i]&0xc0)!=0x80) break; /* 10xxxxxx */
432
+ if (i==dd && (buf[0]&(u0|u1))==u1) { utf8_ok=1;
433
+ c=buf[0]&(u0-1); /* 11..0x.. */
434
+ for (i=1;i<dd;i++) { c<<=6; c|=buf[i]&0x3F; } /* 10xxxxxx */
435
+ }
436
+ }
437
+ if (dd>0){ /* ToDo: skip space and tab too? */
438
+ if (utf8_ok==1) { setac(box1, c, 100); } /* store single wchar */
439
+ if (utf8_ok==0) { /* store a string of chars (UTF8-string) */
440
+ c='_'; /* what should we do with c? probably a bad idea? */
441
+ setas(box1, (char *)buf, 100);
442
+ }
443
+ /* decide between
444
+ * 0) just help gocr to find the results and (dont remember, 0x01)
445
+ * 1) help and remember in the same run (store to memory, 0x1b)
446
+ * 2) expand the database (dont store ugly chars to the database!)
447
+ */
448
+ if (endchar!=0x01){ /* ^a before hit return */
449
+ /* is there a reason to dont store to memory? */
450
+ list_app(&JOB->tmp.dblist, box1); /* append to list for 1+2 */
451
+ }
452
+ if (endchar!=0x01 && endchar!=0x1b){
453
+ store_db(box1); /* store to disk for 2 */
454
+ }
455
+ if (JOB->cfg.verbose)
456
+ fprintf(stderr, " got char= %c 16bit= 0x%04x string= \"%s\"\n",
457
+ ((c>32 && c<127)?(char)c:'.'), (int)c, buf);
458
+ }
459
+ }
460
+
461
+ return c;
462
+ }