isbn 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,943 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2007 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- check README for my email address
20
- */
21
-
22
- #include <stdlib.h>
23
- #include <stdio.h>
24
- #include <string.h>
25
- #include <ctype.h> // toupper, tolower
26
- #include "pgm2asc.h"
27
- #include "gocr.h"
28
-
29
- // ----- detect lines ---------------
30
- /* suggestion: Fourier transform and set line frequency where the
31
- amplitude has a maximum (JS: slow and not smarty enough).
32
-
33
- option: range for line numbers 1..1000 or similar
34
- todo: look for thickest line, and divide if thickness=2*mean_thickness
35
- Set these elements of the box structs:
36
-
37
- m1 <-- top of upper case letters and (bdfhkl) (can differ)
38
- m2 <-- top of letters (acegmnopqrsuvwxyz)
39
- m3 <-- baseline
40
- m4 <-- bottom of hanging letters (gqpy)
41
-
42
- performance can be improved by working with a temporary
43
- list of boxes of the special text line
44
-
45
- - Jun23,00 more robustness of m3 (test liebfrau1)
46
- - Feb01,02 more robustness of m4 (test s46_084.pgm)
47
- - Dec03,12 fix problems with footnotes
48
- ToDo:
49
- - generate lists of boxes per line (faster access)
50
- - use statistics
51
- - for each box look at it neighbours and set box-m1..m4
52
- - m[1..4].max .min if m4.min-m3.max<1 probability lower
53
- */
54
- int detect_lines1(pix * p, int x0, int y0, int dx, int dy)
55
- {
56
- int i, jj, j2, y, yy, my, mi, mc, i1, i2, i3, i4,
57
- m1, m2, m3, m4, ma1, ma2, ma3, ma4, m3pre, m4pre;
58
- struct box *box2, *box3; /* box3 is for verbose / debugging */
59
- struct tlines *lines = &JOB->res.lines;
60
-
61
- /* ToDo: optional read line-data from external source??? */
62
- if (lines->num == 0) { // initialize one dummy-line for pictures etc.
63
- lines->m4[0] = 0;
64
- lines->m3[0] = 0;
65
- lines->m2[0] = 0;
66
- lines->m1[0] = 0;
67
- lines->x0[0] = p->x; /* expand to left end during detection */
68
- lines->x1[0] = 0; /* expand to right end */
69
- lines->pitch[0] = JOB->cfg.spc; /* default word pitch */
70
- lines->mono[0] = 0; /* default spacing, 0 = prop */
71
- lines->num++;
72
- }
73
- i = lines->num;
74
- if (dy < 4)
75
- return 0; /* image is to low for latin chars */
76
- my = jj = 0;
77
- // get the mean height of all hollow chars
78
- // (better than mean value of everything including bg-pattern or dust?)
79
- for_each_data(&(JOB->res.boxlist)) {
80
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
81
- if ( box2->c != PICTURE
82
- && box2->num_frames>1 && box2->num_frames<3 /* 1 or 2 holes */
83
- && box2->y0 >= y0 && box2->y1 <= y0 + dy
84
- && box2->x0 >= x0 && box2->x1 <= x0 + dx
85
- && box2->frame_vol[0]>0
86
- && box2->frame_vol[1]<0
87
- ) {
88
- jj++;
89
- my += box2->y1 - box2->y0 + 1;
90
- }
91
- } end_for_each(&(JOB->res.boxlist));
92
- if (jj==0) {
93
- // get the mean height of all chars
94
- for_each_data(&(JOB->res.boxlist)) {
95
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
96
- if ( box2->c != PICTURE
97
- && box2->y1 - box2->y0 + 1 >= 4 /* 4x6 font */
98
- && box2->y0 >= y0 && box2->y1 <= y0 + dy
99
- && box2->x0 >= x0 && box2->x1 <= x0 + dx ) {
100
- jj++;
101
- my += box2->y1 - box2->y0 + 1;
102
- }
103
- } end_for_each(&(JOB->res.boxlist));
104
- }
105
- if (jj == 0)
106
- return 0; /* no chars detected */
107
-
108
-
109
- /* ToDo: a better way could be to mark good boxes (of typical high a-zA-Z0-9)
110
- * first and handle only marked boxes for line scan, exclude ?!,.:;etc
111
- * but without setect the chars itself (using good statistics)
112
- * see adjust_text_lines()
113
- */
114
- my /= jj; /* we only care about chars with high arround my */
115
- if (JOB->cfg.verbose & 16)
116
- fprintf(stderr,"\n# detect_lines1(%d %d %d %d) vvv&16 chars=%d my=%d\n# ",
117
- x0, y0, dx, dy, jj, my);
118
- // "my" is the average over the whole image (bad, if different fontsizes)
119
-
120
- if (my < 4)
121
- return 0; /* mean high is to small => error */
122
-
123
- m4pre=m3pre=y0; /* lower bond of upper line */
124
- // better function for scanning line around a letter ???
125
- // or define lines around known chars "eaTmM"
126
- for (j2 = y = y0; y < y0 + dy; y++) {
127
- // look for max. of upper and lower bound of next line
128
- m1 = y0 + dy;
129
- jj = 0;
130
- #if 1
131
- /* this is only for test runs */
132
- if (JOB->cfg.verbose & 16)
133
- fprintf(stderr,"searching new line %d\n# ",i /* lines->num */);
134
- #endif
135
-
136
- box3 = NULL; /* mark the most upper box starting next line */
137
- // find highest point of next line => store to m1-min (m1>=y)
138
- // only objects greater 2/3*my and smaller 3*my are allowed
139
- // a higher "!" at end of line can result in a to low m1
140
- for_each_data(&(JOB->res.boxlist)) {
141
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
142
- if (box2->line>0 || box2->c == PICTURE) continue;
143
- if (lines->dx)
144
- yy = lines->dy * box2->x0 / (lines->dx); /* correct crooked lines */
145
- else yy=0;
146
- if ( box2->y0 >= y + yy && box2->y1 < y0 + dy // lower than y
147
- && box2->x0 >= x0 && box2->x1 < x0 + dx // within box ?
148
- && box2->c != PICTURE // no picture
149
- && box2->num_boxes <= 1 // ignore 2 for "!?i" 3 for "&auml;"
150
- && 3 * (box2->y1 - box2->y0) > 2 * my // not to small
151
- && (box2->y1 - box2->y0) < 3 * my // not to big
152
- && (box2->y1 - box2->y0) > 4) // minimum absolute size
153
- {
154
- if (box2->y0 < m1 + yy) {
155
- m1 = box2->y0 - yy; /* highest upper boundary */
156
- box3 = box2;
157
- }
158
- // fprintf(stderr,"\n %3d %3d %+3d %d m1= %3d",
159
- // box2->x0, box2->y0, box2->y1 - box2->y0 + 1, box2->num_boxes, m1);
160
- }
161
- } end_for_each(&(JOB->res.boxlist));
162
- if (!box3 || m1 >= y0+dy) break; /* no further line found */
163
- if (JOB->cfg.verbose & 16)
164
- fprintf(stderr," most upper box at new line xy= %4d %4d %+4d %+4d\n# ",
165
- box3->x0, box3->y0, box3->x1-box3->x0, box3->y1-box3->y0);
166
-
167
- // at the moment values depend from single chars, which can
168
- // result in bad values (ex: 4x6 /\=)
169
- // ToDo: 2) mean size of next line (store list of y0,y1)
170
- // ToDo: 3) count num0[(y0-m1)*16/my], num1[(y1-m1)*16/my]
171
- // ToDo: or down-top search horizontal nerarest neighbours
172
- lines->x0[i] = x0 + dx - 1; /* expand during operation to left end */
173
- lines->x1[i] = x0; /* expand to the right end of line */
174
- m4=m2=m1; mi=m1+my; m3=m1+2*my; jj=0;
175
- // find limits for upper bound, base line and ground line
176
- // m2-max m3-min m4-max
177
- for_each_data(&(JOB->res.boxlist)) {
178
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
179
- if (box2->line>0 || box2->c == PICTURE) continue;
180
- if ( box2->y0 < y0 || box2->y1 >= y0 + dy
181
- || box2->x0 < x0 || box2->x1 >= x0 + dx ) continue; // out of image
182
- if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx);
183
- else yy=0;
184
- /* check for ij-dots, used if chars of same high */
185
- if ( box2->y0 >= y + yy
186
- && box2->y0 >= y
187
- && (box2->y1 - box2->y0) < my
188
- && box2->y1 < m1 + yy + my/4
189
- && box2->y0 < mi + yy ) {
190
- mi = box2->y0 - yy; /* highest upper boundary i-dot */
191
- }
192
- // fprintf(stderr,"\n check %3d %3d-%3d y=%d yy=%d m1=%d", box2->x0, box2->y0, box2->y1, y, yy, m1);
193
- /* get m2-max m3-min m4-max */
194
- if ( box2->y0 >= y + yy // lower than y
195
- && 3 * (box2->y1 - box2->y0 + 1) > 2 * my // right size ?
196
- && (box2->y1 - box2->y0 + 1) < 3 * my // font mix, size = 2.6*my
197
- && (box2->y1 - box2->y0 + 1) > 3 // 4x6 lowercase=4
198
- && box2->y0 >= m1 // in m1 range?
199
- && box2->y0 <= m1 + yy + 9 * my / 8 // my can be to small if mixed
200
- // ToDo: we need a better (local?) algorithm for big headlines > 2*my
201
- && box2->y1 <= m1 + yy + 3 * my
202
- && box2->y1 >= m1 + yy + my / 2
203
- // lines can differ in high, my may be to small (smaller headlines)
204
- && box2->y0+box2->y1 <= 2*box3->y1
205
- )
206
- {
207
- jj++; // count chars for debugging purpose
208
- if (box2->y0 > m2 + yy) {
209
- m2 = box2->y0 - yy; /* highest upper boundary */
210
- if (JOB->cfg.verbose & 16)
211
- fprintf(stderr," set m2= %d yy= %d\n# ",m2, yy);
212
- }
213
- if (box2->y1 > m4 + yy && (my>6 || box2->y1 < m3+my)) {
214
- m4 = box2->y1 - yy; /* lowest lower boundary, small font lines can touch */
215
- }
216
- if ( box2->y1 < m3 + yy
217
- && ( ( 2*box2->y1 > m2+ m4+yy && m2>m1)
218
- || ( 4*box2->y1 > m1+3*m4+yy) ) ) // care for TeX: \(^1\)Footnote 2003
219
- /* "'!?" could cause trouble here, therefore this lines */
220
- /* ToDo: get_bw costs time, check pre and next */
221
- if( get_bw(box2->x0,box2->x1,box2->y1+1 ,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 0
222
- || get_bw(box2->x0,box2->x1,box2->y1+my/2,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 1
223
- || num_cross(box2->x0,box2->x1,(box2->y0+box2->y1)/2,(box2->y0+box2->y1)/2,box2->p,JOB->cfg.cs)>2 )
224
- {
225
- m3 = box2->y1 - yy; /* highest lower boundary */
226
- // printf("\n# set1 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
227
- // out_x(box2);
228
- }
229
- if (box2->y0 + box2->y1 > 2*(m3 + yy)
230
- && box2->y1 < m4 + yy - my/4 -1
231
- && box2->y1 >= (m2 + m4)/2 // care for TeX: \(^1\)Footnote 2003
232
- && m2 > m1 ) // be sure to not use ', m2 must be ok
233
- {
234
- m3 = box2->y1 - yy; /* highest lower boundary */
235
- // printf("\n# set2 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
236
- // out_x(box2);
237
- }
238
- if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
239
- if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
240
- // printf(" m=%3d %+2d %+2d %+2d yy=%3d\n",m1,m2-m1,m3-m1,m4-m1,yy);
241
- }
242
- } end_for_each(&(JOB->res.boxlist));
243
-
244
- #if 1
245
- /* this is only for test runs */
246
- if (JOB->cfg.verbose & 16)
247
- fprintf(stderr," step 1 y=%4d m= %4d %+3d %+3d %+3d"
248
- " my=%2d chars=%3d\n# ",
249
- y, m1, m2-m1, m3-m1, m4-m1, my, jj);
250
- #endif
251
-
252
- if (m3 == m1)
253
- break;
254
- #if 1 /* make averages about the line */
255
- // same again better estimation
256
- mc = (3 * m3 + m1) / 4; /* lower center ? */
257
- ma1 = ma2 = ma3 = ma4 = i1 = i2 = i3 = i4 = jj = 0;
258
- for_each_data(&(JOB->res.boxlist)) {
259
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
260
- if (box2->line>0 || box2->c == PICTURE) continue;
261
- if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
262
- if (box2->y0 >= y + yy && box2->y1 < y0 + dy // lower than y
263
- && box2->x0 >= x0 && box2->x1 < x0 + dx // in box ?
264
- && box2->c != PICTURE // no picture
265
- && 2 * (box2->y1 - box2->y0) > my // right size ?
266
- && (box2->y1 - box2->y0) < 4 * my) {
267
- if ( box2->y0 - yy >= m1-my/4
268
- && box2->y0 - yy <= m2+my/4
269
- && box2->y1 - yy >= m3-my/4
270
- && box2->y1 - yy <= m4+my/4 ) { /* its within allowed range! */
271
- // jj++; // not used
272
- if (abs(box2->y0 - yy - m1) <= abs(box2->y0 - yy - m2))
273
- { i1++; ma1 += box2->y0 - yy; }
274
- else { i2++; ma2 += box2->y0 - yy; }
275
- if (abs(box2->y1 - yy - m3) < abs(box2->y1 - yy - m4))
276
- { i3++; ma3 += box2->y1 - yy; }
277
- else { i4++; ma4 += box2->y1 - yy; }
278
- if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
279
- if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
280
- }
281
- }
282
- } end_for_each(&(JOB->res.boxlist));
283
-
284
- if (i1) m1 = (ma1+i1/2) / i1; /* best rounded */
285
- if (i2) m2 = (ma2+i2/2) / i2;
286
- if (i3) m3 = (ma3+i3-1) / i3; /* round up */
287
- if (i4) m4 = (ma4+i4-1) / i4;
288
- // printf("\n# .. set3 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
289
-
290
- #endif
291
-
292
- /* expand right and left end of line */
293
- for_each_data(&(JOB->res.boxlist)) {
294
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
295
- if (box2->line>0 || box2->c == PICTURE) continue;
296
- if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
297
- if ( box2->y0 >= y0 && box2->y1 < y0 + dy
298
- && box2->x0 >= x0 && box2->x1 < x0 + dx // in box ?
299
- && box2->c != PICTURE // no picture
300
- && box2->y0 >= m1-1
301
- && box2->y0 <= m4
302
- && box2->y1 >= m1
303
- && box2->y1 <= m4+1 ) { /* its within line */
304
- if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
305
- if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
306
- }
307
- } end_for_each(&(JOB->res.boxlist));
308
-
309
- #if 1
310
- /* this is only for test runs */
311
- if (JOB->cfg.verbose & 16)
312
- fprintf(stderr," step 2 y=%4d m= %4d %+3d %+3d %+3d\n# ",
313
- y,m1,m2-m1,m3-m1,m4-m1);
314
- #endif
315
-
316
- if (m4 == m1) {
317
- if(m3+m4>2*y) y = (m4+m3)/2; /* lower end may overlap the next line */
318
- continue;
319
- }
320
- jj=0;
321
- lines->wt[i] = 100;
322
- if (5 * (m2 - m1 +1) < m3 - m2 || (m2 - m1) < 2) jj|=1; /* same high */
323
- if (5 * (m4 - m3 +1) < m3 - m2 || (m4 - m3) < 1) jj|=2; /* same base */
324
- if (jj&1) lines->wt[i] = 75*lines->wt[i]/100;
325
- if (jj&2) lines->wt[i] = 75*lines->wt[i]/100;
326
- if (jj>0 && JOB->cfg.verbose) {
327
- fprintf(stderr," trouble on line %d, wt*100= %d\n",i,lines->wt[i]);
328
- fprintf(stderr,"# m= %4d %+3d %+3d %+3d\n",m1,m2-m1,m3-m1,m4-m1);
329
- fprintf(stderr,"# i= %3d %3d %3d %3d (counts)\n",i1,i2,i3,i4);
330
- if (jj==3) fprintf(stderr,"# all boxes of same high!\n# ");
331
- if (jj==1) fprintf(stderr,"# all boxes of same upper bound!\n# ");
332
- if (jj==2) fprintf(stderr,"# all boxes of same lower bound!\n# ");
333
- }
334
- /* ToDo: check for dots ij,. to get the missing information */
335
- #if 1
336
- /* jj=3: ABCDEF123456 or mnmno or gqpy or lkhfdtb => we are in trouble */
337
- if (jj==3 && (m4-m1)>my) { jj=0; m2=m1+my/8+1; m4=m3+my/8+1; } /* ABC123 */
338
- /* using idots, may fail on "ABCDEFG&Auml;&Uuml;&Ouml;" */
339
- if (jj==3 && mi>0 && mi<m1 && mi>m4pre) { jj=2; m1=mi; } /* use ij dots */
340
- if (jj==1 && m2-(m3-m2)/4>m3pre ) { /* expect: acegmnopqrsuvwxyz */
341
- if (m1-m4pre<m4-m1) /* fails for 0123ABCD+Q$ */
342
- m1 = ( m2 + m4pre ) / 2 ;
343
- else
344
- m1 = ( m2 - (m3 - m2) / 4 );
345
- }
346
- if (jj==3)
347
- m2 = m1 + (m3 - m1) / 4 + 1; /* expect: 0123456789ABCDEF */
348
- if ( (m2 - m1) < 2)
349
- m2 = m1 + 2; /* font hight < 8 pixel ? */
350
- if (jj&2)
351
- m4 = m3 + (m4 - m1) / 4 + 1; /* chars have same lower base */
352
- if (jj>0 && JOB->cfg.verbose & 16) {
353
- fprintf(stderr," m= %4d %+2d %+2d %+2d my= %4d\n# ",
354
- m1, m2-m1, m3-m1, m4-m1, my);
355
- }
356
- #endif
357
-
358
-
359
- { // empty space between lines
360
- lines->m4[i] = m4;
361
- lines->m3[i] = m3;
362
- lines->m2[i] = m2;
363
- lines->m1[i] = m1;
364
- lines->pitch[i] = JOB->cfg.spc; /* default word pitch */
365
- lines->mono[i] = 0; /* default spacing, 0=prop, 1=mono */
366
- if (JOB->cfg.verbose & 16)
367
- fprintf(stderr, " m= %4d %+3d %+3d %+3d w= %d (line=%d)\n# ",
368
- m1, m2 - m1, m3 - m1, m4 - m1, lines->wt[i], i);
369
- if (i < MAXlines && m4 - m1 > 4)
370
- i++;
371
- if (i >= MAXlines) {
372
- fprintf(stderr, "Warning: lines>MAXlines\n");
373
- break;
374
- }
375
- }
376
- if (m3+m4>2*y) y = (m3+m4)/2; /* lower end may overlap the next line */
377
- if (m3>m3pre) m3pre = m3; else m3=y0; /* set for next-line scan */
378
- if (m4>m4pre) m4pre = m4; else m4=y0; /* set for next-line scan */
379
- }
380
- lines->num = i;
381
- if (JOB->cfg.verbose)
382
- fprintf(stderr, " num_lines= %d", lines->num-1);
383
- return 0;
384
- }
385
-
386
- // ----- layout analyzis of dx*dy region at x0,y0 -----
387
- // ----- detect lines via recursive division (new version) ---------------
388
- // what about text in frames???
389
- // ToDo: change to bottom-top analyse or/and take rotation into account
390
- int detect_lines2(pix *p,int x0,int y0,int dx,int dy,int r){
391
- int i,x2,y2,x3,y3,x4,y4,x5,y5,y6,mx,my,x30,x31,y30,y31;
392
- struct box *box2,*box3;
393
- // shrink box
394
- if(dx<=0 || dy<=0) return 0;
395
- if(y0+dy< p->y/128 && y0==0) return 0; /* looks like dust */
396
- if(y0>p->y-p->y/128 && y0+dy==p->y) return 0; /* looks like dust */
397
-
398
- if(r>1000){ return -1;} // something is wrong
399
- if(JOB->cfg.verbose)fprintf(stderr,"\n# r=%2d ",r);
400
-
401
- mx=my=i=0; // mean thickness
402
- // remove border, shrink size
403
- x2=x0+dx-1; // min x
404
- y2=y0+dy-1; // min y
405
- x3=x0; // max x
406
- y3=y0; // max y
407
- for_each_data(&(JOB->res.boxlist)) {
408
- box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
409
- if(box3->y0>=y0 && box3->y1<y0+dy &&
410
- box3->x0>=x0 && box3->x1<x0+dx)
411
- {
412
- if( box3->x1 > x3 ) x3=box3->x1; // max x
413
- if( box3->x0 < x2 ) x2=box3->x0; // min x
414
- if( box3->y1 > y3 ) y3=box3->y1; // max y
415
- if( box3->y0 < y2 ) y2=box3->y0; // min y
416
- if(box3->c!=PICTURE)
417
- if( box3->y1 - box3->y0 > 4 )
418
- {
419
- i++;
420
- mx+=box3->x1-box3->x0+1; // mean x
421
- my+=box3->y1-box3->y0+1; // mean y
422
- }
423
- }
424
- } end_for_each(&(JOB->res.boxlist));
425
- x0=x2; dx=x3-x2+1;
426
- y0=y2; dy=y3-y2+1;
427
-
428
- if(i==0 || dx<=0 || dy<=0) return 0;
429
- mx/=i;my/=i;
430
- // better look for widest h/v-gap, ToDo: vertical lines?
431
- if(r<8){ // max. depth
432
-
433
- // detect widest horizontal gap
434
- y2=y3=y4=y5=y6=0;
435
- x2=x3=x4=x5=y5=0;// min. 3 lines
436
- // position and thickness of gap, y6=num_gaps, nbox^2 ops
437
- for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
438
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
439
- if( box2->c!=PICTURE ) /* ToDo: not sure, that this is a good idea */
440
- if( box2->y0>=y0 && box2->y1<y0+dy
441
- && box2->x0>=x0 && box2->x1<x0+dx
442
- && box2->y1-box2->y0>my/2 ){ // no pictures & dust???
443
-
444
- y4=y0+dy-1; // nearest vert. box
445
- x4=x0+dx-1;
446
- // ToDo: rotate back box2->x1,y1 to x21,y21
447
- // look for nearest lowest (y4) and right (x4) neighbour
448
- // of every box (box2)
449
- for_each_data(&(JOB->res.boxlist)) {
450
- box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
451
- if(box3!=box2)
452
- if(box3->y0>=y0 && box3->y1<y0+dy)
453
- if(box3->x0>=x0 && box3->x1<x0+dx)
454
- if(box3->c!=PICTURE) /* ToDo: not sure, that this is a good idea */
455
- if(box3->y1-box3->y0>my/2 ){
456
- // ToDo: here we need the rotation around box2
457
- x30=box3->x0;
458
- x31=box3->x1;
459
- y30=box3->y0;
460
- y31=box3->y1;
461
- // get min. distances to lower and to right direction
462
- if( y31 > box2->y1 && y30 < y4 ) y4=y30-1;
463
- if( x31 > box2->x1 && x30 < x4 ) x4=x30-1;
464
- }
465
- } end_for_each(&(JOB->res.boxlist));
466
- // set the witdht and position of largest hor./vert. gap
467
- // largest gap: width position
468
- if( y4-box2->y1 > y3 ) { y3=y4-box2->y1; y2=(y4+box2->y1)/2; }
469
- if( x4-box2->x1 > x3 ) { x3=x4-box2->x1; x2=(x4+box2->x1)/2; }
470
- }
471
- } end_for_each(&(JOB->res.boxlist));
472
- // fprintf(stderr,"\n widest y-gap= %4d %4d",y2,y3);
473
- // fprintf(stderr,"\n widest x-gap= %4d %4d",x2,x3);
474
-
475
- i=0; // i=1 at x, i=2 at y
476
- // this is the critical point
477
- // is this a good decision or not???
478
- if(x3>0 || y3>0){
479
- if(x3>mx && x3>2*y3 && (dy>5*x3 || (x3>10*y3 && y3>0))) i=1; else
480
- if(dx>5*y3 && y3>my) i=2;
481
- }
482
-
483
- // compare with largest box???
484
- for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
485
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
486
- if( box2->c == PICTURE )
487
- if( box2->y0>=y0 && box2->y1<y0+dy
488
- && box2->x0>=x0 && box2->x1<x0+dx )
489
- { // hline ???
490
- // largest gap: width position
491
- if( box2->x1-box2->x0+4 > dx && box2->y1+4<y0+dy ) { y3=1; y2=box2->y1+1; i=2; break; }
492
- if( box2->x1-box2->x0+4 > dx && box2->y0-4>y0 ) { y3=1; y2=box2->y0-1; i=2; break; }
493
- if( box2->y1-box2->y0+4 > dy && box2->x1+4<x0+dx ) { x3=1; x2=box2->x1+1; i=1; break; }
494
- if( box2->y1-box2->y0+4 > dy && box2->x0-4>x0 ) { x3=1; x2=box2->x0-1; i=1; break; }
495
- }
496
- } end_for_each(&(JOB->res.boxlist));
497
- if(JOB->cfg.verbose)fprintf(stderr," i=%d",i);
498
-
499
- if(JOB->cfg.verbose && i) fprintf(stderr," divide at %s x=%4d y=%4d dx=%4d dy=%4d",
500
- ((i)?( (i==1)?"x":"y" ):"?"),x2,y2,x3,y3);
501
- // divide horizontally if v-gap is thicker than h-gap
502
- // and length is larger 5*width
503
- if(i==1){ detect_lines2(p,x0,y0,x2-x0+1,dy,r+1);
504
- return detect_lines2(p,x2,y0,x0+dx-x2+1,dy,r+1); }
505
- // divide vertically
506
- if(i==2){ detect_lines2(p,x0,y0,dx,y2-y0+1,r+1);
507
- return detect_lines2(p,x0,y2,dx,y0+dy-y2+1,r+1);
508
- }
509
- }
510
-
511
-
512
- if(JOB->cfg.verbose) if(dx<5 || dy<7)fprintf(stderr," empty box");
513
- if(dx<5 || dy<7) return 0; // do not care about dust
514
- if(JOB->cfg.verbose)fprintf(stderr, " box detected at %4d %4d %4d %4d",x0,y0,dx,dy);
515
- if(JOB->tmp.ppo.p){
516
- for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0 ,255,16);
517
- for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0+dy-1,255,16);
518
- for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0 ,y0+i ,255,16);
519
- for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0+dx-1,y0+i ,255,16);
520
- // writebmp("out10.bmp",p2,JOB->cfg.verbose); // colored should be better
521
- }
522
- return detect_lines1(p,x0-0*1,y0-0*2,dx+0*2,dy+0*3);
523
-
524
- /*
525
- struct tlines *lines = &JOB->res.lines;
526
- i=lines->num; lines->num++;
527
- lines->m1[i]=y0; lines->m2[i]=y0+5*dy/16;
528
- lines->m3[i]=y0+12*dy/16; lines->m4[i]=y0+dy-1;
529
- lines->x0[i]=x0; lines->x1[i]=x0+dx-1;
530
- if(JOB->cfg.verbose)fprintf(stderr," - line= %d",lines->num);
531
- return 0;
532
- */
533
- }
534
-
535
/*
 * Integer square root by interval subdivision (germ.: Intervallschachtelung),
 * without using the math library.
 *
 * Returns floor(sqrt(x)) for x > 0 and 0 for x <= 0.
 *
 * ToDo: Heron's algorithm x=(x+y/x)/2 may be more efficient (?)
 *       see http://www.math.vt.edu/people/brown/doc/sqrts.pdf
 */
int my_sqrt(int x){
  int lo, hi, mid;

  if (x <= 0) return 0;
  /* Invariant: lo*lo <= x < hi*hi.
   * 46341 is the smallest value whose square exceeds INT_MAX (32 bit),
   * so capping hi there keeps mid*mid below the overflow limit. */
  lo = 0;
  hi = (x < 46341) ? x + 1 : 46341;
  while (lo < hi - 1) {
    mid = lo + (hi - lo) / 2;
    if (mid * mid <= x) lo = mid; else hi = mid;
  }
  return lo;
}
548
-
549
- /*
550
- ** Detect rotation angle (one for whole image)
551
- ** old: longest text-line and determining the angle of this line.
552
- *
553
- * search right nearest neighbour of each box and average vectors
554
- * to get the text orientation,
555
- * upside down decision is not made here (I dont know how to do it)
556
- * ToDo: set job->res.lines.{dx,dy}
557
- * pass 1: get mean vector to nearest char
558
- * pass 2: get mean vector to nearest char without outriders to pass 1
559
- * extimate direction as (dx,dy,num)[pass]
560
- * ToDo: estimate an error, boxes only work fine for zero-rotation
561
- * for 45 degree use vectors, not boxes to get base line
562
- */
563
- #define INorm 1024 /* integer unit 1.0 */
564
- int detect_rotation_angle(job_t *job){
565
- struct box *box2, *box3,
566
- *box_nn; /* nearest neighbour box */
567
- int x2, y2, x3, y3, dist, mindist, pass,
568
- rx=0, ry=0, re=0, // final result
569
- /* to avoid 2nd run, wie store pairs in 2 different categories */
570
- nn[4]={0,0,0,0}, /* num_pairs used for estimation [(pass-1)%2,pass%2] */
571
- dx[4]={0,0,0,0}, /* x-component of rotation vector per pass */
572
- dy[4]={0,0,0,0}, /* y-component of rotation vector per pass */
573
- er[4]={INorm/4,0,0,0}; /* mean angle deviation to pass-1 (radius^2) */
574
- // de; /* ToDo: absolute maximum error (dx^2+dy^2) */
575
- // ToDo: next pass: go to bigger distances and reduce max error
576
- // error is diff between passes? or diff of bottoms and top borders (?)
577
-
578
- rx=1024; ry=0; // default
579
- for (pass=0;pass<4;pass++) {
580
- for_each_data(&(job->res.boxlist)) {
581
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
582
- if (box2->c==PICTURE) continue;
583
- /* subfunction probability of char */
584
- // i?
585
- // if (box2->x1 - box2->x0 < 3) continue; /* smallest font is 4x6 */
586
- if (box2->y1 - box2->y0 < 4) continue;
587
- /* set maximum possible distance */
588
- box_nn=box2; // initial box to compare with
589
-
590
- // ToDo: clustering or majority
591
- // the algorithm is far from being perfect, pitfalls are likely
592
- // but its better than the old algorithm, ToDo: database-rotated-images
593
- mindist = job->src.p.x * job->src.p.x + job->src.p.y * job->src.p.y;
594
- /* get middle point of the box */
595
- x2 = (box2->x0 + box2->x1)/2;
596
- y2 = (box2->y0 + box2->y1)/2;
597
- re=0;
598
- /* search for nearest neighbour box_nn[pass+1] of box_nn[pass] */
599
- for_each_data(&(job->res.boxlist)) {
600
- box3 = (struct box *)list_get_current(&(job->res.boxlist));
601
- /* try to select only potential neighbouring chars */
602
- /* select out all senseless combinations */
603
- if (box3->c==PICTURE || box3==box2) continue;
604
- x3 = (box3->x0 + box3->x1)/2;
605
- y3 = (box3->y0 + box3->y1)/2; /* get middle point of the box */
606
- if (x3<x2) continue; /* simplify by going right only */
607
- // through-away deviation of angles if > pass-1?
608
- // scalprod max in direction, cross prod min in direction
609
- // a,b (vectors): <a,b>^2/(|a|*|b|)^2 = 0(90deg)..0.5(45deg).. 1(0deg)
610
- // * 1024 ??
611
- if (pass>0) { // new variant = scalar product
612
- // danger of int overflow, ToDo: use int fraction
613
- re =(int) ((1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])
614
- *(1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])*INorm
615
- /(1.*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2))
616
- *(1.*dx[pass-1]*dx[pass-1]+dy[pass-1]*dy[pass-1])));
617
- if (INorm-re>er[pass-1]) continue; // hits mean deviation
618
- }
619
- /* neighbours should have same order of size (?) */
620
- if (3*(box3->y1-box3->y0+4) < 2*(box2->y1-box2->y0+1)) continue;
621
- if (2*(box3->y1-box3->y0+1) > 3*(box2->y1-box2->y0+4)) continue;
622
- if (2*(box3->x1-box3->x0+1) > 5*(box2->x1-box2->x0+4)) continue;
623
- if (5*(box3->x1-box3->x0+4) < 2*(box2->x1-box2->x0+1)) continue;
624
- /* should be in right range, Idea: center3 outside box2? noholes */
625
- if ((x3<box2->x1-1) && (x3>box2->x0+1)
626
- && (y3<box2->y1-1) && (y3>box2->y0+1)) continue;
627
- // if chars are of different size, connect careful
628
- if ( abs(x3-x2) > 2*(box2->x1 - box2->x0 + box3->x1 - box3 ->x0 + 2)) continue;
629
- if ( abs(y3-y2) > (box2->x1 - box2->x0 + box3->x1 - box3 ->x0 + 2)) continue;
630
- dist = (y3-y2)*(y3-y2) + (x3-x2)*(x3-x2);
631
- // make distances in pass-1 directions shorter or continue if not in pass-1 range?
632
- if (dist<9) continue; /* minimum distance^2 is 3^2 */
633
- if (dist<mindist) { mindist=dist; box_nn=box3;}
634
- // fprintf(stderr,"x y %d %d %d %d dist %d min %d\n",
635
- // x2,y2,x3,y3,dist,mindist);
636
- } end_for_each(&(job->res.boxlist));
637
-
638
- if (box_nn==box2) continue; /* has no neighbour, next box */
639
-
640
- box3=box_nn; dist=mindist;
641
- x3 = (box3->x0 + box3->x1)/2;
642
- y3 = (box3->y0 + box3->y1)/2; /* get middle point of the box */
643
- // dist = my_sqrt(1024*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2)));
644
- // compare with first box
645
- x2 = (box2->x0 + box2->x1)/2;
646
- y2 = (box2->y0 + box2->y1)/2;
647
- // if the high of neighbouring boxes differ, use min diff (y0,y1)
648
- if (pass>0 && 16*abs(dy[pass-1]) < dx[pass-1]) // dont work for strong rot.
649
- if (abs(box2->y1-box2->y0-box3->y1+box3->y0)>(box2->y1-box2->y0)/8) {
650
- // ad eh ck ...
651
- if (abs(box2->y1-box3->y1)<abs(y3-y2)) { y2=box2->y1; y3=box3->y1; }
652
- // ag ep qu ...
653
- if (abs(box2->y0-box3->y0)<abs(y3-y2)) { y2=box2->y0; y3=box3->y0; }
654
- }
655
- if (abs(x3-x2)<4) continue;
656
- dx[pass]+=(x3-x2)*1024; /* normalized before averaging */
657
- dy[pass]+=(y3-y2)*1024; /* 1024 is for the precision */
658
- nn[pass]++;
659
- if (pass>0) { // set error = mean deviation from pass -1
660
- re = INorm-(int)((1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])
661
- *(1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])*INorm
662
- /((1.*(x3-x2)*(x3-x2)+(y3-y2)*(y3-y2))
663
- *(1.*dx[pass-1]*dx[pass-1]+dy[pass-1]*dy[pass-1]))
664
- );
665
- er[pass]+=re;
666
- }
667
- #if 0
668
- if(JOB->cfg.verbose)
669
- fprintf(stderr,"# next nb (x,y,dx,dy,re) %6d %6d %5d %5d %5d pass %d\n",
670
- x2, y2, x3-x2, y3-y2, re, pass+1);
671
- #endif
672
- } end_for_each(&(job->res.boxlist));
673
- if (!nn[pass]) break;
674
- if (nn[pass]) {
675
- /* meanvalues */
676
- rx=dx[pass]/=nn[pass];
677
- ry=dy[pass]/=nn[pass];
678
- if (pass>0) er[pass]/=nn[pass];
679
- }
680
- if(JOB->cfg.verbose)
681
- fprintf(stderr,"# rotation angle (x,y,maxr,num)"
682
- " %6d %6d %6d %4d pass %d\n",
683
- rx, ry, er[pass], nn[pass], pass+1);
684
- }
685
- if (abs(ry*100)>abs(rx*50))
686
- fprintf(stderr,"<!-- gocr will fail, strong rotation angle detected -->\n");
687
- /* ToDo: normalize to 2^10 bit (square fits to 32 it) */
688
- JOB->res.lines.dx=rx;
689
- JOB->res.lines.dy=ry;
690
- return 0;
691
- }
692
-
693
- /* ----- detect lines --------------- */
694
- int detect_text_lines(pix * pp, int mo) {
695
-
696
- if (JOB->cfg.verbose)
697
- fprintf(stderr, "# detect.c detect_text_lines (vvv=16 for more info) ");
698
- if (mo & 4){
699
- if (JOB->cfg.verbose) fprintf(stderr, "# zoning\n# ... ");
700
- detect_lines2(pp, 0, 0, pp->x, pp->y, 0); // later replaced by better algo
701
- } else
702
- detect_lines1(pp, 0, 0, pp->x, pp->y); // old algo
703
-
704
- if(JOB->cfg.verbose) fprintf(stderr,"\n");
705
- return 0;
706
- }
707
-
708
-
709
- /* ----- adjust lines --------------- */
710
- // rotation angle? JOB->res.lines.dy, .x0 removed later
711
- // this is for cases, where m1..m4 is not very sure detected before
712
- // chars are recognized
713
- int adjust_text_lines(pix * pp, int mo) {
714
- struct box *box2;
715
- int *m, /* summ m1..m4, num_chars for m1..m4, min m1..m4, max. m1..m4 */
716
- l, i, dy, dx, diff=0, y0, y1;
717
-
718
- if ((l=JOB->res.lines.num)<2) return 0; // ???
719
- if (JOB->cfg.verbose)
720
- fprintf(stderr, "# adjust text lines ");
721
- m=(int *)malloc(l*16*sizeof(int));
722
- if (!m) { fprintf(stderr," malloc failed\n"); return 0;}
723
- for (i=0;i<16*l;i++) m[i]=0; /* initialize */
724
- dy=JOB->res.lines.dy; /* tan(alpha) of skewing */
725
- dx=JOB->res.lines.dx; /* old: width of image */
726
- // js: later skewing is replaced by one transformation of vectorized image
727
-
728
- if (dx)
729
- for_each_data(&(JOB->res.boxlist)) {
730
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
731
- if (box2->line<=0) continue;
732
- if (box2->num_ac<1) continue;
733
- if (box2->wac[0]<95) continue;
734
- if (box2->m2==0 || box2->y1<box2->m2) continue; // char outside line
735
- if (box2->m3==4 || box2->y0>box2->m3) continue; // char outside line
736
- y0=box2->y0-((box2->x1)*dy/dx); /* corrected by page skewing */
737
- y1=box2->y1-((box2->x1)*dy/dx);
738
- if (strchr("aemnr",(char)box2->tac[0])) { // cC vV sS oO ... is unsure!
739
- m[box2->line*16+1]+=y0; m[box2->line*16+5]++; // num m2
740
- m[box2->line*16+2]+=y1; m[box2->line*16+6]++; // num m3
741
- if (m[box2->line*16+ 9]>y0) m[box2->line*16+ 9]=y0; /* min m2 */
742
- if (m[box2->line*16+13]<y0) m[box2->line*16+13]=y0; /* max m2 */
743
- if (m[box2->line*16+10]>y1) m[box2->line*16+10]=y1; /* min m3 */
744
- if (m[box2->line*16+14]<y1) m[box2->line*16+14]=y1; /* max m3 */
745
- }
746
- if (strchr("bdhklABDEFGHIKLMNRT123456789",(char)box2->tac[0])) {
747
- m[box2->line*16+0]+=y0; m[box2->line*16+4]++; // num m1
748
- m[box2->line*16+2]+=y1; m[box2->line*16+6]++; // num m3
749
- if (m[box2->line*16+ 8]>y0) m[box2->line*16+ 8]=y0; /* min m1 */
750
- if (m[box2->line*16+12]<y0) m[box2->line*16+12]=y0; /* max m1 */
751
- if (m[box2->line*16+10]>y1) m[box2->line*16+10]=y1; /* min m3 */
752
- if (m[box2->line*16+14]<y1) m[box2->line*16+14]=y1; /* max m3 */
753
- }
754
- if (strchr("gq",(char)box2->tac[0])) {
755
- m[box2->line*16+1]+=y0; m[box2->line*16+5]++; // num m2
756
- m[box2->line*16+3]+=y1; m[box2->line*16+7]++; // num m4
757
- if (m[box2->line*16+ 9]>y0) m[box2->line*16+ 9]=y0; /* min m2 */
758
- if (m[box2->line*16+13]<y0) m[box2->line*16+13]=y0; /* max m2 */
759
- if (m[box2->line*16+11]>y1) m[box2->line*16+11]=y1; /* min m4 */
760
- if (m[box2->line*16+15]<y1) m[box2->line*16+15]=y1; /* max m4 */
761
- }
762
- } end_for_each(&(JOB->res.boxlist));
763
-
764
- for (i=1;i<l;i++) {
765
- diff=0; // show diff per line
766
- if (m[i*16+4]) diff+=abs(JOB->res.lines.m1[i]-m[i*16+0]/m[i*16+4]);
767
- if (m[i*16+5]) diff+=abs(JOB->res.lines.m2[i]-m[i*16+1]/m[i*16+5]);
768
- if (m[i*16+6]) diff+=abs(JOB->res.lines.m3[i]-m[i*16+2]/m[i*16+6]);
769
- if (m[i*16+7]) diff+=abs(JOB->res.lines.m4[i]-m[i*16+3]/m[i*16+7]);
770
- /* recalculate sureness, empirically */
771
- if (m[i*16+4]*m[i*16+5]*m[i*16+6]*m[i*16+7] > 0)
772
- JOB->res.lines.wt[i]=(JOB->res.lines.wt[i]+100)/2;
773
- else
774
- JOB->res.lines.wt[i]=(JOB->res.lines.wt[i]*90)/100;
775
- // set mean values of sure detected bounds (rounded precisely)
776
- if ( m[i*16+4]) JOB->res.lines.m1[i]=(m[i*16+0]+m[i*16+4]/2)/m[i*16+4];
777
- if ( m[i*16+5]) JOB->res.lines.m2[i]=(m[i*16+1]+m[i*16+5]/2)/m[i*16+5];
778
- if ( m[i*16+6]) JOB->res.lines.m3[i]=(m[i*16+2]+m[i*16+6]/2)/m[i*16+6];
779
- if ( m[i*16+7]) JOB->res.lines.m4[i]=(m[i*16+3]+m[i*16+7]/2)/m[i*16+7];
780
- // care about very small fonts
781
- if (JOB->res.lines.m2[i]-JOB->res.lines.m1[i]<=1 && m[i*16+5]==0 && m[i*16+4])
782
- JOB->res.lines.m2[i]=JOB->res.lines.m1[i]+2;
783
- if (JOB->res.lines.m2[i]-JOB->res.lines.m1[i]<=1 && m[i*16+4]==0 && m[i*16+5])
784
- JOB->res.lines.m1[i]=JOB->res.lines.m2[i]-2;
785
- if (JOB->res.lines.m4[i]-JOB->res.lines.m3[i]<=1 && m[i*16+7]==0 && m[i*16+6])
786
- JOB->res.lines.m4[i]=JOB->res.lines.m3[i]+2;
787
- if (JOB->res.lines.m4[i]-JOB->res.lines.m3[i]<=1 && m[i*16+6]==0 && m[i*16+7])
788
- JOB->res.lines.m3[i]=JOB->res.lines.m4[i]-2;
789
- if ( m[i*16+7]<1 &&
790
- JOB->res.lines.m4[i]
791
- <=JOB->res.lines.m3[i]+(JOB->res.lines.m3[i]-JOB->res.lines.m2[i])/4 )
792
- JOB->res.lines.m4[i]=
793
- JOB->res.lines.m3[i]+(JOB->res.lines.m3[i]-JOB->res.lines.m2[i])/4;
794
- if ( m[i*16+7]<1 && m[i*16+12+2]>0 && // m4 < max.m3+..
795
- JOB->res.lines.m4[i] < 2*m[i*16+12+2]-JOB->res.lines.m3[i]+2 )
796
- JOB->res.lines.m4[i] = 2*m[i*16+12+2]-JOB->res.lines.m3[i]+2;
797
- if (JOB->res.lines.m4[i]<=JOB->res.lines.m3[i])
798
- JOB->res.lines.m4[i]= JOB->res.lines.m3[i]+1; /* 4x6 */
799
-
800
- if (JOB->cfg.verbose & 17)
801
- fprintf(stderr, "\n# line= %3d m= %4d %+3d %+3d %+3d "
802
- " n= %2d %2d %2d %2d w= %3d diff= %d",
803
- i, JOB->res.lines.m1[i],
804
- JOB->res.lines.m2[i] - JOB->res.lines.m1[i],
805
- JOB->res.lines.m3[i] - JOB->res.lines.m1[i],
806
- JOB->res.lines.m4[i] - JOB->res.lines.m1[i],
807
- m[i*16+4],m[i*16+5],m[i*16+6],m[i*16+7],
808
- JOB->res.lines.wt[i], diff);
809
- }
810
- diff=0; // count adjusted chars
811
- #if 1
812
- if (dx)
813
- for_each_data(&(JOB->res.boxlist)) {
814
- box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
815
- if (box2->line<=0) continue;
816
- /* check if box was on the wrong line, ToDo: search a better line */
817
- if (2*box2->y0<2*JOB->res.lines.m1[box2->line]
818
- -JOB->res.lines.m4[box2->line]
819
- +JOB->res.lines.m1[box2->line]) box2->line=0;
820
- if (2*box2->y1>2*JOB->res.lines.m4[box2->line]
821
- +JOB->res.lines.m4[box2->line]
822
- -JOB->res.lines.m1[box2->line]) box2->line=0;
823
- /* do adjustments */
824
- if (box2->num_ac>0
825
- && box2->num_ac > 31 && box2->tac[0] < 127 /* islower(>256) may SIGSEGV */
826
- && strchr("cCoOpPsSuUvVwWxXyYzZ",(char)box2->tac[0])) { // no_wchar
827
- if (box2->y0-((box2->x1)*dy/dx)
828
- < (JOB->res.lines.m1[box2->line]+JOB->res.lines.m2[box2->line])/2
829
- && islower(box2->tac[0])
830
- ) { setac(box2,toupper((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
831
- if (box2->y0-((box2->x1)*dy/dx)
832
- > (JOB->res.lines.m1[box2->line]+JOB->res.lines.m2[box2->line]+1)/2
833
- && isupper(box2->tac[0])
834
- ){ setac(box2,tolower((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
835
- }
836
- box2->m1=JOB->res.lines.m1[box2->line]+((box2->x1)*dy/dx);
837
- box2->m2=JOB->res.lines.m2[box2->line]+((box2->x1)*dy/dx);
838
- box2->m3=JOB->res.lines.m3[box2->line]+((box2->x1)*dy/dx);
839
- box2->m4=JOB->res.lines.m4[box2->line]+((box2->x1)*dy/dx);
840
- } end_for_each(&(JOB->res.boxlist));
841
- #endif
842
-
843
- free(m);
844
- if(JOB->cfg.verbose) fprintf(stderr,"\n# changed_chars= %d\n",diff);
845
- return(diff);
846
- }
847
-
848
- /* ---- measure mean character
849
- * recalculate mean width and high after changes in boxlist
850
- * ToDo: only within a Range?
851
- */
852
- int calc_average() {
853
- int i = 0, x0, y0, x1, y1;
854
- struct box *box4;
855
-
856
- JOB->res.numC = 0;
857
- JOB->res.sumY = 0;
858
- JOB->res.sumX = 0;
859
- for_each_data(&(JOB->res.boxlist)) {
860
- box4 = (struct box *)list_get_current(&(JOB->res.boxlist));
861
- if( box4->c != PICTURE ){
862
- x0 = box4->x0; x1 = box4->x1;
863
- y0 = box4->y0; y1 = box4->y1;
864
- i++;
865
- if (JOB->res.avX * JOB->res.avY > 0) {
866
- if (x1 - x0 + 1 > 4 * JOB->res.avX
867
- && y1 - y0 + 1 > 4 * JOB->res.avY) continue; /* small picture */
868
- if (4 * (y1 - y0 + 1) < JOB->res.avY || y1 - y0 < 2)
869
- continue; // dots .,-_ etc.
870
- }
871
- if (x1 - x0 + 1 < 4
872
- && y1 - y0 + 1 < 6 ) continue; /* dots etc */
873
- JOB->res.sumX += x1 - x0 + 1;
874
- JOB->res.sumY += y1 - y0 + 1;
875
- JOB->res.numC++;
876
- }
877
- } end_for_each(&(JOB->res.boxlist));
878
- if ( JOB->res.numC ) { /* avoid div 0 */
879
- JOB->res.avY = (JOB->res.sumY+JOB->res.numC/2) / JOB->res.numC;
880
- JOB->res.avX = (JOB->res.sumX+JOB->res.numC/2) / JOB->res.numC;
881
- }
882
- if (JOB->cfg.verbose){
883
- fprintf(stderr, "# averages: mXmY= %d %d nC= %d n= %d\n",
884
- JOB->res.avX, JOB->res.avY, JOB->res.numC, i);
885
- }
886
- return 0;
887
- }
888
-
889
-
890
- /* ---- analyse boxes, find pictures and mark (do this first!!!)
891
- */
892
- int detect_pictures(job_t *job) {
893
- int i = 0, x0, y0, x1, y1, num_h;
894
- struct box *box2, *box4;
895
-
896
- if ( job->res.numC == 0 ) {
897
- if (job->cfg.verbose) fprintf(stderr,
898
- "# detect.c L%d Warning: numC=0\n", __LINE__);
899
- return -1;
900
- }
901
- /* ToDo: set Y to uppercase mean value? */
902
- job->res.avY = (job->res.sumY+job->res.numC/2) / job->res.numC;
903
- job->res.avX = (job->res.sumX+job->res.numC/2) / job->res.numC;
904
- /* ToDo: two highest volumes? crosses, on extreme volume + on border */
905
- if (job->cfg.verbose)
906
- fprintf(stderr, "# detect.c L%d pictures, frames, mXmY= %d %d ... ",
907
- __LINE__, job->res.avX, job->res.avY);
908
- for_each_data(&(job->res.boxlist)) {
909
- box2 = (struct box *)list_get_current(&(job->res.boxlist));
910
- if (box2->c == PICTURE) continue;
911
- x0 = box2->x0; x1 = box2->x1;
912
- y0 = box2->y0; y1 = box2->y1;
913
-
914
- /* pictures could be of unusual size */
915
- if (x1 - x0 + 1 > 4 * job->res.avX || y1 - y0 + 1 > 4 * job->res.avY) {
916
- /* count objects on same baseline which could be chars */
917
- /* else: big headlines could be misinterpreted as pictures */
918
- num_h=0;
919
- for_each_data(&(job->res.boxlist)) {
920
- box4 = (struct box *)list_get_current(&(job->res.boxlist));
921
- if (box4->c == PICTURE) continue;
922
- if (box4->y1-box4->y0 > 2*(y1-y0)) continue;
923
- if (2*(box4->y1-box4->y0) < y1-y0) continue;
924
- if (box4->y0 > y0 + (y1-y0+1)/2
925
- || box4->y0 < y0 - (y1-y0+1)/2
926
- || box4->y1 > y1 + (y1-y0+1)/2
927
- || box4->y1 < y1 - (y1-y0+1)/2) continue;
928
- // ToDo: continue if numcross() only 1, example: |||IIIll|||
929
- num_h++;
930
- } end_for_each(&(job->res.boxlist));
931
- if (num_h>4) continue;
932
- box2->c = PICTURE;
933
- i++;
934
- }
935
- /* ToDo: pictures could have low contrast=Sum((pixel(p,x,y)-160)^2) */
936
- } end_for_each(&(job->res.boxlist));
937
- // start second iteration
938
- if (job->cfg.verbose) {
939
- fprintf(stderr, " %d - boxes %d\n", i, job->res.numC-i);
940
- }
941
- calc_average();
942
- return 0;
943
- }