entangledstate-isbn 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (290) hide show
  1. data/README +1 -1
  2. data/Rakefile +0 -18
  3. data/VERSION +1 -0
  4. data/isbn.gemspec +290 -7
  5. data/lib/isbn.rb +6 -6
  6. data/src/gocr-0.48/.cvsignore +6 -0
  7. data/src/gocr-0.48/AUTHORS +7 -0
  8. data/src/gocr-0.48/BUGS +55 -0
  9. data/src/gocr-0.48/CREDITS +17 -0
  10. data/src/gocr-0.48/HISTORY +243 -0
  11. data/src/gocr-0.48/INSTALL +83 -0
  12. data/src/gocr-0.48/Makefile +193 -0
  13. data/src/gocr-0.48/Makefile.in +193 -0
  14. data/src/gocr-0.48/README +165 -0
  15. data/src/gocr-0.48/READMEde.txt +80 -0
  16. data/src/gocr-0.48/REMARK.txt +18 -0
  17. data/src/gocr-0.48/REVIEW +538 -0
  18. data/src/gocr-0.48/TODO +65 -0
  19. data/src/gocr-0.48/bin/.cvsignore +2 -0
  20. data/src/gocr-0.48/bin/create_db +38 -0
  21. data/src/gocr-0.48/bin/gocr.tcl +527 -0
  22. data/src/gocr-0.48/bin/gocr_chk.sh +44 -0
  23. data/src/gocr-0.48/configure +4689 -0
  24. data/src/gocr-0.48/configure.in +71 -0
  25. data/src/gocr-0.48/doc/.#Makefile.1.6 +39 -0
  26. data/src/gocr-0.48/doc/.cvsignore +2 -0
  27. data/src/gocr-0.48/doc/Makefile +39 -0
  28. data/src/gocr-0.48/doc/Makefile.in +39 -0
  29. data/src/gocr-0.48/doc/example.dtd +53 -0
  30. data/src/gocr-0.48/doc/example.xml +21 -0
  31. data/src/gocr-0.48/doc/examples.txt +67 -0
  32. data/src/gocr-0.48/doc/gocr.html +578 -0
  33. data/src/gocr-0.48/doc/unicode.txt +57 -0
  34. data/src/gocr-0.48/examples/.#Makefile.1.22 +166 -0
  35. data/src/gocr-0.48/examples/4x6.png +0 -0
  36. data/src/gocr-0.48/examples/4x6.txt +2 -0
  37. data/src/gocr-0.48/examples/5x7.png +0 -0
  38. data/src/gocr-0.48/examples/5x7.png.txt +2 -0
  39. data/src/gocr-0.48/examples/5x8.png +0 -0
  40. data/src/gocr-0.48/examples/5x8.png.txt +2 -0
  41. data/src/gocr-0.48/examples/Makefile +166 -0
  42. data/src/gocr-0.48/examples/color.fig +20 -0
  43. data/src/gocr-0.48/examples/ex.fig +16 -0
  44. data/src/gocr-0.48/examples/font.tex +22 -0
  45. data/src/gocr-0.48/examples/font1.tex +46 -0
  46. data/src/gocr-0.48/examples/font2.fig +27 -0
  47. data/src/gocr-0.48/examples/font_nw.tex +24 -0
  48. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  49. data/src/gocr-0.48/examples/handwrt1.txt +10 -0
  50. data/src/gocr-0.48/examples/inverse.fig +20 -0
  51. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  52. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  53. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +4 -0
  54. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a.txt +6 -0
  56. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-b.png.txt +4 -0
  58. data/src/gocr-0.48/examples/polish.tex +28 -0
  59. data/src/gocr-0.48/examples/rotate45.fig +14 -0
  60. data/src/gocr-0.48/examples/score +36 -0
  61. data/src/gocr-0.48/examples/text.tex +28 -0
  62. data/src/gocr-0.48/gocr.spec +143 -0
  63. data/src/gocr-0.48/gpl.html +537 -0
  64. data/src/gocr-0.48/include/.cvsignore +2 -0
  65. data/src/gocr-0.48/include/config.h +36 -0
  66. data/src/gocr-0.48/include/config.h.in +36 -0
  67. data/src/gocr-0.48/include/version.h +2 -0
  68. data/src/gocr-0.48/install-sh +3 -0
  69. data/src/gocr-0.48/make.bat +57 -0
  70. data/src/gocr-0.48/man/.cvsignore +2 -0
  71. data/src/gocr-0.48/man/Makefile +29 -0
  72. data/src/gocr-0.48/man/Makefile.in +29 -0
  73. data/src/gocr-0.48/man/man1/gocr.1 +166 -0
  74. data/src/gocr-0.48/src/.cvsignore +4 -0
  75. data/src/gocr-0.48/src/Makefile +132 -0
  76. data/src/gocr-0.48/src/Makefile.in +132 -0
  77. data/src/gocr-0.48/src/amiga.h +31 -0
  78. data/src/gocr-0.48/src/barcode.c +846 -0
  79. data/src/gocr-0.48/src/barcode.c.orig +593 -0
  80. data/src/gocr-0.48/src/barcode.h +11 -0
  81. data/src/gocr-0.48/src/box.c +372 -0
  82. data/src/gocr-0.48/src/database.c +462 -0
  83. data/src/gocr-0.48/src/detect.c +943 -0
  84. data/src/gocr-0.48/src/gocr.c +373 -0
  85. data/src/gocr-0.48/src/gocr.h +288 -0
  86. data/src/gocr-0.48/src/jconv.c +168 -0
  87. data/src/gocr-0.48/src/job.c +84 -0
  88. data/src/gocr-0.48/src/lines.c +350 -0
  89. data/src/gocr-0.48/src/list.c +334 -0
  90. data/src/gocr-0.48/src/list.h +90 -0
  91. data/src/gocr-0.48/src/ocr0.c +6756 -0
  92. data/src/gocr-0.48/src/ocr0.h +63 -0
  93. data/src/gocr-0.48/src/ocr0n.c +1475 -0
  94. data/src/gocr-0.48/src/ocr1.c +85 -0
  95. data/src/gocr-0.48/src/ocr1.h +3 -0
  96. data/src/gocr-0.48/src/otsu.c +289 -0
  97. data/src/gocr-0.48/src/otsu.h +23 -0
  98. data/src/gocr-0.48/src/output.c +289 -0
  99. data/src/gocr-0.48/src/output.h +37 -0
  100. data/src/gocr-0.48/src/pcx.c +153 -0
  101. data/src/gocr-0.48/src/pcx.h +9 -0
  102. data/src/gocr-0.48/src/pgm2asc.c +2893 -0
  103. data/src/gocr-0.48/src/pgm2asc.h +105 -0
  104. data/src/gocr-0.48/src/pixel.c +537 -0
  105. data/src/gocr-0.48/src/pnm.c +533 -0
  106. data/src/gocr-0.48/src/pnm.h +35 -0
  107. data/src/gocr-0.48/src/progress.c +87 -0
  108. data/src/gocr-0.48/src/progress.h +42 -0
  109. data/src/gocr-0.48/src/remove.c +703 -0
  110. data/src/gocr-0.48/src/tga.c +87 -0
  111. data/src/gocr-0.48/src/tga.h +6 -0
  112. data/src/gocr-0.48/src/unicode.c +1314 -0
  113. data/src/gocr-0.48/src/unicode.h +1257 -0
  114. data/src/jpeg-7/Makefile.am +133 -0
  115. data/src/jpeg-7/Makefile.in +1089 -0
  116. data/src/jpeg-7/README +322 -0
  117. data/src/jpeg-7/aclocal.m4 +8990 -0
  118. data/src/jpeg-7/ansi2knr.1 +36 -0
  119. data/src/jpeg-7/ansi2knr.c +739 -0
  120. data/src/jpeg-7/cderror.h +132 -0
  121. data/src/jpeg-7/cdjpeg.c +181 -0
  122. data/src/jpeg-7/cdjpeg.h +187 -0
  123. data/src/jpeg-7/change.log +270 -0
  124. data/src/jpeg-7/cjpeg.1 +325 -0
  125. data/src/jpeg-7/cjpeg.c +616 -0
  126. data/src/jpeg-7/ckconfig.c +402 -0
  127. data/src/jpeg-7/coderules.txt +118 -0
  128. data/src/jpeg-7/config.guess +1561 -0
  129. data/src/jpeg-7/config.sub +1686 -0
  130. data/src/jpeg-7/configure +17139 -0
  131. data/src/jpeg-7/configure.ac +317 -0
  132. data/src/jpeg-7/depcomp +630 -0
  133. data/src/jpeg-7/djpeg.1 +251 -0
  134. data/src/jpeg-7/djpeg.c +617 -0
  135. data/src/jpeg-7/example.c +433 -0
  136. data/src/jpeg-7/filelist.txt +215 -0
  137. data/src/jpeg-7/install-sh +520 -0
  138. data/src/jpeg-7/install.txt +1097 -0
  139. data/src/jpeg-7/jaricom.c +148 -0
  140. data/src/jpeg-7/jcapimin.c +282 -0
  141. data/src/jpeg-7/jcapistd.c +161 -0
  142. data/src/jpeg-7/jcarith.c +921 -0
  143. data/src/jpeg-7/jccoefct.c +453 -0
  144. data/src/jpeg-7/jccolor.c +459 -0
  145. data/src/jpeg-7/jcdctmgr.c +482 -0
  146. data/src/jpeg-7/jchuff.c +1612 -0
  147. data/src/jpeg-7/jcinit.c +65 -0
  148. data/src/jpeg-7/jcmainct.c +293 -0
  149. data/src/jpeg-7/jcmarker.c +667 -0
  150. data/src/jpeg-7/jcmaster.c +770 -0
  151. data/src/jpeg-7/jcomapi.c +106 -0
  152. data/src/jpeg-7/jconfig.bcc +48 -0
  153. data/src/jpeg-7/jconfig.cfg +45 -0
  154. data/src/jpeg-7/jconfig.dj +38 -0
  155. data/src/jpeg-7/jconfig.mac +43 -0
  156. data/src/jpeg-7/jconfig.manx +43 -0
  157. data/src/jpeg-7/jconfig.mc6 +52 -0
  158. data/src/jpeg-7/jconfig.sas +43 -0
  159. data/src/jpeg-7/jconfig.st +42 -0
  160. data/src/jpeg-7/jconfig.txt +155 -0
  161. data/src/jpeg-7/jconfig.vc +45 -0
  162. data/src/jpeg-7/jconfig.vms +37 -0
  163. data/src/jpeg-7/jconfig.wat +38 -0
  164. data/src/jpeg-7/jcparam.c +632 -0
  165. data/src/jpeg-7/jcprepct.c +358 -0
  166. data/src/jpeg-7/jcsample.c +545 -0
  167. data/src/jpeg-7/jctrans.c +381 -0
  168. data/src/jpeg-7/jdapimin.c +396 -0
  169. data/src/jpeg-7/jdapistd.c +275 -0
  170. data/src/jpeg-7/jdarith.c +762 -0
  171. data/src/jpeg-7/jdatadst.c +151 -0
  172. data/src/jpeg-7/jdatasrc.c +212 -0
  173. data/src/jpeg-7/jdcoefct.c +736 -0
  174. data/src/jpeg-7/jdcolor.c +396 -0
  175. data/src/jpeg-7/jdct.h +393 -0
  176. data/src/jpeg-7/jddctmgr.c +382 -0
  177. data/src/jpeg-7/jdhuff.c +1309 -0
  178. data/src/jpeg-7/jdinput.c +384 -0
  179. data/src/jpeg-7/jdmainct.c +512 -0
  180. data/src/jpeg-7/jdmarker.c +1360 -0
  181. data/src/jpeg-7/jdmaster.c +663 -0
  182. data/src/jpeg-7/jdmerge.c +400 -0
  183. data/src/jpeg-7/jdpostct.c +290 -0
  184. data/src/jpeg-7/jdsample.c +361 -0
  185. data/src/jpeg-7/jdtrans.c +136 -0
  186. data/src/jpeg-7/jerror.c +252 -0
  187. data/src/jpeg-7/jerror.h +304 -0
  188. data/src/jpeg-7/jfdctflt.c +174 -0
  189. data/src/jpeg-7/jfdctfst.c +230 -0
  190. data/src/jpeg-7/jfdctint.c +4348 -0
  191. data/src/jpeg-7/jidctflt.c +242 -0
  192. data/src/jpeg-7/jidctfst.c +368 -0
  193. data/src/jpeg-7/jidctint.c +5137 -0
  194. data/src/jpeg-7/jinclude.h +91 -0
  195. data/src/jpeg-7/jmemansi.c +167 -0
  196. data/src/jpeg-7/jmemdos.c +638 -0
  197. data/src/jpeg-7/jmemdosa.asm +379 -0
  198. data/src/jpeg-7/jmemmac.c +289 -0
  199. data/src/jpeg-7/jmemmgr.c +1118 -0
  200. data/src/jpeg-7/jmemname.c +276 -0
  201. data/src/jpeg-7/jmemnobs.c +109 -0
  202. data/src/jpeg-7/jmemsys.h +198 -0
  203. data/src/jpeg-7/jmorecfg.h +369 -0
  204. data/src/jpeg-7/jpegint.h +395 -0
  205. data/src/jpeg-7/jpeglib.h +1135 -0
  206. data/src/jpeg-7/jpegtran.1 +272 -0
  207. data/src/jpeg-7/jpegtran.c +546 -0
  208. data/src/jpeg-7/jquant1.c +856 -0
  209. data/src/jpeg-7/jquant2.c +1310 -0
  210. data/src/jpeg-7/jutils.c +179 -0
  211. data/src/jpeg-7/jversion.h +14 -0
  212. data/src/jpeg-7/libjpeg.map +4 -0
  213. data/src/jpeg-7/libjpeg.txt +3067 -0
  214. data/src/jpeg-7/ltmain.sh +8406 -0
  215. data/src/jpeg-7/makcjpeg.st +36 -0
  216. data/src/jpeg-7/makdjpeg.st +36 -0
  217. data/src/jpeg-7/makeadsw.vc6 +77 -0
  218. data/src/jpeg-7/makeasln.vc9 +33 -0
  219. data/src/jpeg-7/makecdep.vc6 +82 -0
  220. data/src/jpeg-7/makecdsp.vc6 +130 -0
  221. data/src/jpeg-7/makecmak.vc6 +159 -0
  222. data/src/jpeg-7/makecvcp.vc9 +186 -0
  223. data/src/jpeg-7/makeddep.vc6 +82 -0
  224. data/src/jpeg-7/makeddsp.vc6 +130 -0
  225. data/src/jpeg-7/makedmak.vc6 +159 -0
  226. data/src/jpeg-7/makedvcp.vc9 +186 -0
  227. data/src/jpeg-7/makefile.ansi +220 -0
  228. data/src/jpeg-7/makefile.bcc +291 -0
  229. data/src/jpeg-7/makefile.dj +226 -0
  230. data/src/jpeg-7/makefile.manx +220 -0
  231. data/src/jpeg-7/makefile.mc6 +255 -0
  232. data/src/jpeg-7/makefile.mms +224 -0
  233. data/src/jpeg-7/makefile.sas +258 -0
  234. data/src/jpeg-7/makefile.unix +234 -0
  235. data/src/jpeg-7/makefile.vc +217 -0
  236. data/src/jpeg-7/makefile.vms +142 -0
  237. data/src/jpeg-7/makefile.wat +239 -0
  238. data/src/jpeg-7/makejdep.vc6 +423 -0
  239. data/src/jpeg-7/makejdsp.vc6 +285 -0
  240. data/src/jpeg-7/makejdsw.vc6 +29 -0
  241. data/src/jpeg-7/makejmak.vc6 +425 -0
  242. data/src/jpeg-7/makejsln.vc9 +17 -0
  243. data/src/jpeg-7/makejvcp.vc9 +328 -0
  244. data/src/jpeg-7/makeproj.mac +213 -0
  245. data/src/jpeg-7/makerdep.vc6 +6 -0
  246. data/src/jpeg-7/makerdsp.vc6 +78 -0
  247. data/src/jpeg-7/makermak.vc6 +110 -0
  248. data/src/jpeg-7/makervcp.vc9 +133 -0
  249. data/src/jpeg-7/maketdep.vc6 +43 -0
  250. data/src/jpeg-7/maketdsp.vc6 +122 -0
  251. data/src/jpeg-7/maketmak.vc6 +131 -0
  252. data/src/jpeg-7/maketvcp.vc9 +178 -0
  253. data/src/jpeg-7/makewdep.vc6 +6 -0
  254. data/src/jpeg-7/makewdsp.vc6 +78 -0
  255. data/src/jpeg-7/makewmak.vc6 +110 -0
  256. data/src/jpeg-7/makewvcp.vc9 +133 -0
  257. data/src/jpeg-7/makljpeg.st +68 -0
  258. data/src/jpeg-7/maktjpeg.st +30 -0
  259. data/src/jpeg-7/makvms.opt +4 -0
  260. data/src/jpeg-7/missing +376 -0
  261. data/src/jpeg-7/rdbmp.c +439 -0
  262. data/src/jpeg-7/rdcolmap.c +253 -0
  263. data/src/jpeg-7/rdgif.c +38 -0
  264. data/src/jpeg-7/rdjpgcom.1 +63 -0
  265. data/src/jpeg-7/rdjpgcom.c +515 -0
  266. data/src/jpeg-7/rdppm.c +459 -0
  267. data/src/jpeg-7/rdrle.c +387 -0
  268. data/src/jpeg-7/rdswitch.c +365 -0
  269. data/src/jpeg-7/rdtarga.c +500 -0
  270. data/src/jpeg-7/structure.txt +945 -0
  271. data/src/jpeg-7/testimg.bmp +0 -0
  272. data/src/jpeg-7/testimg.jpg +0 -0
  273. data/src/jpeg-7/testimg.ppm +4 -0
  274. data/src/jpeg-7/testimgp.jpg +0 -0
  275. data/src/jpeg-7/testorig.jpg +0 -0
  276. data/src/jpeg-7/testprog.jpg +0 -0
  277. data/src/jpeg-7/transupp.c +1533 -0
  278. data/src/jpeg-7/transupp.h +205 -0
  279. data/src/jpeg-7/usage.txt +605 -0
  280. data/src/jpeg-7/wizard.txt +211 -0
  281. data/src/jpeg-7/wrbmp.c +442 -0
  282. data/src/jpeg-7/wrgif.c +399 -0
  283. data/src/jpeg-7/wrjpgcom.1 +103 -0
  284. data/src/jpeg-7/wrjpgcom.c +583 -0
  285. data/src/jpeg-7/wrppm.c +269 -0
  286. data/src/jpeg-7/wrrle.c +305 -0
  287. data/src/jpeg-7/wrtarga.c +253 -0
  288. metadata +287 -6
  289. data/LICENSE +0 -20
  290. data/VERSION.yml +0 -4
@@ -0,0 +1,943 @@
1
+ /*
2
+ This is an Optical-Character-Recognition program
3
+ Copyright (C) 2000-2007 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ check README for my email address
20
+ */
21
+
22
+ #include <stdlib.h>
23
+ #include <stdio.h>
24
+ #include <string.h>
25
+ #include <ctype.h> // toupper, tolower
26
+ #include "pgm2asc.h"
27
+ #include "gocr.h"
28
+
29
+ // ----- detect lines ---------------
30
+ /* suggestion: Fourier transform and set line frequency where the
31
+ amplitude has a maximum (JS: slow and not smart enough).
32
+
33
+ option: range for line numbers 1..1000 or similar
34
+ todo: look for thickest line, and divide if thickness=2*mean_thickness
35
+ Set these elements of the box structs:
36
+
37
+ m1 <-- top of upper case letters and (bdfhkl) (can differ)
38
+ m2 <-- top of letters (acegmnopqrsuvwxyz)
39
+ m3 <-- baseline
40
+ m4 <-- bottom of hanging letters (gqpy)
41
+
42
+ performance can be improved by working with a temporary
43
+ list of boxes of the special text line
44
+
45
+ - Jun23,00 more robustness of m3 (test liebfrau1)
46
+ - Feb01,02 more robustness of m4 (test s46_084.pgm)
47
+ - Dec03,12 fix problems with footnotes
48
+ ToDo:
49
+ - generate lists of boxes per line (faster access)
50
+ - use statistics
51
+ - for each box look at its neighbours and set box-m1..m4
52
+ - m[1..4].max .min if m4.min-m3.max<1 probability lower
53
+ */
54
+ int detect_lines1(pix * p, int x0, int y0, int dx, int dy)
55
+ {
56
+ int i, jj, j2, y, yy, my, mi, mc, i1, i2, i3, i4,
57
+ m1, m2, m3, m4, ma1, ma2, ma3, ma4, m3pre, m4pre;
58
+ struct box *box2, *box3; /* box3 is for verbose / debugging */
59
+ struct tlines *lines = &JOB->res.lines;
60
+
61
+ /* ToDo: optional read line-data from external source??? */
62
+ if (lines->num == 0) { // initialize one dummy-line for pictures etc.
63
+ lines->m4[0] = 0;
64
+ lines->m3[0] = 0;
65
+ lines->m2[0] = 0;
66
+ lines->m1[0] = 0;
67
+ lines->x0[0] = p->x; /* expand to left end during detection */
68
+ lines->x1[0] = 0; /* expand to right end */
69
+ lines->pitch[0] = JOB->cfg.spc; /* default word pitch */
70
+ lines->mono[0] = 0; /* default spacing, 0 = prop */
71
+ lines->num++;
72
+ }
73
+ i = lines->num;
74
+ if (dy < 4)
75
+ return 0; /* image is to low for latin chars */
76
+ my = jj = 0;
77
+ // get the mean height of all hollow chars
78
+ // (better than mean value of everything including bg-pattern or dust?)
79
+ for_each_data(&(JOB->res.boxlist)) {
80
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
81
+ if ( box2->c != PICTURE
82
+ && box2->num_frames>1 && box2->num_frames<3 /* 1 or 2 holes */
83
+ && box2->y0 >= y0 && box2->y1 <= y0 + dy
84
+ && box2->x0 >= x0 && box2->x1 <= x0 + dx
85
+ && box2->frame_vol[0]>0
86
+ && box2->frame_vol[1]<0
87
+ ) {
88
+ jj++;
89
+ my += box2->y1 - box2->y0 + 1;
90
+ }
91
+ } end_for_each(&(JOB->res.boxlist));
92
+ if (jj==0) {
93
+ // get the mean height of all chars
94
+ for_each_data(&(JOB->res.boxlist)) {
95
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
96
+ if ( box2->c != PICTURE
97
+ && box2->y1 - box2->y0 + 1 >= 4 /* 4x6 font */
98
+ && box2->y0 >= y0 && box2->y1 <= y0 + dy
99
+ && box2->x0 >= x0 && box2->x1 <= x0 + dx ) {
100
+ jj++;
101
+ my += box2->y1 - box2->y0 + 1;
102
+ }
103
+ } end_for_each(&(JOB->res.boxlist));
104
+ }
105
+ if (jj == 0)
106
+ return 0; /* no chars detected */
107
+
108
+
109
+ /* ToDo: a better way could be to mark good boxes (of typical high a-zA-Z0-9)
110
+ * first and handle only marked boxes for line scan, exclude ?!,.:;etc
111
+ * but without setect the chars itself (using good statistics)
112
+ * see adjust_text_lines()
113
+ */
114
+ my /= jj; /* we only care about chars with high arround my */
115
+ if (JOB->cfg.verbose & 16)
116
+ fprintf(stderr,"\n# detect_lines1(%d %d %d %d) vvv&16 chars=%d my=%d\n# ",
117
+ x0, y0, dx, dy, jj, my);
118
+ // "my" is the average over the whole image (bad, if different fontsizes)
119
+
120
+ if (my < 4)
121
+ return 0; /* mean high is to small => error */
122
+
123
+ m4pre=m3pre=y0; /* lower bond of upper line */
124
+ // better function for scanning line around a letter ???
125
+ // or define lines around known chars "eaTmM"
126
+ for (j2 = y = y0; y < y0 + dy; y++) {
127
+ // look for max. of upper and lower bound of next line
128
+ m1 = y0 + dy;
129
+ jj = 0;
130
+ #if 1
131
+ /* this is only for test runs */
132
+ if (JOB->cfg.verbose & 16)
133
+ fprintf(stderr,"searching new line %d\n# ",i /* lines->num */);
134
+ #endif
135
+
136
+ box3 = NULL; /* mark the most upper box starting next line */
137
+ // find highest point of next line => store to m1-min (m1>=y)
138
+ // only objects greater 2/3*my and smaller 3*my are allowed
139
+ // a higher "!" at end of line can result in a to low m1
140
+ for_each_data(&(JOB->res.boxlist)) {
141
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
142
+ if (box2->line>0 || box2->c == PICTURE) continue;
143
+ if (lines->dx)
144
+ yy = lines->dy * box2->x0 / (lines->dx); /* correct crooked lines */
145
+ else yy=0;
146
+ if ( box2->y0 >= y + yy && box2->y1 < y0 + dy // lower than y
147
+ && box2->x0 >= x0 && box2->x1 < x0 + dx // within box ?
148
+ && box2->c != PICTURE // no picture
149
+ && box2->num_boxes <= 1 // ignore 2 for "!?i" 3 for "&auml;"
150
+ && 3 * (box2->y1 - box2->y0) > 2 * my // not to small
151
+ && (box2->y1 - box2->y0) < 3 * my // not to big
152
+ && (box2->y1 - box2->y0) > 4) // minimum absolute size
153
+ {
154
+ if (box2->y0 < m1 + yy) {
155
+ m1 = box2->y0 - yy; /* highest upper boundary */
156
+ box3 = box2;
157
+ }
158
+ // fprintf(stderr,"\n %3d %3d %+3d %d m1= %3d",
159
+ // box2->x0, box2->y0, box2->y1 - box2->y0 + 1, box2->num_boxes, m1);
160
+ }
161
+ } end_for_each(&(JOB->res.boxlist));
162
+ if (!box3 || m1 >= y0+dy) break; /* no further line found */
163
+ if (JOB->cfg.verbose & 16)
164
+ fprintf(stderr," most upper box at new line xy= %4d %4d %+4d %+4d\n# ",
165
+ box3->x0, box3->y0, box3->x1-box3->x0, box3->y1-box3->y0);
166
+
167
+ // at the moment values depend from single chars, which can
168
+ // result in bad values (ex: 4x6 /\=)
169
+ // ToDo: 2) mean size of next line (store list of y0,y1)
170
+ // ToDo: 3) count num0[(y0-m1)*16/my], num1[(y1-m1)*16/my]
171
+ // ToDo: or down-top search horizontal nerarest neighbours
172
+ lines->x0[i] = x0 + dx - 1; /* expand during operation to left end */
173
+ lines->x1[i] = x0; /* expand to the right end of line */
174
+ m4=m2=m1; mi=m1+my; m3=m1+2*my; jj=0;
175
+ // find limits for upper bound, base line and ground line
176
+ // m2-max m3-min m4-max
177
+ for_each_data(&(JOB->res.boxlist)) {
178
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
179
+ if (box2->line>0 || box2->c == PICTURE) continue;
180
+ if ( box2->y0 < y0 || box2->y1 >= y0 + dy
181
+ || box2->x0 < x0 || box2->x1 >= x0 + dx ) continue; // out of image
182
+ if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx);
183
+ else yy=0;
184
+ /* check for ij-dots, used if chars of same high */
185
+ if ( box2->y0 >= y + yy
186
+ && box2->y0 >= y
187
+ && (box2->y1 - box2->y0) < my
188
+ && box2->y1 < m1 + yy + my/4
189
+ && box2->y0 < mi + yy ) {
190
+ mi = box2->y0 - yy; /* highest upper boundary i-dot */
191
+ }
192
+ // fprintf(stderr,"\n check %3d %3d-%3d y=%d yy=%d m1=%d", box2->x0, box2->y0, box2->y1, y, yy, m1);
193
+ /* get m2-max m3-min m4-max */
194
+ if ( box2->y0 >= y + yy // lower than y
195
+ && 3 * (box2->y1 - box2->y0 + 1) > 2 * my // right size ?
196
+ && (box2->y1 - box2->y0 + 1) < 3 * my // font mix, size = 2.6*my
197
+ && (box2->y1 - box2->y0 + 1) > 3 // 4x6 lowercase=4
198
+ && box2->y0 >= m1 // in m1 range?
199
+ && box2->y0 <= m1 + yy + 9 * my / 8 // my can be to small if mixed
200
+ // ToDo: we need a better (local?) algorithm for big headlines > 2*my
201
+ && box2->y1 <= m1 + yy + 3 * my
202
+ && box2->y1 >= m1 + yy + my / 2
203
+ // lines can differ in high, my may be to small (smaller headlines)
204
+ && box2->y0+box2->y1 <= 2*box3->y1
205
+ )
206
+ {
207
+ jj++; // count chars for debugging purpose
208
+ if (box2->y0 > m2 + yy) {
209
+ m2 = box2->y0 - yy; /* highest upper boundary */
210
+ if (JOB->cfg.verbose & 16)
211
+ fprintf(stderr," set m2= %d yy= %d\n# ",m2, yy);
212
+ }
213
+ if (box2->y1 > m4 + yy && (my>6 || box2->y1 < m3+my)) {
214
+ m4 = box2->y1 - yy; /* lowest lower boundary, small font lines can touch */
215
+ }
216
+ if ( box2->y1 < m3 + yy
217
+ && ( ( 2*box2->y1 > m2+ m4+yy && m2>m1)
218
+ || ( 4*box2->y1 > m1+3*m4+yy) ) ) // care for TeX: \(^1\)Footnote 2003
219
+ /* "'!?" could cause trouble here, therefore this lines */
220
+ /* ToDo: get_bw costs time, check pre and next */
221
+ if( get_bw(box2->x0,box2->x1,box2->y1+1 ,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 0
222
+ || get_bw(box2->x0,box2->x1,box2->y1+my/2,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 1
223
+ || num_cross(box2->x0,box2->x1,(box2->y0+box2->y1)/2,(box2->y0+box2->y1)/2,box2->p,JOB->cfg.cs)>2 )
224
+ {
225
+ m3 = box2->y1 - yy; /* highest lower boundary */
226
+ // printf("\n# set1 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
227
+ // out_x(box2);
228
+ }
229
+ if (box2->y0 + box2->y1 > 2*(m3 + yy)
230
+ && box2->y1 < m4 + yy - my/4 -1
231
+ && box2->y1 >= (m2 + m4)/2 // care for TeX: \(^1\)Footnote 2003
232
+ && m2 > m1 ) // be sure to not use ', m2 must be ok
233
+ {
234
+ m3 = box2->y1 - yy; /* highest lower boundary */
235
+ // printf("\n# set2 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
236
+ // out_x(box2);
237
+ }
238
+ if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
239
+ if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
240
+ // printf(" m=%3d %+2d %+2d %+2d yy=%3d\n",m1,m2-m1,m3-m1,m4-m1,yy);
241
+ }
242
+ } end_for_each(&(JOB->res.boxlist));
243
+
244
+ #if 1
245
+ /* this is only for test runs */
246
+ if (JOB->cfg.verbose & 16)
247
+ fprintf(stderr," step 1 y=%4d m= %4d %+3d %+3d %+3d"
248
+ " my=%2d chars=%3d\n# ",
249
+ y, m1, m2-m1, m3-m1, m4-m1, my, jj);
250
+ #endif
251
+
252
+ if (m3 == m1)
253
+ break;
254
+ #if 1 /* make averages about the line */
255
+ // same again better estimation
256
+ mc = (3 * m3 + m1) / 4; /* lower center ? */
257
+ ma1 = ma2 = ma3 = ma4 = i1 = i2 = i3 = i4 = jj = 0;
258
+ for_each_data(&(JOB->res.boxlist)) {
259
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
260
+ if (box2->line>0 || box2->c == PICTURE) continue;
261
+ if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
262
+ if (box2->y0 >= y + yy && box2->y1 < y0 + dy // lower than y
263
+ && box2->x0 >= x0 && box2->x1 < x0 + dx // in box ?
264
+ && box2->c != PICTURE // no picture
265
+ && 2 * (box2->y1 - box2->y0) > my // right size ?
266
+ && (box2->y1 - box2->y0) < 4 * my) {
267
+ if ( box2->y0 - yy >= m1-my/4
268
+ && box2->y0 - yy <= m2+my/4
269
+ && box2->y1 - yy >= m3-my/4
270
+ && box2->y1 - yy <= m4+my/4 ) { /* its within allowed range! */
271
+ // jj++; // not used
272
+ if (abs(box2->y0 - yy - m1) <= abs(box2->y0 - yy - m2))
273
+ { i1++; ma1 += box2->y0 - yy; }
274
+ else { i2++; ma2 += box2->y0 - yy; }
275
+ if (abs(box2->y1 - yy - m3) < abs(box2->y1 - yy - m4))
276
+ { i3++; ma3 += box2->y1 - yy; }
277
+ else { i4++; ma4 += box2->y1 - yy; }
278
+ if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
279
+ if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
280
+ }
281
+ }
282
+ } end_for_each(&(JOB->res.boxlist));
283
+
284
+ if (i1) m1 = (ma1+i1/2) / i1; /* best rounded */
285
+ if (i2) m2 = (ma2+i2/2) / i2;
286
+ if (i3) m3 = (ma3+i3-1) / i3; /* round up */
287
+ if (i4) m4 = (ma4+i4-1) / i4;
288
+ // printf("\n# .. set3 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
289
+
290
+ #endif
291
+
292
+ /* expand right and left end of line */
293
+ for_each_data(&(JOB->res.boxlist)) {
294
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
295
+ if (box2->line>0 || box2->c == PICTURE) continue;
296
+ if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
297
+ if ( box2->y0 >= y0 && box2->y1 < y0 + dy
298
+ && box2->x0 >= x0 && box2->x1 < x0 + dx // in box ?
299
+ && box2->c != PICTURE // no picture
300
+ && box2->y0 >= m1-1
301
+ && box2->y0 <= m4
302
+ && box2->y1 >= m1
303
+ && box2->y1 <= m4+1 ) { /* its within line */
304
+ if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
305
+ if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
306
+ }
307
+ } end_for_each(&(JOB->res.boxlist));
308
+
309
+ #if 1
310
+ /* this is only for test runs */
311
+ if (JOB->cfg.verbose & 16)
312
+ fprintf(stderr," step 2 y=%4d m= %4d %+3d %+3d %+3d\n# ",
313
+ y,m1,m2-m1,m3-m1,m4-m1);
314
+ #endif
315
+
316
+ if (m4 == m1) {
317
+ if(m3+m4>2*y) y = (m4+m3)/2; /* lower end may overlap the next line */
318
+ continue;
319
+ }
320
+ jj=0;
321
+ lines->wt[i] = 100;
322
+ if (5 * (m2 - m1 +1) < m3 - m2 || (m2 - m1) < 2) jj|=1; /* same high */
323
+ if (5 * (m4 - m3 +1) < m3 - m2 || (m4 - m3) < 1) jj|=2; /* same base */
324
+ if (jj&1) lines->wt[i] = 75*lines->wt[i]/100;
325
+ if (jj&2) lines->wt[i] = 75*lines->wt[i]/100;
326
+ if (jj>0 && JOB->cfg.verbose) {
327
+ fprintf(stderr," trouble on line %d, wt*100= %d\n",i,lines->wt[i]);
328
+ fprintf(stderr,"# m= %4d %+3d %+3d %+3d\n",m1,m2-m1,m3-m1,m4-m1);
329
+ fprintf(stderr,"# i= %3d %3d %3d %3d (counts)\n",i1,i2,i3,i4);
330
+ if (jj==3) fprintf(stderr,"# all boxes of same high!\n# ");
331
+ if (jj==1) fprintf(stderr,"# all boxes of same upper bound!\n# ");
332
+ if (jj==2) fprintf(stderr,"# all boxes of same lower bound!\n# ");
333
+ }
334
+ /* ToDo: check for dots ij,. to get the missing information */
335
+ #if 1
336
+ /* jj=3: ABCDEF123456 or mnmno or gqpy or lkhfdtb => we are in trouble */
337
+ if (jj==3 && (m4-m1)>my) { jj=0; m2=m1+my/8+1; m4=m3+my/8+1; } /* ABC123 */
338
+ /* using idots, may fail on "ABCDEFG&Auml;&Uuml;&Ouml;" */
339
+ if (jj==3 && mi>0 && mi<m1 && mi>m4pre) { jj=2; m1=mi; } /* use ij dots */
340
+ if (jj==1 && m2-(m3-m2)/4>m3pre ) { /* expect: acegmnopqrsuvwxyz */
341
+ if (m1-m4pre<m4-m1) /* fails for 0123ABCD+Q$ */
342
+ m1 = ( m2 + m4pre ) / 2 ;
343
+ else
344
+ m1 = ( m2 - (m3 - m2) / 4 );
345
+ }
346
+ if (jj==3)
347
+ m2 = m1 + (m3 - m1) / 4 + 1; /* expect: 0123456789ABCDEF */
348
+ if ( (m2 - m1) < 2)
349
+ m2 = m1 + 2; /* font hight < 8 pixel ? */
350
+ if (jj&2)
351
+ m4 = m3 + (m4 - m1) / 4 + 1; /* chars have same lower base */
352
+ if (jj>0 && JOB->cfg.verbose & 16) {
353
+ fprintf(stderr," m= %4d %+2d %+2d %+2d my= %4d\n# ",
354
+ m1, m2-m1, m3-m1, m4-m1, my);
355
+ }
356
+ #endif
357
+
358
+
359
+ { // empty space between lines
360
+ lines->m4[i] = m4;
361
+ lines->m3[i] = m3;
362
+ lines->m2[i] = m2;
363
+ lines->m1[i] = m1;
364
+ lines->pitch[i] = JOB->cfg.spc; /* default word pitch */
365
+ lines->mono[i] = 0; /* default spacing, 0=prop, 1=mono */
366
+ if (JOB->cfg.verbose & 16)
367
+ fprintf(stderr, " m= %4d %+3d %+3d %+3d w= %d (line=%d)\n# ",
368
+ m1, m2 - m1, m3 - m1, m4 - m1, lines->wt[i], i);
369
+ if (i < MAXlines && m4 - m1 > 4)
370
+ i++;
371
+ if (i >= MAXlines) {
372
+ fprintf(stderr, "Warning: lines>MAXlines\n");
373
+ break;
374
+ }
375
+ }
376
+ if (m3+m4>2*y) y = (m3+m4)/2; /* lower end may overlap the next line */
377
+ if (m3>m3pre) m3pre = m3; else m3=y0; /* set for next-line scan */
378
+ if (m4>m4pre) m4pre = m4; else m4=y0; /* set for next-line scan */
379
+ }
380
+ lines->num = i;
381
+ if (JOB->cfg.verbose)
382
+ fprintf(stderr, " num_lines= %d", lines->num-1);
383
+ return 0;
384
+ }
385
+
386
+ // ----- layout analysis of dx*dy region at x0,y0 -----
387
+ // ----- detect lines via recursive division (new version) ---------------
388
+ // what about text in frames???
389
+ // ToDo: change to bottom-top analyse or/and take rotation into account
390
+ int detect_lines2(pix *p,int x0,int y0,int dx,int dy,int r){
391
+ int i,x2,y2,x3,y3,x4,y4,x5,y5,y6,mx,my,x30,x31,y30,y31;
392
+ struct box *box2,*box3;
393
+ // shrink box
394
+ if(dx<=0 || dy<=0) return 0;
395
+ if(y0+dy< p->y/128 && y0==0) return 0; /* looks like dust */
396
+ if(y0>p->y-p->y/128 && y0+dy==p->y) return 0; /* looks like dust */
397
+
398
+ if(r>1000){ return -1;} // something is wrong
399
+ if(JOB->cfg.verbose)fprintf(stderr,"\n# r=%2d ",r);
400
+
401
+ mx=my=i=0; // mean thickness
402
+ // remove border, shrink size
403
+ x2=x0+dx-1; // min x
404
+ y2=y0+dy-1; // min y
405
+ x3=x0; // max x
406
+ y3=y0; // max y
407
+ for_each_data(&(JOB->res.boxlist)) {
408
+ box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
409
+ if(box3->y0>=y0 && box3->y1<y0+dy &&
410
+ box3->x0>=x0 && box3->x1<x0+dx)
411
+ {
412
+ if( box3->x1 > x3 ) x3=box3->x1; // max x
413
+ if( box3->x0 < x2 ) x2=box3->x0; // min x
414
+ if( box3->y1 > y3 ) y3=box3->y1; // max y
415
+ if( box3->y0 < y2 ) y2=box3->y0; // min y
416
+ if(box3->c!=PICTURE)
417
+ if( box3->y1 - box3->y0 > 4 )
418
+ {
419
+ i++;
420
+ mx+=box3->x1-box3->x0+1; // mean x
421
+ my+=box3->y1-box3->y0+1; // mean y
422
+ }
423
+ }
424
+ } end_for_each(&(JOB->res.boxlist));
425
+ x0=x2; dx=x3-x2+1;
426
+ y0=y2; dy=y3-y2+1;
427
+
428
+ if(i==0 || dx<=0 || dy<=0) return 0;
429
+ mx/=i;my/=i;
430
+ // better look for widest h/v-gap, ToDo: vertical lines?
431
+ if(r<8){ // max. depth
432
+
433
+ // detect widest horizontal gap
434
+ y2=y3=y4=y5=y6=0;
435
+ x2=x3=x4=x5=y5=0;// min. 3 lines
436
+ // position and thickness of gap, y6=num_gaps, nbox^2 ops
437
+ for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
438
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
439
+ if( box2->c!=PICTURE ) /* ToDo: not sure, that this is a good idea */
440
+ if( box2->y0>=y0 && box2->y1<y0+dy
441
+ && box2->x0>=x0 && box2->x1<x0+dx
442
+ && box2->y1-box2->y0>my/2 ){ // no pictures & dust???
443
+
444
+ y4=y0+dy-1; // nearest vert. box
445
+ x4=x0+dx-1;
446
+ // ToDo: rotate back box2->x1,y1 to x21,y21
447
+ // look for nearest lowest (y4) and right (x4) neighbour
448
+ // of every box (box2)
449
+ for_each_data(&(JOB->res.boxlist)) {
450
+ box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
451
+ if(box3!=box2)
452
+ if(box3->y0>=y0 && box3->y1<y0+dy)
453
+ if(box3->x0>=x0 && box3->x1<x0+dx)
454
+ if(box3->c!=PICTURE) /* ToDo: not sure, that this is a good idea */
455
+ if(box3->y1-box3->y0>my/2 ){
456
+ // ToDo: here we need the rotation around box2
457
+ x30=box3->x0;
458
+ x31=box3->x1;
459
+ y30=box3->y0;
460
+ y31=box3->y1;
461
+ // get min. distances to lower and to right direction
462
+ if( y31 > box2->y1 && y30 < y4 ) y4=y30-1;
463
+ if( x31 > box2->x1 && x30 < x4 ) x4=x30-1;
464
+ }
465
+ } end_for_each(&(JOB->res.boxlist));
466
+ // set the witdht and position of largest hor./vert. gap
467
+ // largest gap: width position
468
+ if( y4-box2->y1 > y3 ) { y3=y4-box2->y1; y2=(y4+box2->y1)/2; }
469
+ if( x4-box2->x1 > x3 ) { x3=x4-box2->x1; x2=(x4+box2->x1)/2; }
470
+ }
471
+ } end_for_each(&(JOB->res.boxlist));
472
+ // fprintf(stderr,"\n widest y-gap= %4d %4d",y2,y3);
473
+ // fprintf(stderr,"\n widest x-gap= %4d %4d",x2,x3);
474
+
475
+ i=0; // i=1 at x, i=2 at y
476
+ // this is the critical point
477
+ // is this a good decision or not???
478
+ if(x3>0 || y3>0){
479
+ if(x3>mx && x3>2*y3 && (dy>5*x3 || (x3>10*y3 && y3>0))) i=1; else
480
+ if(dx>5*y3 && y3>my) i=2;
481
+ }
482
+
483
+ // compare with largest box???
484
+ for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
485
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
486
+ if( box2->c == PICTURE )
487
+ if( box2->y0>=y0 && box2->y1<y0+dy
488
+ && box2->x0>=x0 && box2->x1<x0+dx )
489
+ { // hline ???
490
+ // largest gap: width position
491
+ if( box2->x1-box2->x0+4 > dx && box2->y1+4<y0+dy ) { y3=1; y2=box2->y1+1; i=2; break; }
492
+ if( box2->x1-box2->x0+4 > dx && box2->y0-4>y0 ) { y3=1; y2=box2->y0-1; i=2; break; }
493
+ if( box2->y1-box2->y0+4 > dy && box2->x1+4<x0+dx ) { x3=1; x2=box2->x1+1; i=1; break; }
494
+ if( box2->y1-box2->y0+4 > dy && box2->x0-4>x0 ) { x3=1; x2=box2->x0-1; i=1; break; }
495
+ }
496
+ } end_for_each(&(JOB->res.boxlist));
497
+ if(JOB->cfg.verbose)fprintf(stderr," i=%d",i);
498
+
499
+ if(JOB->cfg.verbose && i) fprintf(stderr," divide at %s x=%4d y=%4d dx=%4d dy=%4d",
500
+ ((i)?( (i==1)?"x":"y" ):"?"),x2,y2,x3,y3);
501
+ // divide horizontally if v-gap is thicker than h-gap
502
+ // and length is larger 5*width
503
+ if(i==1){ detect_lines2(p,x0,y0,x2-x0+1,dy,r+1);
504
+ return detect_lines2(p,x2,y0,x0+dx-x2+1,dy,r+1); }
505
+ // divide vertically
506
+ if(i==2){ detect_lines2(p,x0,y0,dx,y2-y0+1,r+1);
507
+ return detect_lines2(p,x0,y2,dx,y0+dy-y2+1,r+1);
508
+ }
509
+ }
510
+
511
+
512
+ if(JOB->cfg.verbose) if(dx<5 || dy<7)fprintf(stderr," empty box");
513
+ if(dx<5 || dy<7) return 0; // do not care about dust
514
+ if(JOB->cfg.verbose)fprintf(stderr, " box detected at %4d %4d %4d %4d",x0,y0,dx,dy);
515
+ if(JOB->tmp.ppo.p){
516
+ for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0 ,255,16);
517
+ for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0+dy-1,255,16);
518
+ for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0 ,y0+i ,255,16);
519
+ for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0+dx-1,y0+i ,255,16);
520
+ // writebmp("out10.bmp",p2,JOB->cfg.verbose); // colored should be better
521
+ }
522
+ return detect_lines1(p,x0-0*1,y0-0*2,dx+0*2,dy+0*3);
523
+
524
+ /*
525
+ struct tlines *lines = &JOB->res.lines;
526
+ i=lines->num; lines->num++;
527
+ lines->m1[i]=y0; lines->m2[i]=y0+5*dy/16;
528
+ lines->m3[i]=y0+12*dy/16; lines->m4[i]=y0+dy-1;
529
+ lines->x0[i]=x0; lines->x1[i]=x0+dx-1;
530
+ if(JOB->cfg.verbose)fprintf(stderr," - line= %d",lines->num);
531
+ return 0;
532
+ */
533
+ }
534
+
535
/* Integer square root without using the math library.
 *
 * Returns floor(sqrt(x)) for x >= 0 and 0 for negative x.
 *
 * Uses interval subdivision (germ.: Intervallschachtelung) with the
 * invariant lo*lo <= x < hi*hi.  The upper bound starts at x+1 and the
 * comparison is "<=", so perfect squares (1, 4, 9, ...) yield their exact
 * root.  The bounds are 64 bit wide so that mid*mid cannot overflow for
 * any int argument.
 *
 * ToDo: herons algorithm for square root x=(x+y/x)/2 is more efficient
 *  than interval subdivision (?)
 *  see http://www.math.vt.edu/people/brown/doc/sqrts.pdf
 */
int my_sqrt(int x){
  long long lo, hi, mid;

  if (x<=0) return 0;            /* sqrt(0)=0, negative input maps to 0 */
  lo=0; hi=(long long)x+1;       /* lo*lo <= x < hi*hi */
  while (lo<hi-1) {
    mid=(lo+hi)/2;
    if (mid*mid<=x) lo=mid; else hi=mid;
  }
  return (int)lo;
}
548
+
549
+ /*
550
+ ** Detect rotation angle (one for whole image)
551
+ ** old: longest text-line and determining the angle of this line.
552
+ *
553
+ * search right nearest neighbour of each box and average vectors
554
+ * to get the text orientation,
555
+ * upside down decision is not made here (I dont know how to do it)
556
+ * ToDo: set job->res.lines.{dx,dy}
557
+ * pass 1: get mean vector to nearest char
558
+ * pass 2: get mean vector to nearest char without outriders to pass 1
559
+ * extimate direction as (dx,dy,num)[pass]
560
+ * ToDo: estimate an error, boxes only work fine for zero-rotation
561
+ * for 45 degree use vectors, not boxes to get base line
562
+ */
563
+ #define INorm 1024 /* integer unit 1.0 */
564
+ int detect_rotation_angle(job_t *job){
565
+ struct box *box2, *box3,
566
+ *box_nn; /* nearest neighbour box */
567
+ int x2, y2, x3, y3, dist, mindist, pass,
568
+ rx=0, ry=0, re=0, // final result
569
+ /* to avoid 2nd run, wie store pairs in 2 different categories */
570
+ nn[4]={0,0,0,0}, /* num_pairs used for estimation [(pass-1)%2,pass%2] */
571
+ dx[4]={0,0,0,0}, /* x-component of rotation vector per pass */
572
+ dy[4]={0,0,0,0}, /* y-component of rotation vector per pass */
573
+ er[4]={INorm/4,0,0,0}; /* mean angle deviation to pass-1 (radius^2) */
574
+ // de; /* ToDo: absolute maximum error (dx^2+dy^2) */
575
+ // ToDo: next pass: go to bigger distances and reduce max error
576
+ // error is diff between passes? or diff of bottoms and top borders (?)
577
+
578
+ rx=1024; ry=0; // default
579
+ for (pass=0;pass<4;pass++) {
580
+ for_each_data(&(job->res.boxlist)) {
581
+ box2 = (struct box *)list_get_current(&(job->res.boxlist));
582
+ if (box2->c==PICTURE) continue;
583
+ /* subfunction probability of char */
584
+ // i?
585
+ // if (box2->x1 - box2->x0 < 3) continue; /* smallest font is 4x6 */
586
+ if (box2->y1 - box2->y0 < 4) continue;
587
+ /* set maximum possible distance */
588
+ box_nn=box2; // initial box to compare with
589
+
590
+ // ToDo: clustering or majority
591
+ // the algorithm is far from being perfect, pitfalls are likely
592
+ // but its better than the old algorithm, ToDo: database-rotated-images
593
+ mindist = job->src.p.x * job->src.p.x + job->src.p.y * job->src.p.y;
594
+ /* get middle point of the box */
595
+ x2 = (box2->x0 + box2->x1)/2;
596
+ y2 = (box2->y0 + box2->y1)/2;
597
+ re=0;
598
+ /* search for nearest neighbour box_nn[pass+1] of box_nn[pass] */
599
+ for_each_data(&(job->res.boxlist)) {
600
+ box3 = (struct box *)list_get_current(&(job->res.boxlist));
601
+ /* try to select only potential neighbouring chars */
602
+ /* select out all senseless combinations */
603
+ if (box3->c==PICTURE || box3==box2) continue;
604
+ x3 = (box3->x0 + box3->x1)/2;
605
+ y3 = (box3->y0 + box3->y1)/2; /* get middle point of the box */
606
+ if (x3<x2) continue; /* simplify by going right only */
607
+ // through-away deviation of angles if > pass-1?
608
+ // scalprod max in direction, cross prod min in direction
609
+ // a,b (vectors): <a,b>^2/(|a|*|b|)^2 = 0(90deg)..0.5(45deg).. 1(0deg)
610
+ // * 1024 ??
611
+ if (pass>0) { // new variant = scalar product
612
+ // danger of int overflow, ToDo: use int fraction
613
+ re =(int) ((1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])
614
+ *(1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])*INorm
615
+ /(1.*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2))
616
+ *(1.*dx[pass-1]*dx[pass-1]+dy[pass-1]*dy[pass-1])));
617
+ if (INorm-re>er[pass-1]) continue; // hits mean deviation
618
+ }
619
+ /* neighbours should have same order of size (?) */
620
+ if (3*(box3->y1-box3->y0+4) < 2*(box2->y1-box2->y0+1)) continue;
621
+ if (2*(box3->y1-box3->y0+1) > 3*(box2->y1-box2->y0+4)) continue;
622
+ if (2*(box3->x1-box3->x0+1) > 5*(box2->x1-box2->x0+4)) continue;
623
+ if (5*(box3->x1-box3->x0+4) < 2*(box2->x1-box2->x0+1)) continue;
624
+ /* should be in right range, Idea: center3 outside box2? noholes */
625
+ if ((x3<box2->x1-1) && (x3>box2->x0+1)
626
+ && (y3<box2->y1-1) && (y3>box2->y0+1)) continue;
627
+ // if chars are of different size, connect careful
628
+ if ( abs(x3-x2) > 2*(box2->x1 - box2->x0 + box3->x1 - box3 ->x0 + 2)) continue;
629
+ if ( abs(y3-y2) > (box2->x1 - box2->x0 + box3->x1 - box3 ->x0 + 2)) continue;
630
+ dist = (y3-y2)*(y3-y2) + (x3-x2)*(x3-x2);
631
+ // make distances in pass-1 directions shorter or continue if not in pass-1 range?
632
+ if (dist<9) continue; /* minimum distance^2 is 3^2 */
633
+ if (dist<mindist) { mindist=dist; box_nn=box3;}
634
+ // fprintf(stderr,"x y %d %d %d %d dist %d min %d\n",
635
+ // x2,y2,x3,y3,dist,mindist);
636
+ } end_for_each(&(job->res.boxlist));
637
+
638
+ if (box_nn==box2) continue; /* has no neighbour, next box */
639
+
640
+ box3=box_nn; dist=mindist;
641
+ x3 = (box3->x0 + box3->x1)/2;
642
+ y3 = (box3->y0 + box3->y1)/2; /* get middle point of the box */
643
+ // dist = my_sqrt(1024*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2)));
644
+ // compare with first box
645
+ x2 = (box2->x0 + box2->x1)/2;
646
+ y2 = (box2->y0 + box2->y1)/2;
647
+ // if the high of neighbouring boxes differ, use min diff (y0,y1)
648
+ if (pass>0 && 16*abs(dy[pass-1]) < dx[pass-1]) // dont work for strong rot.
649
+ if (abs(box2->y1-box2->y0-box3->y1+box3->y0)>(box2->y1-box2->y0)/8) {
650
+ // ad eh ck ...
651
+ if (abs(box2->y1-box3->y1)<abs(y3-y2)) { y2=box2->y1; y3=box3->y1; }
652
+ // ag ep qu ...
653
+ if (abs(box2->y0-box3->y0)<abs(y3-y2)) { y2=box2->y0; y3=box3->y0; }
654
+ }
655
+ if (abs(x3-x2)<4) continue;
656
+ dx[pass]+=(x3-x2)*1024; /* normalized before averaging */
657
+ dy[pass]+=(y3-y2)*1024; /* 1024 is for the precision */
658
+ nn[pass]++;
659
+ if (pass>0) { // set error = mean deviation from pass -1
660
+ re = INorm-(int)((1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])
661
+ *(1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])*INorm
662
+ /((1.*(x3-x2)*(x3-x2)+(y3-y2)*(y3-y2))
663
+ *(1.*dx[pass-1]*dx[pass-1]+dy[pass-1]*dy[pass-1]))
664
+ );
665
+ er[pass]+=re;
666
+ }
667
+ #if 0
668
+ if(JOB->cfg.verbose)
669
+ fprintf(stderr,"# next nb (x,y,dx,dy,re) %6d %6d %5d %5d %5d pass %d\n",
670
+ x2, y2, x3-x2, y3-y2, re, pass+1);
671
+ #endif
672
+ } end_for_each(&(job->res.boxlist));
673
+ if (!nn[pass]) break;
674
+ if (nn[pass]) {
675
+ /* meanvalues */
676
+ rx=dx[pass]/=nn[pass];
677
+ ry=dy[pass]/=nn[pass];
678
+ if (pass>0) er[pass]/=nn[pass];
679
+ }
680
+ if(JOB->cfg.verbose)
681
+ fprintf(stderr,"# rotation angle (x,y,maxr,num)"
682
+ " %6d %6d %6d %4d pass %d\n",
683
+ rx, ry, er[pass], nn[pass], pass+1);
684
+ }
685
+ if (abs(ry*100)>abs(rx*50))
686
+ fprintf(stderr,"<!-- gocr will fail, strong rotation angle detected -->\n");
687
+ /* ToDo: normalize to 2^10 bit (square fits to 32 it) */
688
+ JOB->res.lines.dx=rx;
689
+ JOB->res.lines.dy=ry;
690
+ return 0;
691
+ }
692
+
693
+ /* ----- detect lines --------------- */
694
+ int detect_text_lines(pix * pp, int mo) {
695
+
696
+ if (JOB->cfg.verbose)
697
+ fprintf(stderr, "# detect.c detect_text_lines (vvv=16 for more info) ");
698
+ if (mo & 4){
699
+ if (JOB->cfg.verbose) fprintf(stderr, "# zoning\n# ... ");
700
+ detect_lines2(pp, 0, 0, pp->x, pp->y, 0); // later replaced by better algo
701
+ } else
702
+ detect_lines1(pp, 0, 0, pp->x, pp->y); // old algo
703
+
704
+ if(JOB->cfg.verbose) fprintf(stderr,"\n");
705
+ return 0;
706
+ }
707
+
708
+
709
+ /* ----- adjust lines --------------- */
710
+ // rotation angle? JOB->res.lines.dy, .x0 removed later
711
+ // this is for cases, where m1..m4 is not very sure detected before
712
+ // chars are recognized
713
+ int adjust_text_lines(pix * pp, int mo) {
714
+ struct box *box2;
715
+ int *m, /* summ m1..m4, num_chars for m1..m4, min m1..m4, max. m1..m4 */
716
+ l, i, dy, dx, diff=0, y0, y1;
717
+
718
+ if ((l=JOB->res.lines.num)<2) return 0; // ???
719
+ if (JOB->cfg.verbose)
720
+ fprintf(stderr, "# adjust text lines ");
721
+ m=(int *)malloc(l*16*sizeof(int));
722
+ if (!m) { fprintf(stderr," malloc failed\n"); return 0;}
723
+ for (i=0;i<16*l;i++) m[i]=0; /* initialize */
724
+ dy=JOB->res.lines.dy; /* tan(alpha) of skewing */
725
+ dx=JOB->res.lines.dx; /* old: width of image */
726
+ // js: later skewing is replaced by one transformation of vectorized image
727
+
728
+ if (dx)
729
+ for_each_data(&(JOB->res.boxlist)) {
730
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
731
+ if (box2->line<=0) continue;
732
+ if (box2->num_ac<1) continue;
733
+ if (box2->wac[0]<95) continue;
734
+ if (box2->m2==0 || box2->y1<box2->m2) continue; // char outside line
735
+ if (box2->m3==4 || box2->y0>box2->m3) continue; // char outside line
736
+ y0=box2->y0-((box2->x1)*dy/dx); /* corrected by page skewing */
737
+ y1=box2->y1-((box2->x1)*dy/dx);
738
+ if (strchr("aemnr",(char)box2->tac[0])) { // cC vV sS oO ... is unsure!
739
+ m[box2->line*16+1]+=y0; m[box2->line*16+5]++; // num m2
740
+ m[box2->line*16+2]+=y1; m[box2->line*16+6]++; // num m3
741
+ if (m[box2->line*16+ 9]>y0) m[box2->line*16+ 9]=y0; /* min m2 */
742
+ if (m[box2->line*16+13]<y0) m[box2->line*16+13]=y0; /* max m2 */
743
+ if (m[box2->line*16+10]>y1) m[box2->line*16+10]=y1; /* min m3 */
744
+ if (m[box2->line*16+14]<y1) m[box2->line*16+14]=y1; /* max m3 */
745
+ }
746
+ if (strchr("bdhklABDEFGHIKLMNRT123456789",(char)box2->tac[0])) {
747
+ m[box2->line*16+0]+=y0; m[box2->line*16+4]++; // num m1
748
+ m[box2->line*16+2]+=y1; m[box2->line*16+6]++; // num m3
749
+ if (m[box2->line*16+ 8]>y0) m[box2->line*16+ 8]=y0; /* min m1 */
750
+ if (m[box2->line*16+12]<y0) m[box2->line*16+12]=y0; /* max m1 */
751
+ if (m[box2->line*16+10]>y1) m[box2->line*16+10]=y1; /* min m3 */
752
+ if (m[box2->line*16+14]<y1) m[box2->line*16+14]=y1; /* max m3 */
753
+ }
754
+ if (strchr("gq",(char)box2->tac[0])) {
755
+ m[box2->line*16+1]+=y0; m[box2->line*16+5]++; // num m2
756
+ m[box2->line*16+3]+=y1; m[box2->line*16+7]++; // num m4
757
+ if (m[box2->line*16+ 9]>y0) m[box2->line*16+ 9]=y0; /* min m2 */
758
+ if (m[box2->line*16+13]<y0) m[box2->line*16+13]=y0; /* max m2 */
759
+ if (m[box2->line*16+11]>y1) m[box2->line*16+11]=y1; /* min m4 */
760
+ if (m[box2->line*16+15]<y1) m[box2->line*16+15]=y1; /* max m4 */
761
+ }
762
+ } end_for_each(&(JOB->res.boxlist));
763
+
764
+ for (i=1;i<l;i++) {
765
+ diff=0; // show diff per line
766
+ if (m[i*16+4]) diff+=abs(JOB->res.lines.m1[i]-m[i*16+0]/m[i*16+4]);
767
+ if (m[i*16+5]) diff+=abs(JOB->res.lines.m2[i]-m[i*16+1]/m[i*16+5]);
768
+ if (m[i*16+6]) diff+=abs(JOB->res.lines.m3[i]-m[i*16+2]/m[i*16+6]);
769
+ if (m[i*16+7]) diff+=abs(JOB->res.lines.m4[i]-m[i*16+3]/m[i*16+7]);
770
+ /* recalculate sureness, empirically */
771
+ if (m[i*16+4]*m[i*16+5]*m[i*16+6]*m[i*16+7] > 0)
772
+ JOB->res.lines.wt[i]=(JOB->res.lines.wt[i]+100)/2;
773
+ else
774
+ JOB->res.lines.wt[i]=(JOB->res.lines.wt[i]*90)/100;
775
+ // set mean values of sure detected bounds (rounded precisely)
776
+ if ( m[i*16+4]) JOB->res.lines.m1[i]=(m[i*16+0]+m[i*16+4]/2)/m[i*16+4];
777
+ if ( m[i*16+5]) JOB->res.lines.m2[i]=(m[i*16+1]+m[i*16+5]/2)/m[i*16+5];
778
+ if ( m[i*16+6]) JOB->res.lines.m3[i]=(m[i*16+2]+m[i*16+6]/2)/m[i*16+6];
779
+ if ( m[i*16+7]) JOB->res.lines.m4[i]=(m[i*16+3]+m[i*16+7]/2)/m[i*16+7];
780
+ // care about very small fonts
781
+ if (JOB->res.lines.m2[i]-JOB->res.lines.m1[i]<=1 && m[i*16+5]==0 && m[i*16+4])
782
+ JOB->res.lines.m2[i]=JOB->res.lines.m1[i]+2;
783
+ if (JOB->res.lines.m2[i]-JOB->res.lines.m1[i]<=1 && m[i*16+4]==0 && m[i*16+5])
784
+ JOB->res.lines.m1[i]=JOB->res.lines.m2[i]-2;
785
+ if (JOB->res.lines.m4[i]-JOB->res.lines.m3[i]<=1 && m[i*16+7]==0 && m[i*16+6])
786
+ JOB->res.lines.m4[i]=JOB->res.lines.m3[i]+2;
787
+ if (JOB->res.lines.m4[i]-JOB->res.lines.m3[i]<=1 && m[i*16+6]==0 && m[i*16+7])
788
+ JOB->res.lines.m3[i]=JOB->res.lines.m4[i]-2;
789
+ if ( m[i*16+7]<1 &&
790
+ JOB->res.lines.m4[i]
791
+ <=JOB->res.lines.m3[i]+(JOB->res.lines.m3[i]-JOB->res.lines.m2[i])/4 )
792
+ JOB->res.lines.m4[i]=
793
+ JOB->res.lines.m3[i]+(JOB->res.lines.m3[i]-JOB->res.lines.m2[i])/4;
794
+ if ( m[i*16+7]<1 && m[i*16+12+2]>0 && // m4 < max.m3+..
795
+ JOB->res.lines.m4[i] < 2*m[i*16+12+2]-JOB->res.lines.m3[i]+2 )
796
+ JOB->res.lines.m4[i] = 2*m[i*16+12+2]-JOB->res.lines.m3[i]+2;
797
+ if (JOB->res.lines.m4[i]<=JOB->res.lines.m3[i])
798
+ JOB->res.lines.m4[i]= JOB->res.lines.m3[i]+1; /* 4x6 */
799
+
800
+ if (JOB->cfg.verbose & 17)
801
+ fprintf(stderr, "\n# line= %3d m= %4d %+3d %+3d %+3d "
802
+ " n= %2d %2d %2d %2d w= %3d diff= %d",
803
+ i, JOB->res.lines.m1[i],
804
+ JOB->res.lines.m2[i] - JOB->res.lines.m1[i],
805
+ JOB->res.lines.m3[i] - JOB->res.lines.m1[i],
806
+ JOB->res.lines.m4[i] - JOB->res.lines.m1[i],
807
+ m[i*16+4],m[i*16+5],m[i*16+6],m[i*16+7],
808
+ JOB->res.lines.wt[i], diff);
809
+ }
810
+ diff=0; // count adjusted chars
811
+ #if 1
812
+ if (dx)
813
+ for_each_data(&(JOB->res.boxlist)) {
814
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
815
+ if (box2->line<=0) continue;
816
+ /* check if box was on the wrong line, ToDo: search a better line */
817
+ if (2*box2->y0<2*JOB->res.lines.m1[box2->line]
818
+ -JOB->res.lines.m4[box2->line]
819
+ +JOB->res.lines.m1[box2->line]) box2->line=0;
820
+ if (2*box2->y1>2*JOB->res.lines.m4[box2->line]
821
+ +JOB->res.lines.m4[box2->line]
822
+ -JOB->res.lines.m1[box2->line]) box2->line=0;
823
+ /* do adjustments */
824
+ if (box2->num_ac>0
825
+ && box2->num_ac > 31 && box2->tac[0] < 127 /* islower(>256) may SIGSEGV */
826
+ && strchr("cCoOpPsSuUvVwWxXyYzZ",(char)box2->tac[0])) { // no_wchar
827
+ if (box2->y0-((box2->x1)*dy/dx)
828
+ < (JOB->res.lines.m1[box2->line]+JOB->res.lines.m2[box2->line])/2
829
+ && islower(box2->tac[0])
830
+ ) { setac(box2,toupper((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
831
+ if (box2->y0-((box2->x1)*dy/dx)
832
+ > (JOB->res.lines.m1[box2->line]+JOB->res.lines.m2[box2->line]+1)/2
833
+ && isupper(box2->tac[0])
834
+ ){ setac(box2,tolower((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
835
+ }
836
+ box2->m1=JOB->res.lines.m1[box2->line]+((box2->x1)*dy/dx);
837
+ box2->m2=JOB->res.lines.m2[box2->line]+((box2->x1)*dy/dx);
838
+ box2->m3=JOB->res.lines.m3[box2->line]+((box2->x1)*dy/dx);
839
+ box2->m4=JOB->res.lines.m4[box2->line]+((box2->x1)*dy/dx);
840
+ } end_for_each(&(JOB->res.boxlist));
841
+ #endif
842
+
843
+ free(m);
844
+ if(JOB->cfg.verbose) fprintf(stderr,"\n# changed_chars= %d\n",diff);
845
+ return(diff);
846
+ }
847
+
848
+ /* ---- measure mean character
849
+ * recalculate mean width and high after changes in boxlist
850
+ * ToDo: only within a Range?
851
+ */
852
+ int calc_average() {
853
+ int i = 0, x0, y0, x1, y1;
854
+ struct box *box4;
855
+
856
+ JOB->res.numC = 0;
857
+ JOB->res.sumY = 0;
858
+ JOB->res.sumX = 0;
859
+ for_each_data(&(JOB->res.boxlist)) {
860
+ box4 = (struct box *)list_get_current(&(JOB->res.boxlist));
861
+ if( box4->c != PICTURE ){
862
+ x0 = box4->x0; x1 = box4->x1;
863
+ y0 = box4->y0; y1 = box4->y1;
864
+ i++;
865
+ if (JOB->res.avX * JOB->res.avY > 0) {
866
+ if (x1 - x0 + 1 > 4 * JOB->res.avX
867
+ && y1 - y0 + 1 > 4 * JOB->res.avY) continue; /* small picture */
868
+ if (4 * (y1 - y0 + 1) < JOB->res.avY || y1 - y0 < 2)
869
+ continue; // dots .,-_ etc.
870
+ }
871
+ if (x1 - x0 + 1 < 4
872
+ && y1 - y0 + 1 < 6 ) continue; /* dots etc */
873
+ JOB->res.sumX += x1 - x0 + 1;
874
+ JOB->res.sumY += y1 - y0 + 1;
875
+ JOB->res.numC++;
876
+ }
877
+ } end_for_each(&(JOB->res.boxlist));
878
+ if ( JOB->res.numC ) { /* avoid div 0 */
879
+ JOB->res.avY = (JOB->res.sumY+JOB->res.numC/2) / JOB->res.numC;
880
+ JOB->res.avX = (JOB->res.sumX+JOB->res.numC/2) / JOB->res.numC;
881
+ }
882
+ if (JOB->cfg.verbose){
883
+ fprintf(stderr, "# averages: mXmY= %d %d nC= %d n= %d\n",
884
+ JOB->res.avX, JOB->res.avY, JOB->res.numC, i);
885
+ }
886
+ return 0;
887
+ }
888
+
889
+
890
+ /* ---- analyse boxes, find pictures and mark (do this first!!!)
891
+ */
892
+ int detect_pictures(job_t *job) {
893
+ int i = 0, x0, y0, x1, y1, num_h;
894
+ struct box *box2, *box4;
895
+
896
+ if ( job->res.numC == 0 ) {
897
+ if (job->cfg.verbose) fprintf(stderr,
898
+ "# detect.c L%d Warning: numC=0\n", __LINE__);
899
+ return -1;
900
+ }
901
+ /* ToDo: set Y to uppercase mean value? */
902
+ job->res.avY = (job->res.sumY+job->res.numC/2) / job->res.numC;
903
+ job->res.avX = (job->res.sumX+job->res.numC/2) / job->res.numC;
904
+ /* ToDo: two highest volumes? crosses, on extreme volume + on border */
905
+ if (job->cfg.verbose)
906
+ fprintf(stderr, "# detect.c L%d pictures, frames, mXmY= %d %d ... ",
907
+ __LINE__, job->res.avX, job->res.avY);
908
+ for_each_data(&(job->res.boxlist)) {
909
+ box2 = (struct box *)list_get_current(&(job->res.boxlist));
910
+ if (box2->c == PICTURE) continue;
911
+ x0 = box2->x0; x1 = box2->x1;
912
+ y0 = box2->y0; y1 = box2->y1;
913
+
914
+ /* pictures could be of unusual size */
915
+ if (x1 - x0 + 1 > 4 * job->res.avX || y1 - y0 + 1 > 4 * job->res.avY) {
916
+ /* count objects on same baseline which could be chars */
917
+ /* else: big headlines could be misinterpreted as pictures */
918
+ num_h=0;
919
+ for_each_data(&(job->res.boxlist)) {
920
+ box4 = (struct box *)list_get_current(&(job->res.boxlist));
921
+ if (box4->c == PICTURE) continue;
922
+ if (box4->y1-box4->y0 > 2*(y1-y0)) continue;
923
+ if (2*(box4->y1-box4->y0) < y1-y0) continue;
924
+ if (box4->y0 > y0 + (y1-y0+1)/2
925
+ || box4->y0 < y0 - (y1-y0+1)/2
926
+ || box4->y1 > y1 + (y1-y0+1)/2
927
+ || box4->y1 < y1 - (y1-y0+1)/2) continue;
928
+ // ToDo: continue if numcross() only 1, example: |||IIIll|||
929
+ num_h++;
930
+ } end_for_each(&(job->res.boxlist));
931
+ if (num_h>4) continue;
932
+ box2->c = PICTURE;
933
+ i++;
934
+ }
935
+ /* ToDo: pictures could have low contrast=Sum((pixel(p,x,y)-160)^2) */
936
+ } end_for_each(&(job->res.boxlist));
937
+ // start second iteration
938
+ if (job->cfg.verbose) {
939
+ fprintf(stderr, " %d - boxes %d\n", i, job->res.numC-i);
940
+ }
941
+ calc_average();
942
+ return 0;
943
+ }