entangledstate-isbn 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (290) hide show
  1. data/README +1 -1
  2. data/Rakefile +0 -18
  3. data/VERSION +1 -0
  4. data/isbn.gemspec +290 -7
  5. data/lib/isbn.rb +6 -6
  6. data/src/gocr-0.48/.cvsignore +6 -0
  7. data/src/gocr-0.48/AUTHORS +7 -0
  8. data/src/gocr-0.48/BUGS +55 -0
  9. data/src/gocr-0.48/CREDITS +17 -0
  10. data/src/gocr-0.48/HISTORY +243 -0
  11. data/src/gocr-0.48/INSTALL +83 -0
  12. data/src/gocr-0.48/Makefile +193 -0
  13. data/src/gocr-0.48/Makefile.in +193 -0
  14. data/src/gocr-0.48/README +165 -0
  15. data/src/gocr-0.48/READMEde.txt +80 -0
  16. data/src/gocr-0.48/REMARK.txt +18 -0
  17. data/src/gocr-0.48/REVIEW +538 -0
  18. data/src/gocr-0.48/TODO +65 -0
  19. data/src/gocr-0.48/bin/.cvsignore +2 -0
  20. data/src/gocr-0.48/bin/create_db +38 -0
  21. data/src/gocr-0.48/bin/gocr.tcl +527 -0
  22. data/src/gocr-0.48/bin/gocr_chk.sh +44 -0
  23. data/src/gocr-0.48/configure +4689 -0
  24. data/src/gocr-0.48/configure.in +71 -0
  25. data/src/gocr-0.48/doc/.#Makefile.1.6 +39 -0
  26. data/src/gocr-0.48/doc/.cvsignore +2 -0
  27. data/src/gocr-0.48/doc/Makefile +39 -0
  28. data/src/gocr-0.48/doc/Makefile.in +39 -0
  29. data/src/gocr-0.48/doc/example.dtd +53 -0
  30. data/src/gocr-0.48/doc/example.xml +21 -0
  31. data/src/gocr-0.48/doc/examples.txt +67 -0
  32. data/src/gocr-0.48/doc/gocr.html +578 -0
  33. data/src/gocr-0.48/doc/unicode.txt +57 -0
  34. data/src/gocr-0.48/examples/.#Makefile.1.22 +166 -0
  35. data/src/gocr-0.48/examples/4x6.png +0 -0
  36. data/src/gocr-0.48/examples/4x6.txt +2 -0
  37. data/src/gocr-0.48/examples/5x7.png +0 -0
  38. data/src/gocr-0.48/examples/5x7.png.txt +2 -0
  39. data/src/gocr-0.48/examples/5x8.png +0 -0
  40. data/src/gocr-0.48/examples/5x8.png.txt +2 -0
  41. data/src/gocr-0.48/examples/Makefile +166 -0
  42. data/src/gocr-0.48/examples/color.fig +20 -0
  43. data/src/gocr-0.48/examples/ex.fig +16 -0
  44. data/src/gocr-0.48/examples/font.tex +22 -0
  45. data/src/gocr-0.48/examples/font1.tex +46 -0
  46. data/src/gocr-0.48/examples/font2.fig +27 -0
  47. data/src/gocr-0.48/examples/font_nw.tex +24 -0
  48. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  49. data/src/gocr-0.48/examples/handwrt1.txt +10 -0
  50. data/src/gocr-0.48/examples/inverse.fig +20 -0
  51. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  52. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  53. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +4 -0
  54. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a.txt +6 -0
  56. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-b.png.txt +4 -0
  58. data/src/gocr-0.48/examples/polish.tex +28 -0
  59. data/src/gocr-0.48/examples/rotate45.fig +14 -0
  60. data/src/gocr-0.48/examples/score +36 -0
  61. data/src/gocr-0.48/examples/text.tex +28 -0
  62. data/src/gocr-0.48/gocr.spec +143 -0
  63. data/src/gocr-0.48/gpl.html +537 -0
  64. data/src/gocr-0.48/include/.cvsignore +2 -0
  65. data/src/gocr-0.48/include/config.h +36 -0
  66. data/src/gocr-0.48/include/config.h.in +36 -0
  67. data/src/gocr-0.48/include/version.h +2 -0
  68. data/src/gocr-0.48/install-sh +3 -0
  69. data/src/gocr-0.48/make.bat +57 -0
  70. data/src/gocr-0.48/man/.cvsignore +2 -0
  71. data/src/gocr-0.48/man/Makefile +29 -0
  72. data/src/gocr-0.48/man/Makefile.in +29 -0
  73. data/src/gocr-0.48/man/man1/gocr.1 +166 -0
  74. data/src/gocr-0.48/src/.cvsignore +4 -0
  75. data/src/gocr-0.48/src/Makefile +132 -0
  76. data/src/gocr-0.48/src/Makefile.in +132 -0
  77. data/src/gocr-0.48/src/amiga.h +31 -0
  78. data/src/gocr-0.48/src/barcode.c +846 -0
  79. data/src/gocr-0.48/src/barcode.c.orig +593 -0
  80. data/src/gocr-0.48/src/barcode.h +11 -0
  81. data/src/gocr-0.48/src/box.c +372 -0
  82. data/src/gocr-0.48/src/database.c +462 -0
  83. data/src/gocr-0.48/src/detect.c +943 -0
  84. data/src/gocr-0.48/src/gocr.c +373 -0
  85. data/src/gocr-0.48/src/gocr.h +288 -0
  86. data/src/gocr-0.48/src/jconv.c +168 -0
  87. data/src/gocr-0.48/src/job.c +84 -0
  88. data/src/gocr-0.48/src/lines.c +350 -0
  89. data/src/gocr-0.48/src/list.c +334 -0
  90. data/src/gocr-0.48/src/list.h +90 -0
  91. data/src/gocr-0.48/src/ocr0.c +6756 -0
  92. data/src/gocr-0.48/src/ocr0.h +63 -0
  93. data/src/gocr-0.48/src/ocr0n.c +1475 -0
  94. data/src/gocr-0.48/src/ocr1.c +85 -0
  95. data/src/gocr-0.48/src/ocr1.h +3 -0
  96. data/src/gocr-0.48/src/otsu.c +289 -0
  97. data/src/gocr-0.48/src/otsu.h +23 -0
  98. data/src/gocr-0.48/src/output.c +289 -0
  99. data/src/gocr-0.48/src/output.h +37 -0
  100. data/src/gocr-0.48/src/pcx.c +153 -0
  101. data/src/gocr-0.48/src/pcx.h +9 -0
  102. data/src/gocr-0.48/src/pgm2asc.c +2893 -0
  103. data/src/gocr-0.48/src/pgm2asc.h +105 -0
  104. data/src/gocr-0.48/src/pixel.c +537 -0
  105. data/src/gocr-0.48/src/pnm.c +533 -0
  106. data/src/gocr-0.48/src/pnm.h +35 -0
  107. data/src/gocr-0.48/src/progress.c +87 -0
  108. data/src/gocr-0.48/src/progress.h +42 -0
  109. data/src/gocr-0.48/src/remove.c +703 -0
  110. data/src/gocr-0.48/src/tga.c +87 -0
  111. data/src/gocr-0.48/src/tga.h +6 -0
  112. data/src/gocr-0.48/src/unicode.c +1314 -0
  113. data/src/gocr-0.48/src/unicode.h +1257 -0
  114. data/src/jpeg-7/Makefile.am +133 -0
  115. data/src/jpeg-7/Makefile.in +1089 -0
  116. data/src/jpeg-7/README +322 -0
  117. data/src/jpeg-7/aclocal.m4 +8990 -0
  118. data/src/jpeg-7/ansi2knr.1 +36 -0
  119. data/src/jpeg-7/ansi2knr.c +739 -0
  120. data/src/jpeg-7/cderror.h +132 -0
  121. data/src/jpeg-7/cdjpeg.c +181 -0
  122. data/src/jpeg-7/cdjpeg.h +187 -0
  123. data/src/jpeg-7/change.log +270 -0
  124. data/src/jpeg-7/cjpeg.1 +325 -0
  125. data/src/jpeg-7/cjpeg.c +616 -0
  126. data/src/jpeg-7/ckconfig.c +402 -0
  127. data/src/jpeg-7/coderules.txt +118 -0
  128. data/src/jpeg-7/config.guess +1561 -0
  129. data/src/jpeg-7/config.sub +1686 -0
  130. data/src/jpeg-7/configure +17139 -0
  131. data/src/jpeg-7/configure.ac +317 -0
  132. data/src/jpeg-7/depcomp +630 -0
  133. data/src/jpeg-7/djpeg.1 +251 -0
  134. data/src/jpeg-7/djpeg.c +617 -0
  135. data/src/jpeg-7/example.c +433 -0
  136. data/src/jpeg-7/filelist.txt +215 -0
  137. data/src/jpeg-7/install-sh +520 -0
  138. data/src/jpeg-7/install.txt +1097 -0
  139. data/src/jpeg-7/jaricom.c +148 -0
  140. data/src/jpeg-7/jcapimin.c +282 -0
  141. data/src/jpeg-7/jcapistd.c +161 -0
  142. data/src/jpeg-7/jcarith.c +921 -0
  143. data/src/jpeg-7/jccoefct.c +453 -0
  144. data/src/jpeg-7/jccolor.c +459 -0
  145. data/src/jpeg-7/jcdctmgr.c +482 -0
  146. data/src/jpeg-7/jchuff.c +1612 -0
  147. data/src/jpeg-7/jcinit.c +65 -0
  148. data/src/jpeg-7/jcmainct.c +293 -0
  149. data/src/jpeg-7/jcmarker.c +667 -0
  150. data/src/jpeg-7/jcmaster.c +770 -0
  151. data/src/jpeg-7/jcomapi.c +106 -0
  152. data/src/jpeg-7/jconfig.bcc +48 -0
  153. data/src/jpeg-7/jconfig.cfg +45 -0
  154. data/src/jpeg-7/jconfig.dj +38 -0
  155. data/src/jpeg-7/jconfig.mac +43 -0
  156. data/src/jpeg-7/jconfig.manx +43 -0
  157. data/src/jpeg-7/jconfig.mc6 +52 -0
  158. data/src/jpeg-7/jconfig.sas +43 -0
  159. data/src/jpeg-7/jconfig.st +42 -0
  160. data/src/jpeg-7/jconfig.txt +155 -0
  161. data/src/jpeg-7/jconfig.vc +45 -0
  162. data/src/jpeg-7/jconfig.vms +37 -0
  163. data/src/jpeg-7/jconfig.wat +38 -0
  164. data/src/jpeg-7/jcparam.c +632 -0
  165. data/src/jpeg-7/jcprepct.c +358 -0
  166. data/src/jpeg-7/jcsample.c +545 -0
  167. data/src/jpeg-7/jctrans.c +381 -0
  168. data/src/jpeg-7/jdapimin.c +396 -0
  169. data/src/jpeg-7/jdapistd.c +275 -0
  170. data/src/jpeg-7/jdarith.c +762 -0
  171. data/src/jpeg-7/jdatadst.c +151 -0
  172. data/src/jpeg-7/jdatasrc.c +212 -0
  173. data/src/jpeg-7/jdcoefct.c +736 -0
  174. data/src/jpeg-7/jdcolor.c +396 -0
  175. data/src/jpeg-7/jdct.h +393 -0
  176. data/src/jpeg-7/jddctmgr.c +382 -0
  177. data/src/jpeg-7/jdhuff.c +1309 -0
  178. data/src/jpeg-7/jdinput.c +384 -0
  179. data/src/jpeg-7/jdmainct.c +512 -0
  180. data/src/jpeg-7/jdmarker.c +1360 -0
  181. data/src/jpeg-7/jdmaster.c +663 -0
  182. data/src/jpeg-7/jdmerge.c +400 -0
  183. data/src/jpeg-7/jdpostct.c +290 -0
  184. data/src/jpeg-7/jdsample.c +361 -0
  185. data/src/jpeg-7/jdtrans.c +136 -0
  186. data/src/jpeg-7/jerror.c +252 -0
  187. data/src/jpeg-7/jerror.h +304 -0
  188. data/src/jpeg-7/jfdctflt.c +174 -0
  189. data/src/jpeg-7/jfdctfst.c +230 -0
  190. data/src/jpeg-7/jfdctint.c +4348 -0
  191. data/src/jpeg-7/jidctflt.c +242 -0
  192. data/src/jpeg-7/jidctfst.c +368 -0
  193. data/src/jpeg-7/jidctint.c +5137 -0
  194. data/src/jpeg-7/jinclude.h +91 -0
  195. data/src/jpeg-7/jmemansi.c +167 -0
  196. data/src/jpeg-7/jmemdos.c +638 -0
  197. data/src/jpeg-7/jmemdosa.asm +379 -0
  198. data/src/jpeg-7/jmemmac.c +289 -0
  199. data/src/jpeg-7/jmemmgr.c +1118 -0
  200. data/src/jpeg-7/jmemname.c +276 -0
  201. data/src/jpeg-7/jmemnobs.c +109 -0
  202. data/src/jpeg-7/jmemsys.h +198 -0
  203. data/src/jpeg-7/jmorecfg.h +369 -0
  204. data/src/jpeg-7/jpegint.h +395 -0
  205. data/src/jpeg-7/jpeglib.h +1135 -0
  206. data/src/jpeg-7/jpegtran.1 +272 -0
  207. data/src/jpeg-7/jpegtran.c +546 -0
  208. data/src/jpeg-7/jquant1.c +856 -0
  209. data/src/jpeg-7/jquant2.c +1310 -0
  210. data/src/jpeg-7/jutils.c +179 -0
  211. data/src/jpeg-7/jversion.h +14 -0
  212. data/src/jpeg-7/libjpeg.map +4 -0
  213. data/src/jpeg-7/libjpeg.txt +3067 -0
  214. data/src/jpeg-7/ltmain.sh +8406 -0
  215. data/src/jpeg-7/makcjpeg.st +36 -0
  216. data/src/jpeg-7/makdjpeg.st +36 -0
  217. data/src/jpeg-7/makeadsw.vc6 +77 -0
  218. data/src/jpeg-7/makeasln.vc9 +33 -0
  219. data/src/jpeg-7/makecdep.vc6 +82 -0
  220. data/src/jpeg-7/makecdsp.vc6 +130 -0
  221. data/src/jpeg-7/makecmak.vc6 +159 -0
  222. data/src/jpeg-7/makecvcp.vc9 +186 -0
  223. data/src/jpeg-7/makeddep.vc6 +82 -0
  224. data/src/jpeg-7/makeddsp.vc6 +130 -0
  225. data/src/jpeg-7/makedmak.vc6 +159 -0
  226. data/src/jpeg-7/makedvcp.vc9 +186 -0
  227. data/src/jpeg-7/makefile.ansi +220 -0
  228. data/src/jpeg-7/makefile.bcc +291 -0
  229. data/src/jpeg-7/makefile.dj +226 -0
  230. data/src/jpeg-7/makefile.manx +220 -0
  231. data/src/jpeg-7/makefile.mc6 +255 -0
  232. data/src/jpeg-7/makefile.mms +224 -0
  233. data/src/jpeg-7/makefile.sas +258 -0
  234. data/src/jpeg-7/makefile.unix +234 -0
  235. data/src/jpeg-7/makefile.vc +217 -0
  236. data/src/jpeg-7/makefile.vms +142 -0
  237. data/src/jpeg-7/makefile.wat +239 -0
  238. data/src/jpeg-7/makejdep.vc6 +423 -0
  239. data/src/jpeg-7/makejdsp.vc6 +285 -0
  240. data/src/jpeg-7/makejdsw.vc6 +29 -0
  241. data/src/jpeg-7/makejmak.vc6 +425 -0
  242. data/src/jpeg-7/makejsln.vc9 +17 -0
  243. data/src/jpeg-7/makejvcp.vc9 +328 -0
  244. data/src/jpeg-7/makeproj.mac +213 -0
  245. data/src/jpeg-7/makerdep.vc6 +6 -0
  246. data/src/jpeg-7/makerdsp.vc6 +78 -0
  247. data/src/jpeg-7/makermak.vc6 +110 -0
  248. data/src/jpeg-7/makervcp.vc9 +133 -0
  249. data/src/jpeg-7/maketdep.vc6 +43 -0
  250. data/src/jpeg-7/maketdsp.vc6 +122 -0
  251. data/src/jpeg-7/maketmak.vc6 +131 -0
  252. data/src/jpeg-7/maketvcp.vc9 +178 -0
  253. data/src/jpeg-7/makewdep.vc6 +6 -0
  254. data/src/jpeg-7/makewdsp.vc6 +78 -0
  255. data/src/jpeg-7/makewmak.vc6 +110 -0
  256. data/src/jpeg-7/makewvcp.vc9 +133 -0
  257. data/src/jpeg-7/makljpeg.st +68 -0
  258. data/src/jpeg-7/maktjpeg.st +30 -0
  259. data/src/jpeg-7/makvms.opt +4 -0
  260. data/src/jpeg-7/missing +376 -0
  261. data/src/jpeg-7/rdbmp.c +439 -0
  262. data/src/jpeg-7/rdcolmap.c +253 -0
  263. data/src/jpeg-7/rdgif.c +38 -0
  264. data/src/jpeg-7/rdjpgcom.1 +63 -0
  265. data/src/jpeg-7/rdjpgcom.c +515 -0
  266. data/src/jpeg-7/rdppm.c +459 -0
  267. data/src/jpeg-7/rdrle.c +387 -0
  268. data/src/jpeg-7/rdswitch.c +365 -0
  269. data/src/jpeg-7/rdtarga.c +500 -0
  270. data/src/jpeg-7/structure.txt +945 -0
  271. data/src/jpeg-7/testimg.bmp +0 -0
  272. data/src/jpeg-7/testimg.jpg +0 -0
  273. data/src/jpeg-7/testimg.ppm +4 -0
  274. data/src/jpeg-7/testimgp.jpg +0 -0
  275. data/src/jpeg-7/testorig.jpg +0 -0
  276. data/src/jpeg-7/testprog.jpg +0 -0
  277. data/src/jpeg-7/transupp.c +1533 -0
  278. data/src/jpeg-7/transupp.h +205 -0
  279. data/src/jpeg-7/usage.txt +605 -0
  280. data/src/jpeg-7/wizard.txt +211 -0
  281. data/src/jpeg-7/wrbmp.c +442 -0
  282. data/src/jpeg-7/wrgif.c +399 -0
  283. data/src/jpeg-7/wrjpgcom.1 +103 -0
  284. data/src/jpeg-7/wrjpgcom.c +583 -0
  285. data/src/jpeg-7/wrppm.c +269 -0
  286. data/src/jpeg-7/wrrle.c +305 -0
  287. data/src/jpeg-7/wrtarga.c +253 -0
  288. metadata +287 -6
  289. data/LICENSE +0 -20
  290. data/VERSION.yml +0 -4
@@ -0,0 +1,168 @@
1
+ /* OCR Aug00 JS
2
+ // PGM gray ASCII=P2 RAW=P5
3
+ // PPM RGB ASCII=P3 RAW=P6
4
+ // PBM B/W ASCII=P1 RAW=P4
5
+ // ToDo:
6
+ // - pbm-raw to pgm also for x!=0 (mod 8)
7
+ // v0.01 bug eliminated
8
+ // v0.02 convert renamed into jconv because ImageMagick uses same name
9
+ // v0.03 code review bbg
10
+ // program is not used anymore, use "convert -verbose -crop 0x0+1+1" instead
11
+ */
12
+
13
+ // #include <iostream.h>
14
+ #include "config.h"
15
+ #include <stdio.h>
16
+ #include <stdlib.h>
17
+ #include <assert.h>
18
+ #include <string.h>
19
+ #include "pnm.h"
20
+ #ifdef HAVE_PAM_H
21
+ # include <pam.h>
22
+ #endif
23
+ #include "pcx.h"
24
+ #include "tga.h"
25
+
26
/* Print the jconv usage summary on stdout and terminate with exit status 1. */
void help( void ) {
  static const char usage[] =
    "jconv version Aug2000 JS (pnm-raw,pcx8,tga24)\n"
    "use: jconv [options] ?infile.pnm? ?outfile.pgm? ?ox? ?oy? ?dx? ?dy?\n"
    "options: -shrink -pbm -? -help\n"
    "example: jconv -shrink -pbm font.pbm font.pbm 0 0 0 0\n";

  fputs(usage, stdout);
  exit(1);
}
33
+
34
+ int main(int argn, char *argv[])
35
+ {
36
+ char *inam, *onam;
37
+ pix bild;
38
+ int ox, oy, dx, dy, x, y, i, vvv = 0;
39
+
40
+ #ifdef HAVE_PAM_H
41
+ pnm_init(&argn, argv);
42
+ #endif
43
+ // skip options
44
+ for (i = 1; i < argn; i++) {
45
+ if (argv[i][0] != '-')
46
+ break;
47
+ if (!strcmp(argv[i], "-?"))
48
+ help();
49
+ else if (!strcmp(argv[i], "-help"))
50
+ help();
51
+ else if (!strcmp(argv[i], "-shrink"))
52
+ vvv |= 2;
53
+ else if (!strcmp(argv[i], "-pbm"))
54
+ vvv |= 4;
55
+ else
56
+ printf("unknown option: %s\n", argv[i]);
57
+ }
58
+
59
+ if (argn - i != 6)
60
+ help();
61
+ inam = argv[i++];
62
+ onam = argv[i++];
63
+ ox = atoi(argv[i++]);
64
+ oy = atoi(argv[i++]);
65
+ dx = atoi(argv[i++]);
66
+ dy = atoi(argv[i++]);
67
+ printf("# in=%s out=%s offs=%d,%d len=%d,%d vvv=%d\n",
68
+ inam, onam, ox, oy, dx, dy, vvv);
69
+
70
+ // ----- read picture
71
+ if (strstr(inam, ".pbm") ||
72
+ strstr(inam, ".pgm") ||
73
+ strstr(inam, ".ppm") ||
74
+ strstr(inam, ".pnm") ||
75
+ strstr(inam, ".pam"))
76
+ readpgm(inam, &bild, 1);
77
+ else if (strstr(inam, ".pcx"))
78
+ readpcx(inam, &bild, 1);
79
+ else if (strstr(inam, ".tga"))
80
+ readtga(inam, &bild, ((vvv > 1) ? 0 : 1));
81
+ else {
82
+ printf("Error: unknown suffix\n");
83
+ exit(1);
84
+ }
85
+ if (ox < 0 || ox >= bild.x)
86
+ ox = 0;
87
+ if (oy < 0 || ox >= bild.y)
88
+ oy = 0;
89
+ if (dx <= 0 || ox + dx > bild.x)
90
+ dx = bild.x - ox;
91
+ if (dy <= 0 || oy + dy > bild.y)
92
+ dy = bild.y - oy;
93
+ if ((vvv & 2) == 2 && bild.bpp == 1) { // -shrink
94
+ int x, y;
95
+ printf("# shrinking PGM: offs=%d,%d len=%d,%d\n", ox, oy, dx, dy);
96
+ for (y = 0; y < dy; y++) { // shrink upper border
97
+ for (x = 0; x < dx; x++)
98
+ if (bild.p[x + ox + (y + oy) * bild.x] < 127)
99
+ break;
100
+ if (x < dx) {
101
+ if (y > 0)
102
+ y--;
103
+ oy += y;
104
+ dy -= y;
105
+ break;
106
+ }
107
+ }
108
+ for (y = 0; y < dy; y++) { // shrink lower border
109
+ for (x = 0; x < dx; x++)
110
+ if (bild.p[ox + x + (oy + dy - y - 1) * bild.x] < 127)
111
+ break;
112
+ if (x < dx) {
113
+ if (y > 0)
114
+ y--;
115
+ dy -= y;
116
+ break;
117
+ }
118
+ }
119
+ for (x = 0; x < dx; x++) { // shrink left border
120
+ for (y = 0; y < dy; y++)
121
+ if (bild.p[x + ox + (y + oy) * bild.x] < 127)
122
+ break;
123
+ if (y < dy) {
124
+ if (x > 0)
125
+ x--;
126
+ ox += x;
127
+ dx -= x;
128
+ break;
129
+ }
130
+ }
131
+ for (x = 0; x < dx; x++) { // shrink right border
132
+ for (y = 0; y < dy; y++)
133
+ if (bild.p[ox + dx - x - 1 + (oy + y) * bild.x] < 127)
134
+ break;
135
+ if (y < dy) {
136
+ if (x > 0)
137
+ x--;
138
+ dx -= x;
139
+ break;
140
+ }
141
+ }
142
+ }
143
+ printf("# final dimension: offs=%d,%d len=%d,%d bpp=%d\n",
144
+ ox, oy, dx, dy, bild.bpp);
145
+
146
+ /* bbg: could be changed to memmoves */
147
+ // ---- new size
148
+ for (y = 0; y < dy; y++)
149
+ for (x = 0; x < dx; x++)
150
+ for (i = 0; i < 3; i++)
151
+ bild.p[i + bild.bpp * (x + dx * y)] =
152
+ bild.p[i + bild.bpp * (x + ox + (y + oy) * bild.x)];
153
+ bild.x = dx;
154
+ bild.y = dy;
155
+ // ---- write internal picture of textsite
156
+ printf("# write %s\n", onam);
157
+ if (strstr(onam, ".pbm"))
158
+ writepbm(onam, &bild);
159
+ else if (strstr(onam, ".pgm"))
160
+ writepgm(onam, &bild);
161
+ else if (strstr(onam, ".ppm"))
162
+ writeppm(onam, &bild);
163
+ else if (strstr(onam, ".pnm"))
164
+ writepgm(onam, &bild);
165
+ else
166
+ printf("Error: unknown suffix");
167
+ free( bild.p );
168
+ }
@@ -0,0 +1,84 @@
1
+ /*
2
+ This is a Optical-Character-Recognition program
3
+ Copyright (C) 2000-2006 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for email address */
20
+
21
+ #include "pgm2asc.h"
22
+ #include "gocr.h"
23
+
24
+ /* initialize job structure */
25
+ void job_init(job_t *job) {
26
+ /* init source */
27
+ job->src.fname = "-";
28
+ /* FIXME jb: init pix */
29
+ job->src.p.p = NULL;
30
+
31
+ /* init results */
32
+ list_init( &job->res.boxlist );
33
+ list_init( &job->res.linelist );
34
+ job->res.avX = 5;
35
+ job->res.avY = 8;
36
+ job->res.sumX = 0;
37
+ job->res.sumY = 0;
38
+ job->res.numC = 0;
39
+ job->res.lines.dy=0;
40
+ job->res.lines.num=0;
41
+
42
+ /* init temporaries */
43
+ list_init( &job->tmp.dblist );
44
+ job->tmp.n_run = 0;
45
+ /* FIXME jb: init ppo */
46
+ job->tmp.ppo.p = NULL;
47
+ job->tmp.ppo.x = 0;
48
+ job->tmp.ppo.y = 0;
49
+
50
+ /* init cfg */
51
+ job->cfg.cs = 0;
52
+ job->cfg.spc = 0;
53
+ job->cfg.mode = 0;
54
+ job->cfg.dust_size = -1; /* auto detect */
55
+ job->cfg.only_numbers = 0;
56
+ job->cfg.verbose = 0;
57
+ job->cfg.out_format = UTF8; /* old: ISO8859_1; */
58
+ job->cfg.lc = "_";
59
+ job->cfg.db_path = (char*)NULL;
60
+ job->cfg.cfilter = (char*)NULL;
61
+ job->cfg.certainty = 95;
62
+ job->cfg.unrec_marker = "_";
63
+ }
64
+
65
+ /* free job structure */
66
+ void job_free(job_t *job) {
67
+
68
+ /* if tmp is just a copy of the pointer to the original image */
69
+ if (job->tmp.ppo.p==job->src.p.p) job->tmp.ppo.p=NULL;
70
+
71
+ /* FIMXE jb: free lists
72
+ * list_free( &job->res.linelist );
73
+ * list_free( &job->tmp.dblist );
74
+ */
75
+
76
+ list_and_data_free(&(job->res.boxlist), (void (*)(void *))free_box);
77
+
78
+ /* FIXME jb: free pix */
79
+ if (job->src.p.p) { free(job->src.p.p); job->src.p.p=NULL; }
80
+
81
+ /* FIXME jb: free pix */
82
+ if (job->tmp.ppo.p) { free(job->tmp.ppo.p); job->tmp.ppo.p=NULL; }
83
+
84
+ }
@@ -0,0 +1,350 @@
1
+ /*
2
+ This is a Optical-Character-Recognition program
3
+ Copyright (C) 2000-2009 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+ */
21
+
22
+ #include <stdlib.h>
23
+ #include <stdio.h>
24
+ #include <string.h>
25
+ #include <limits.h>
26
+ #include <assert.h>
27
+ #include "pgm2asc.h"
28
+ #include "gocr.h"
29
+ #include "unicode.h"
30
+
31
+ const char *getTextLine (int line) {
32
+ int i;
33
+ Element *elem;
34
+
35
+ if (line < 0 || line > list_total(&(JOB->res.linelist)))
36
+ return NULL;
37
+
38
+ for ( i = 0, elem = JOB->res.linelist.start.next; i < line && elem != NULL; i++ )
39
+ elem = elem->next;
40
+
41
+ if ( elem != NULL )
42
+ return (const char *)elem->data;
43
+
44
+ return NULL;
45
+ }
46
+
47
+ void free_textlines(void) {
48
+ for_each_data(&(JOB->res.linelist)) {
49
+ if (list_get_current(&(JOB->res.linelist)))
50
+ free(list_get_current(&(JOB->res.linelist)));
51
+ } end_for_each(&(JOB->res.linelist));
52
+ list_free(&(JOB->res.linelist));
53
+ }
54
+
55
/* Append the string s1 to the heap string `buffer` whose capacity is *len.
 *
 *  buffer  NUL-terminated string obtained from malloc/realloc, or NULL
 *          (treated as an empty string that still has to be allocated)
 *  s1      text to append; NULL or "" is reported on stderr and ignored
 *  len     in/out: capacity of buffer in bytes; raised in steps of 512
 *          until the combined text fits
 *
 * Returns the (possibly moved) buffer.  If the buffer cannot be enlarged,
 * a message is written to stderr and buffer and *len are left unchanged.
 */
char *append_to_line(char *buffer, const char *s1, int *len) {
  char *grown;
  size_t used, extra;
  int new_len;

  if( s1==NULL || s1[0] == 0 ){
    fprintf(stderr,"\n#BUG: appending 0 to a line makes no sense!");
    return buffer;
  }
  /* bytes already used; a NULL buffer holds nothing regardless of *len */
  used  = (buffer != NULL && *len > 0) ? strlen(buffer) : 0;
  extra = strlen(s1);

  /* Grow in 512-byte steps until the result fits.  The original grew by a
   * single step, so appending more than 512 bytes overflowed the buffer. */
  new_len = *len;
  while ( new_len < 0 || used + extra + 1 >= (size_t)new_len ) {
    if ( new_len > INT_MAX - 512 ) {    /* capacity not representable */
      fprintf(stderr,"realloc failed!\n");
      return buffer;
    }
    new_len += 512;
  }

  if ( new_len != *len || buffer == NULL ) {
    grown = (char *)realloc(buffer, (size_t)new_len);
    if( !grown ) {
      fprintf(stderr,"realloc failed!\n");
      return buffer;                    /* old buffer and *len stay valid */
    }
    buffer = grown;                     // buffer successfully enlarged
    *len = new_len;
  }

  memcpy(buffer + used, s1, extra + 1); // copy including end sign '\0'
  return buffer;
}
77
+
78
+ int calc_median_gap(struct tlines * lines) {
79
+ int gaps[MAXlines], l;
80
+ if (lines->num<2) return 0;
81
+ for (l = 0; l < lines->num - 1; l++)
82
+ gaps[l] = lines->m2[l + 1] - lines->m3[l];
83
+ qsort(gaps, lines->num - 1, sizeof(gaps[0]), intcompare);
84
+ return gaps[(lines->num - 1) / 2];
85
+ }
86
+
87
+ /*
88
+ * Return the indent in pixels of the least-indented line.
89
+ * Will be subtracted as base_indent to avoid negativ indent.
90
+ *
91
+ * This is adjusted to account for an angle on the page as
92
+ * a whole. For instance, if the page is rotated clockwise,
93
+ * lower lines may be physically closer to the left edge
94
+ * than higher lines that are logically less indented.
95
+ * We rotate around (0,0). Note that this rotation could
96
+ * rotate lines "off the left margin", leading to a negative
97
+ * indent.
98
+ *
99
+ * boxlist -- list of character boxes.
100
+ * dx, dy -- rotation angle as vector
101
+ */
102
+ int get_least_line_indent(List * boxlist, int dx, int dy) {
103
+ int min_indent = INT_MAX;
104
+ int adjusted_indent;
105
+ struct box * box2;
106
+ if (JOB->cfg.verbose)
107
+ fprintf(stderr, "get_least_line_indent: rot.vector dxdy %d %d\n",
108
+ dx, dy);
109
+ for_each_data(boxlist) {
110
+ box2 = (struct box *)list_get_current(boxlist);
111
+ /* if num == -1, indicates this is a space or newline box,
112
+ * inserted in list_insert_spaces. */
113
+ if (box2->num != -1) {
114
+ adjusted_indent = box2->x0;
115
+ if (dx) adjusted_indent += box2->y0 * dy / dx;
116
+ if (adjusted_indent < min_indent) {
117
+ min_indent = adjusted_indent;
118
+ if (dy!=0 && JOB->cfg.verbose)
119
+ fprintf(stderr,
120
+ "# Line %2d, unadjusted xy %3d %3d, adjusted x %2d\n",
121
+ box2->line, box2->x0, box2->y0, adjusted_indent);
122
+ }
123
+ }
124
+ } end_for_each(boxlist);
125
+ if (JOB->cfg.verbose)
126
+ fprintf(stderr, "# Minimum adjusted x: %d (min_indent)\n", min_indent);
127
+ return min_indent;
128
+ }
129
+
130
+ /* collect all the chars from the box tree and write them to a string buffer
131
+ mo is the mode: mode&8 means, use chars even if unsure recognized
132
+ ToDo: store full text(?), store decoded text+boxes+position chars (v0.4)
133
+ (HTML,UTF,ASCII,XML), not wchar incl. descriptions (at<95% in red)
134
+ remove decode(*c, job->cfg.out_format) from gocr.c!
135
+ XML add alternate-tags, format tags and position tags
136
+ ToDo: better output XML to stdout instead of circumstantial store to lines
137
+ not all texts/images follow the line concept?
138
+ Better use a tree of objects where leafes are chars instead of simple list.
139
+ Chars or objects are taken into account. Objects can be text strings
140
+ or XML strings.
141
+ */
142
+ void store_boxtree_lines(int mo) {
143
+ char *buffer; /* temp buffer for text */
144
+ int i = 0, j = 0;
145
+ int len = 1024; // initial buffer length for text line
146
+ struct box *box2;
147
+ int median_gap = 0;
148
+ int max_single_space_gap = 0;
149
+ struct tlines line_info;
150
+ int line, line_gap, oldline=-1;
151
+ int left_margin;
152
+ int i1=0, i2=0;
153
+
154
+ buffer = (char *)malloc(len);
155
+ if ( !buffer ) {
156
+ fprintf(stderr,"malloc failed!\n"); // ToDo: index_to_error_list
157
+ return;
158
+ }
159
+ *buffer = 0;
160
+
161
+ if ( JOB->cfg.verbose&1 )
162
+ fprintf(stderr,"# store boxtree to lines ...");
163
+
164
+ /* wew: calculate the median line gap, to determine line spacing
165
+ * for the text output. The line gap used is between one line's
166
+ * m3 (baseline) and the next line's m2 (height of non-rising
167
+ * lowercase). We use these lines as they are the least likely
168
+ * to vary according to actual character content of lines.
169
+ */
170
+ median_gap = calc_median_gap(&JOB->res.lines);
171
+ if (median_gap <= 0) {
172
+ fprintf(stderr, "# Warning: non-positive median line gap of %d\n",
173
+ median_gap);
174
+ median_gap = 8;
175
+ max_single_space_gap = 12; /* arbitrary */
176
+ } else {
177
+ max_single_space_gap = median_gap * 7 / 4;
178
+ }
179
+
180
+ // Will be subtracted as base_indent to avoid negativ indent.
181
+ left_margin = get_least_line_indent(&JOB->res.boxlist,
182
+ JOB->res.lines.dx,
183
+ JOB->res.lines.dy);
184
+
185
+ if (JOB->cfg.out_format==XML) { /* subject of change */
186
+ char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
187
+ /* output lot of usefull information for XML filter */
188
+ sprintf(s1,"<page x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\">\n",
189
+ 0,0,0,0);
190
+ buffer=append_to_line(buffer,s1,&len);
191
+ sprintf(s1,"<block x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\">\n",
192
+ 0,0,0,0);
193
+ buffer=append_to_line(buffer,s1,&len);
194
+ }
195
+
196
+ for_each_data(&(JOB->res.boxlist)) {
197
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
198
+ line = box2->line;
199
+ line_info = JOB->res.lines;
200
+ /* reset the output char if certainty is below the limit v0.44 */
201
+ if (box2->num_ac && box2->wac[0]<JOB->cfg.certainty) box2->c=UNKNOWN;
202
+ if (line!=oldline) {
203
+ if (JOB->cfg.out_format==XML && oldline>-1) { /* subject of change */
204
+ buffer=append_to_line(buffer,"</line>\n",&len);
205
+ list_app( &(JOB->res.linelist), (void *)strdup(buffer) ); // wcsdup
206
+ memset(buffer, 0, len);
207
+ j=0; // reset counter for new line
208
+ }
209
+ if (JOB->cfg.out_format==XML) { /* subject of change */
210
+ char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
211
+ /* output lot of usefull information for XML filter */
212
+ sprintf(s1,"<line x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" value=\"%d\">\n",
213
+ line_info.x0[line],line_info.m1[line],
214
+ line_info.x1[line]-line_info.x0[line]+1,
215
+ line_info.m4[line]-line_info.m1[line],line);
216
+ buffer=append_to_line(buffer,s1,&len);
217
+ }
218
+ oldline=line;
219
+ }
220
+ if (box2->c > ' ' &&
221
+ box2->c <= 'z') i1++; /* count non-space chars */
222
+ if (box2->c == '\n') {
223
+ if (JOB->cfg.out_format!=XML) { /* subject of change */
224
+ line_info = JOB->res.lines;
225
+ line = box2->line;
226
+ if (line > 0) {
227
+ line_gap = line_info.m2[line] - line_info.m3[line - 1];
228
+ for (line_gap -= max_single_space_gap; line_gap > 0;
229
+ line_gap -= median_gap) {
230
+ buffer=append_to_line(buffer,"\n",&len);
231
+ j++; /* count chars in line */
232
+ }
233
+ }
234
+ list_app( &(JOB->res.linelist), (void *)strdup(buffer) ); // wcsdup
235
+ memset(buffer, 0, len);
236
+ j=0; // reset counter for new line
237
+ }
238
+ }
239
+ if (box2->c == ' ') // fill large gaps with spaces
240
+ {
241
+ if (JOB->res.avX) { /* avoid SIGFPE */
242
+ if (JOB->cfg.out_format==XML) { /* subject of change */
243
+ char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
244
+ /* output lot of usefull information for XML filter */
245
+ sprintf(s1," <space x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" />\n",
246
+ box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
247
+ buffer=append_to_line(buffer,s1,&len);
248
+ } else
249
+ for (i = (box2->x1 - box2->x0) / (2 * JOB->res.avX) + 1; i > 0; i--) {
250
+ buffer=append_to_line(buffer," ",&len);
251
+ j++; /* number of chars in line */
252
+ }
253
+ }
254
+ }
255
+ else if (box2->c != '\n') {
256
+ if (j==0 && JOB->res.avX) /* first char in new line? */ {
257
+ int indent = box2->x0 - JOB->res.lines.x0[box2->line];
258
+ /* correct for angle of page as a whole. */
259
+ if (JOB->res.lines.dx)
260
+ indent += box2->y0 * JOB->res.lines.dy / JOB->res.lines.dx;
261
+ /* subtract the base margin. */
262
+ indent -= left_margin;
263
+ if (JOB->cfg.out_format==XML) { /* subject of change */
264
+ char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
265
+ /* output lot of usefull information for XML filter */
266
+ sprintf(s1," <space x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" />\n",
267
+ box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
268
+ buffer=append_to_line(buffer,s1,&len);
269
+ } else
270
+ for (i = indent / JOB->res.avX; i > 0; i--) {
271
+ buffer=append_to_line(buffer," ",&len); j++;
272
+ }
273
+ }
274
+ if (JOB->cfg.out_format==XML) { /* subject of change */
275
+ char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
276
+ /* output lot of usefull information for XML filter */
277
+ sprintf(s1," <box x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" value=\"",
278
+ box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
279
+ buffer=append_to_line(buffer,s1,&len);
280
+ if (box2->num_ac>1) { /* ToDo: output a list of alternatives */
281
+ }
282
+ }
283
+ if (box2->c != UNKNOWN && box2->c != 0) {
284
+ buffer=
285
+ append_to_line(buffer,decode(box2->c,JOB->cfg.out_format),&len);
286
+ if (box2->c > ' ' &&
287
+ box2->c <= 'z') i2++; /* count non-space chars */
288
+ } else { /* c == UNKNOWN or 0 */
289
+ wchar_t cc; cc=box2->c;
290
+ if (box2->num_ac>0 && box2->tas[0]
291
+ && (JOB->cfg.out_format!=XML || box2->tas[0][0]!='<')) {
292
+ /* output glued chars or ... (?) Jan08 */
293
+ buffer=append_to_line(buffer,box2->tas[0],&len);
294
+ j+=strlen(box2->tas[0]);
295
+ } else { /* ToDo: leave string empty? set placeholder per option */
296
+ /* output dummy string to mark UNKNOWN */
297
+ if(JOB->cfg.unrec_marker[0])
298
+ buffer = append_to_line(buffer, JOB->cfg.unrec_marker, &len);
299
+ }
300
+ }
301
+ if (JOB->cfg.out_format==XML) {
302
+ if (box2->num_ac>0) {
303
+ /* output alist ToDo: separate <altbox ...> */
304
+ int i1; char s1[256];
305
+ sprintf(s1,"\" numac=\"%d\" weights=\"",box2->num_ac);
306
+ buffer=append_to_line(buffer,s1,&len);
307
+ for (i1=0;i1<box2->num_ac;i1++) {
308
+ sprintf(s1,"%d",box2->wac[i1]);
309
+ buffer=append_to_line(buffer,s1,&len);
310
+ if (i1+1<box2->num_ac) buffer=append_to_line(buffer,",",&len);
311
+ }
312
+ if (box2->num_ac>1)
313
+ buffer=append_to_line(buffer,"\" achars=\"",&len);
314
+ for (i1=1;i1<box2->num_ac;i1++) {
315
+ if (box2->tas[i1] && box2->tas[i1][0]!='<')
316
+ buffer=append_to_line(buffer,box2->tas[i1],&len);
317
+ else
318
+ buffer=append_to_line(buffer,
319
+ decode(box2->tac[i1],JOB->cfg.out_format),&len);
320
+ // ToDo: add tas[] (achars->avalues or alternate_strings?
321
+ if (i1+1<box2->num_ac) buffer=append_to_line(buffer,",",&len);
322
+ }
323
+ }
324
+ buffer=append_to_line(buffer,"\" />\n",&len);
325
+ }
326
+ if (box2->num_ac && box2->tas[0]) {
327
+ if (box2->tas[0][0]=='<') { /* output special XML object */
328
+ buffer=append_to_line(buffer,box2->tas[0],&len);
329
+ buffer=append_to_line(buffer,"\n",&len);
330
+ j+=strlen(box2->tas[0]);
331
+ }
332
+ }
333
+ j++; /* number of chars in line */
334
+ }
335
+ i++;
336
+ } end_for_each(&(JOB->res.boxlist));
337
+ if (JOB->cfg.out_format==XML && oldline>-1) { /* subject of change */
338
+ buffer=append_to_line(buffer,"</line>\n",&len);
339
+ }
340
+ if (JOB->cfg.out_format==XML) { /* subject of change */
341
+ buffer=append_to_line(buffer,"</block>\n</page>\n",&len);
342
+ }
343
+
344
+ /* do not forget last line */
345
+ // is there no \n in the last line? If there is, delete next line.
346
+ list_app( &(JOB->res.linelist), (void *)strdup(buffer) );
347
+ free(buffer);
348
+ if( JOB->cfg.verbose&1 )
349
+ fprintf(stderr,"... %d lines, boxes= %d, chars= %d\n",i,i1,i2);
350
+ }