isbn 2.0.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,87 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 1999 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
- */
21
-
22
- #include <stdio.h>
23
- #include <stdlib.h>
24
- #include <assert.h>
25
-
26
- #include "tga.h"
27
-
28
- typedef unsigned char byte;
29
-
30
- // --- needed for reading TGA-files
31
- #if 0
32
- char read_b(FILE *f1){ // filter #-comments
33
- char c;
34
- c=fgetc(f1); assert(!feof(f1)); assert(!ferror(f1));
35
- return c;
36
- }
37
- #endif
38
-
39
- //byte tga[18]={ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,24,32};
40
- /* header_hex= 00 00 02 00 00 00 00 00 00 00 00 00 xl xh yl yh
41
- * 18 20 -- -- -- -- -- -- -- -- -- -- -- -- -- -- */
42
-
43
- void readtga(char *name,pix *p,int mode){ // see pcx.format.txt
44
- // mode: 0=gray,1=RGB
45
- int nx,ny,i,x,y;
46
- FILE *f1;
47
- unsigned char *pic,h[18];
48
-
49
- f1=fopen(name,"rb"); if(!f1) fprintf(stderr," error opening file\n");
50
- assert(f1); // open-error
51
- assert(fread(h,1,18,f1)==18); /* 18 Byte lesen -> h[] */
52
- assert(h[ 0]== 0); // TGA0
53
- assert(h[ 1]== 0); // TGA1
54
- assert(h[ 2]== 2); // TGA2 no run length encoding
55
- for(i=3;i<12;i++)
56
- assert(h[ i]== 0); // ???
57
- assert(h[16]==0x18); // TGA16
58
- assert(h[17]==0x20); // TGA17
59
- nx = h[12] + (h[13]<<8); /* x-dimension low high */
60
- ny = h[14] + (h[15]<<8); /* y-dimension low high */
61
- fprintf(stderr,"# TGA version=%d x=%d y=%d", h[2],nx,ny );
62
- fflush(stdout);
63
- pic=(unsigned char *)malloc( 3*nx*ny );
64
- assert(pic!=NULL); // no memory
65
- assert(ny==(int)fread(pic,3*nx,ny,f1)); // read all lines BGR
66
- if(mode==0)
67
- {
68
- for(y=0;y<ny;y++) /* BGR => gray */
69
- for(x=0;x<nx;x++)
70
- { i=x+y*nx; pic[i]=(pic[i*3+0]+pic[i*3+1]+pic[i*3+2])/3; }
71
- }
72
- else
73
- if(mode==1)
74
- {
75
- byte b;
76
- for(y=0;y<ny;y++) /* BGR => RGB */
77
- for(x=0;x<nx;x++)
78
- { i=x+y*nx; b=pic[i*3+0]; pic[i*3+0]=pic[i*3+2]; pic[i*3+2]=b; }
79
- }
80
- else assert(0); // wrong mode
81
- fclose(f1);
82
- p->p=pic; p->x=nx; p->y=ny; p->bpp=1+2*mode;
83
- fprintf(stderr," mode=%d\n",mode);
84
- }
85
-
86
- // ------------------------------------------------------------------------
87
-
@@ -1,6 +0,0 @@
1
-
2
- #include "pnm.h"
3
-
4
- void readtga(char *name,pix *p,int mode); // mode: 0=gray 1=RGB
5
-
6
- // ------------------------------------------------------------------------
@@ -1,1314 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2007 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
- */
21
-
22
- #include "unicode.h"
23
- #include <stdio.h>
24
-
25
- /* FIXME jb global */
26
- int warn=0; /* if 1 a message is generated if composition is not defined */
27
-
28
- /* Arguments: the character (main), and the modifier (accent, etc). See the
29
- function if you want to know the modifiers.
30
- Description: This function intends to be a small helper, to avoid having
31
- to write switches in functions. It's therefore mainly to accents, and
32
- specially for the most usual ones. It supports the basic greek
33
- characters too, which is actually not very helpful.
34
- Returns: the unicode character corresponding to the composed character.
35
-
36
- ToDo:
37
- - It seems to me, that tables should be more effectiv.
38
- So we should use tables in future? (js)
39
- */
40
- wchar_t compose(wchar_t main, wchar_t modifier) {
41
- /* supported by now: part of ISO8859-1, basic greek characters */
42
- if( main == UNKNOWN || main == PICTURE ) return main;
43
- #ifdef DEBUG
44
- if(modifier!=UNICODE_NULL && modifier!=SPACE)
45
- printf(" compose(%c,%d)",(char)main,(int)modifier);
46
- #endif
47
- if(main>127 && modifier!=0 && modifier!=SPACE && warn)
48
- fprintf(stderr,"# Warning compose %04x + %04x>127\n",
49
- (int)modifier,(int)main);
50
- switch (modifier) {
51
- case UNICODE_NULL:
52
- case SPACE:
53
- return (wchar_t)main;
54
-
55
- case APOSTROPHE: /* do NOT USE this. It's here for compatibility only.
56
- Use ACUTE_ACCENT instead. */
57
- fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
58
-
59
- case ACUTE_ACCENT: /* acute/cedilla */
60
- switch (main) {
61
- case 'a': return LATIN_SMALL_LETTER_A_WITH_ACUTE;
62
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_ACUTE;
63
- case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_ACUTE;
64
- case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_ACUTE;
65
- case 'c': return LATIN_SMALL_LETTER_C_WITH_ACUTE;
66
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_ACUTE;
67
- case 'e': return LATIN_SMALL_LETTER_E_WITH_ACUTE;
68
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_ACUTE;
69
- case 'g': return LATIN_SMALL_LETTER_G_WITH_ACUTE;
70
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_ACUTE;
71
- case 'i': return LATIN_SMALL_LETTER_I_WITH_ACUTE;
72
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_ACUTE;
73
- case 'l': return LATIN_SMALL_LETTER_L_WITH_ACUTE;
74
- case 'L': return LATIN_CAPITAL_LETTER_L_WITH_ACUTE;
75
- case 'n': return LATIN_SMALL_LETTER_N_WITH_ACUTE;
76
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_ACUTE;
77
- case 'o': return LATIN_SMALL_LETTER_O_WITH_ACUTE;
78
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
79
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
80
- case 'r': return LATIN_SMALL_LETTER_R_WITH_ACUTE;
81
- case 'R': return LATIN_CAPITAL_LETTER_R_WITH_ACUTE;
82
- case 's': return LATIN_SMALL_LETTER_S_WITH_ACUTE;
83
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_ACUTE;
84
- case 'u': return LATIN_SMALL_LETTER_U_WITH_ACUTE;
85
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_ACUTE;
86
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_ACUTE;
87
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_ACUTE;
88
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_ACUTE;
89
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_ACUTE;
90
- default:
91
- if(warn)fprintf( stderr, " COMPOSE: ACUTE_ACCENT+%04x not defined\n",(int)main);
92
- }
93
- break;
94
-
95
- case BREVE: /* caron (latin2) "u"-above-... (small bow) */
96
- switch (main) {
97
- /* FIXME write separate heuristics for breve */
98
- case 'a': return LATIN_SMALL_LETTER_A_WITH_BREVE;
99
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_BREVE;
100
- case 'e': return LATIN_SMALL_LETTER_E_WITH_BREVE;
101
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_BREVE;
102
- case 'g': return LATIN_SMALL_LETTER_G_WITH_BREVE;
103
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_BREVE;
104
- case 'i': return LATIN_SMALL_LETTER_I_WITH_BREVE;
105
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_BREVE;
106
- case 'o': return LATIN_SMALL_LETTER_O_WITH_BREVE;
107
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_BREVE;
108
- case 'u': return LATIN_SMALL_LETTER_U_WITH_BREVE;
109
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_BREVE;
110
- default:
111
- if(warn)fprintf( stderr, " COMPOSE: BREVE+%04x not defined\n",(int)main);
112
- }
113
- break;
114
-
115
- case CARON: /* caron (latin2) "v"-above-... */
116
- switch (main) {
117
- case 'a': return LATIN_SMALL_LETTER_A_WITH_CARON;
118
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CARON;
119
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CARON;
120
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CARON;
121
- case 'e': return LATIN_SMALL_LETTER_E_WITH_CARON;
122
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CARON;
123
- case 'i': return LATIN_SMALL_LETTER_I_WITH_CARON;
124
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CARON;
125
- case 'o': return LATIN_SMALL_LETTER_O_WITH_CARON;
126
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
127
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
128
- case 's': return LATIN_SMALL_LETTER_S_WITH_CARON;
129
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CARON;
130
- case 'u': return LATIN_SMALL_LETTER_U_WITH_CARON;
131
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CARON;
132
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_CARON;
133
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_CARON;
134
- default:
135
- if(warn)fprintf( stderr, " COMPOSE: CARON+%04x not defined\n",(int)main);
136
- }
137
- break;
138
-
139
- case CEDILLA:
140
- switch (main) {
141
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CEDILLA;
142
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
143
- default:
144
- if(warn)fprintf( stderr, " COMPOSE: CEDILLA+%04x not defined\n",(int)main);
145
- }
146
- break;
147
-
148
- case TILDE:
149
- switch (main) {
150
- case 'a': return LATIN_SMALL_LETTER_A_WITH_TILDE;
151
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_TILDE;
152
- case 'i': return LATIN_SMALL_LETTER_I_WITH_TILDE;
153
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_TILDE;
154
- case 'n': return LATIN_SMALL_LETTER_N_WITH_TILDE;
155
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_TILDE;
156
- case 'o': return LATIN_SMALL_LETTER_O_WITH_TILDE;
157
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
158
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
159
- case 'u': return LATIN_SMALL_LETTER_U_WITH_TILDE;
160
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_TILDE;
161
- default:
162
- if(warn)fprintf( stderr, " COMPOSE: TILDE+%04x not defined\n",(int)main);
163
- }
164
- break;
165
-
166
- case GRAVE_ACCENT:
167
- switch (main) {
168
- case 'a': return LATIN_SMALL_LETTER_A_WITH_GRAVE;
169
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_GRAVE;
170
- case 'e': return LATIN_SMALL_LETTER_E_WITH_GRAVE;
171
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_GRAVE;
172
- case 'i': return LATIN_SMALL_LETTER_I_WITH_GRAVE;
173
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_GRAVE;
174
- case 'n': return LATIN_SMALL_LETTER_N_WITH_GRAVE;
175
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_GRAVE;
176
- case 'o': return LATIN_SMALL_LETTER_O_WITH_GRAVE;
177
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
178
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
179
- case 'u': return LATIN_SMALL_LETTER_U_WITH_GRAVE;
180
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_GRAVE;
181
- default:
182
- if(warn)fprintf( stderr, " COMPOSE: GRAVE_ACCENT+%04x not defined\n",(int)main);
183
- }
184
- break;
185
-
186
- case QUOTATION_MARK: /* do NOT USE this. It's here for compatibility only.
187
- Use DIAERESIS instead. */
188
- fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
189
-
190
- case DIAERESIS:
191
- switch (main) {
192
- case 'a': return LATIN_SMALL_LETTER_A_WITH_DIAERESIS;
193
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS;
194
- case 'e': return LATIN_SMALL_LETTER_E_WITH_DIAERESIS;
195
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS;
196
- case 'i': return LATIN_SMALL_LETTER_I_WITH_DIAERESIS;
197
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS;
198
- case 'o': return LATIN_SMALL_LETTER_O_WITH_DIAERESIS;
199
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
200
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
201
- case 'u': return LATIN_SMALL_LETTER_U_WITH_DIAERESIS;
202
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS;
203
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
204
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
205
- default:
206
- if(warn)fprintf( stderr, " COMPOSE: DIAERESIS+%04x (%c) not defined\n",(int)main,(char)main);
207
- }
208
- break;
209
-
210
- case CIRCUMFLEX_ACCENT: /* ^ */
211
- switch (main) {
212
- case 'a': return LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX;
213
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX;
214
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX;
215
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX;
216
- case 'e': return LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX;
217
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX;
218
- case 'g': return LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX;
219
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX;
220
- case 'h': return LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX;
221
- case 'H': return LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX;
222
- case 'i': return LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX;
223
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX;
224
- case 'j': return LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX;
225
- case 'J': return LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX;
226
- case 'o': return LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX;
227
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
228
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
229
- case 's': return LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX;
230
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX;
231
- case 'u': return LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX;
232
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX;
233
- case 'w': return LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX;
234
- case 'W': return LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX;
235
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX;
236
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX;
237
- default:
238
- if(warn)fprintf( stderr, " COMPOSE: CIRCUMFLEX_ACCENT+%04x not defined\n",(int)main);
239
- }
240
- break;
241
-
242
- case MACRON: /* a minus sign above the char (latin2) */
243
- switch (main) {
244
- case 'a': return LATIN_SMALL_LETTER_A_WITH_MACRON;
245
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_MACRON;
246
- case 'e': return LATIN_SMALL_LETTER_E_WITH_MACRON;
247
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_MACRON;
248
- case 'i': return LATIN_SMALL_LETTER_I_WITH_MACRON;
249
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_MACRON;
250
- case 'o': return LATIN_SMALL_LETTER_O_WITH_MACRON;
251
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_MACRON;
252
- case 'u': return LATIN_SMALL_LETTER_U_WITH_MACRON;
253
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_MACRON;
254
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_MACRON;
255
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_MACRON;
256
- case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_MACRON;
257
- case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_MACRON;
258
- case '=': return IDENTICAL_TO;
259
- case '-': return '=';
260
- case ' ': return MODIFIER_LETTER_MACRON;
261
- default:
262
- if(warn)fprintf( stderr, " COMPOSE: MACRON+%04x not defined\n",(int)main);
263
- }
264
- break;
265
-
266
- case DOT_ABOVE: /* latin2 */
267
- switch (main) {
268
- case 'a': return LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE;
269
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE;
270
- case 'c': return LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE;
271
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE;
272
- case 'e': return LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE;
273
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE;
274
- case 'g': return LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE;
275
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE;
276
- case 'l': return 'i'; /* correct wrong recognition */
277
- case 'i': return 'i';
278
- case LATIN_SMALL_LETTER_DOTLESS_I: return 'i';
279
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
280
- case 'j': return 'j';
281
- case 'o': return LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE;
282
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE;
283
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE;
284
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE;
285
- case ',': return ';';
286
- case '.': return ':';
287
- default:
288
- if(warn)fprintf( stderr, " COMPOSE: DOT_ABOVE+%04x not defined\n",(int)main);
289
- }
290
- break;
291
-
292
- case RING_ABOVE:
293
- switch (main) {
294
- case 'a': return LATIN_SMALL_LETTER_A_WITH_RING_ABOVE;
295
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE;
296
- case 'u': return LATIN_SMALL_LETTER_U_WITH_RING_ABOVE;
297
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE;
298
- default:
299
- if(warn)fprintf( stderr, " COMPOSE: RING_ABOVE+%04x not defined\n",(int)main);
300
- }
301
- break;
302
-
303
- case 'e': /* e ligatures: ae, oe. */
304
- case 'E':
305
- switch (main) {
306
- case 'a': return LATIN_SMALL_LETTER_AE;
307
- case 'A': return LATIN_CAPITAL_LETTER_AE;
308
- case 'o': return LATIN_SMALL_LIGATURE_OE;
309
- case 'O': return LATIN_CAPITAL_LIGATURE_OE;
310
- case '0': return LATIN_CAPITAL_LIGATURE_OE;
311
- default:
312
- if(warn)fprintf( stderr, " COMPOSE: %04x+e/E not defined\n",(int)main);
313
- }
314
- break;
315
-
316
- case 'g': /* greek */
317
- switch (main) {
318
- /* missing 0x37A-0x390 */
319
- /* weird cases: Q -> theta (it resembles a little, doesn't it?)
320
- V -> psi (what can I do?) */
321
- case 'A': return GREEK_CAPITAL_LETTER_ALPHA;
322
- case 'B': return GREEK_CAPITAL_LETTER_BETA;
323
- case 'G': return GREEK_CAPITAL_LETTER_GAMMA;
324
- case 'D': return GREEK_CAPITAL_LETTER_DELTA;
325
- case 'E': return GREEK_CAPITAL_LETTER_EPSILON;
326
- case 'Z': return GREEK_CAPITAL_LETTER_ZETA;
327
- case 'H': return GREEK_CAPITAL_LETTER_ETA;
328
- case 'Q': return GREEK_CAPITAL_LETTER_THETA;
329
- case 'I': return GREEK_CAPITAL_LETTER_IOTA;
330
- case 'K': return GREEK_CAPITAL_LETTER_KAPPA;
331
- case 'L': return GREEK_CAPITAL_LETTER_LAMDA;
332
- case 'M': return GREEK_CAPITAL_LETTER_MU;
333
- case 'N': return GREEK_CAPITAL_LETTER_NU;
334
- case 'X': return GREEK_CAPITAL_LETTER_XI;
335
- case 'O': return GREEK_CAPITAL_LETTER_OMICRON;
336
- case 'P': return GREEK_CAPITAL_LETTER_PI;
337
- case 'R': return GREEK_CAPITAL_LETTER_RHO;
338
- case 'S': return GREEK_CAPITAL_LETTER_SIGMA;
339
- case 'T': return GREEK_CAPITAL_LETTER_TAU;
340
- case 'Y': return GREEK_CAPITAL_LETTER_UPSILON;
341
- case 'F': return GREEK_CAPITAL_LETTER_PHI;
342
- case 'C': return GREEK_CAPITAL_LETTER_CHI;
343
- case 'V': return GREEK_CAPITAL_LETTER_PSI;
344
- case 'W': return GREEK_CAPITAL_LETTER_OMEGA;
345
- /*
346
- case '': return GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA;
347
- case '': return GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA;
348
- case '': return GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
349
- case '': return GREEK_SMALL_LETTER_EPSILON_WITH_TONOS;
350
- case '': return GREEK_SMALL_LETTER_ETA_WITH_TONOS;
351
- case '': return GREEK_SMALL_LETTER_IOTA_WITH_TONOS;
352
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS;
353
- */
354
- case 'a': return GREEK_SMALL_LETTER_ALPHA;
355
- case 'b': return GREEK_SMALL_LETTER_BETA;
356
- case 'g': return GREEK_SMALL_LETTER_GAMMA;
357
- case 'd': return GREEK_SMALL_LETTER_DELTA;
358
- case 'e': return GREEK_SMALL_LETTER_EPSILON;
359
- case 'z': return GREEK_SMALL_LETTER_ZETA;
360
- case 'h': return GREEK_SMALL_LETTER_ETA;
361
- case 'q': return GREEK_SMALL_LETTER_THETA;
362
- case 'i': return GREEK_SMALL_LETTER_IOTA;
363
- case 'k': return GREEK_SMALL_LETTER_KAPPA;
364
- case 'l': return GREEK_SMALL_LETTER_LAMDA;
365
- case 'm': return GREEK_SMALL_LETTER_MU;
366
- case 'n': return GREEK_SMALL_LETTER_NU;
367
- case 'x': return GREEK_SMALL_LETTER_XI;
368
- case 'o': return GREEK_SMALL_LETTER_OMICRON;
369
- case 'p': return GREEK_SMALL_LETTER_PI;
370
- case 'r': return GREEK_SMALL_LETTER_RHO;
371
- case '&': return GREEK_SMALL_LETTER_FINAL_SIGMA;
372
- case 's': return GREEK_SMALL_LETTER_SIGMA;
373
- case 't': return GREEK_SMALL_LETTER_TAU;
374
- case 'y': return GREEK_SMALL_LETTER_UPSILON;
375
- case 'f': return GREEK_SMALL_LETTER_PHI;
376
- case 'c': return GREEK_SMALL_LETTER_CHI;
377
- case 'v': return GREEK_SMALL_LETTER_PSI;
378
- case 'w': return GREEK_SMALL_LETTER_OMEGA;
379
- /*
380
- case '': return GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA;
381
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA;
382
- case '': return GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
383
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
384
- case '': return GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
385
- case '': return GREEK_BETA_SYMBOL;
386
- case '': return GREEK_THETA_SYMBOL;
387
- case '': return GREEK_UPSILON_WITH_HOOK_SYMBOL;
388
- case '': return GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL;
389
- case '': return GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL;
390
- case '': return GREEK_PHI_SYMBOL;
391
- case '': return GREEK_PI_SYMBOL;
392
- */
393
- default:
394
- if(warn)fprintf( stderr, " COMPOSE: GREEK %04x not defined\n",(int)main);
395
- }
396
- break;
397
-
398
- default:
399
- fprintf( stderr, " COMPOSE: modifier %04x not defined\n",(int)modifier);
400
- }
401
- return (wchar_t)main;
402
- }
403
-
404
- #define UNDEFINED "~" /* placeholder string decode() returns for characters it has no rendering for */
405
-
406
- /* Arguments: character in Unicode format, type of format to convert to.
407
- Returns: a string containing the Unicode character converted to the chosen
408
- format. This string is statically allocated and should not be freed.
409
- ToDo: better using tables?
410
- */
411
- const char *decode(wchar_t c, FORMAT type) {
412
- /* static char d; --- js: big bug (missing \0) if &d returned */
413
- /*FIXME jb static*/ static char bbuf[8*32]; /* space for 8 buffers, rotating */
414
- /*FIXME jb static*/ static char *buf=bbuf; /* used for UTF8 sequences and undefined codes */
415
- buf+=32; if(buf>=bbuf+8*32) buf=bbuf;
416
- buf[0]=buf[1]=buf[2]=0;
417
- switch (type) {
418
- case ISO8859_1:
419
- if ( c <= 0xFF ) { /* UNICODE == ISO8859-1 */
420
- buf[0] = (char)c;
421
- return buf;
422
- }
423
- switch (c) { /* not found in list, but perhaps we can describe it */
424
- /* todo: add greek. GREEK_SMALL_LETTER_ALPHA = alpha */
425
-
426
- /* general punctuation */
427
- case HYPHEN:
428
- return (const char *)"-";
429
- case FIGURE_DASH:
430
- case EN_DASH:
431
- return (const char *)"--";
432
- case EM_DASH:
433
- return (const char *)"---";
434
- case LEFT_SINGLE_QUOTATION_MARK:
435
- return (const char *)"`";
436
- case RIGHT_SINGLE_QUOTATION_MARK:
437
- return (const char *)"'";
438
- case SINGLE_LOW_9_QUOTATION_MARK:
439
- return (const char *)",";
440
- case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
441
- return (const char *)UNDEFINED;
442
- case LEFT_DOUBLE_QUOTATION_MARK:
443
- return (const char *)"``";
444
- case RIGHT_DOUBLE_QUOTATION_MARK:
445
- return (const char *)"''";
446
- case DOUBLE_LOW_9_QUOTATION_MARK:
447
- return (const char *)",,";
448
- case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
449
- return (const char *)UNDEFINED;
450
- case DAGGER:
451
- return (const char *)"+";
452
- case DOUBLE_DAGGER:
453
- return (const char *)"*";
454
- case BULLET:
455
- return (const char *)"*";
456
- case TRIANGULAR_BULLET:
457
- return (const char *)"*";
458
- case HYPHENATION_POINT:
459
- return (const char *)"-";
460
- case HORIZONTAL_ELLIPSIS:
461
- return (const char *)"...";
462
- case PER_MILLE_SIGN:
463
- return (const char *)"%%"; /* awk! */
464
- case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
465
- return (const char *)"<";
466
- case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
467
- return (const char *)">";
468
- case EURO_CURRENCY_SIGN:
469
- return (const char *)"EUR"; /* change it! */
470
-
471
- /* ligatures */
472
- case LATIN_SMALL_LIGATURE_FF:
473
- return (const char *)"ff";
474
- case LATIN_SMALL_LIGATURE_FI:
475
- return (const char *)"fi";
476
- case LATIN_SMALL_LIGATURE_FL:
477
- return (const char *)"fl";
478
- case LATIN_SMALL_LIGATURE_FFI:
479
- return (const char *)"ffi";
480
- case LATIN_SMALL_LIGATURE_FFL:
481
- return (const char *)"ffl";
482
- case LATIN_SMALL_LIGATURE_LONG_S_T:
483
- case LATIN_SMALL_LIGATURE_ST:
484
- return (const char *)"st";
485
-
486
- /* extra */
487
- case UNKNOWN:
488
- return (const char *)"_";
489
- case PICTURE:
490
- return (const char *)"_"; /* Due to Mobile OCR */
491
-
492
- default:
493
- /* snprintf seems to be no standard, so I use insecure sprintf */
494
- sprintf(buf,"\\code(%04x)",(unsigned)c);
495
- return buf; /* UNDEFINED; */
496
- }
497
- break;
498
- case TeX:
499
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
500
- switch (c) {
501
- case '$':
502
- return (const char *)"\\$";
503
- case '&':
504
- return (const char *)"\\&";
505
- case '%':
506
- return (const char *)"\\%";
507
- case '#':
508
- return (const char *)"\\#";
509
- case '_':
510
- return (const char *)"\\_";
511
- case '{':
512
- return (const char *)"\\{";
513
- case '}':
514
- return (const char *)"\\}";
515
- case '\\':
516
- return (const char *)"$\\backslash$";
517
- case '~':
518
- return (const char *)"\\~{}";
519
- case '^':
520
- return (const char *)"\\^{}";
521
- default:
522
- buf[0] = (char)c;
523
- return (const char *)buf;
524
- }
525
- }
526
- switch (c) {
527
- /* ISO8859_1 */
528
- case NO_BREAK_SPACE:
529
- return (const char *)"~";
530
- case INVERTED_EXCLAMATION_MARK:
531
- return (const char *)"!'";
532
- case CENT_SIGN:
533
- return (const char *)"\\textcent"; /* \usepackage{textcomp} */
534
- case POUND_SIGN:
535
- return (const char *)"\\pounds";
536
- case EURO_CURRENCY_SIGN:
537
- return (const char *)"\\euro"; /* \usepackage{eurosans} */
538
- case CURRENCY_SIGN:
539
- return (const char *)"\\textcurrency"; /* \usepackage{textcomp} */
540
- case YEN_SIGN:
541
- return (const char *)"\\textyen"; /* \usepackage{textcomp} */
542
- case BROKEN_BAR:
543
- return (const char *)"\\textbrokenbar"; /* \usepackage{textcomp} */
544
- case SECTION_SIGN:
545
- return (const char *)"\\S";
546
- case DIAERESIS:
547
- return (const char *)"\"";
548
- case COPYRIGHT_SIGN:
549
- return (const char *)"\\copyright";
550
- case FEMININE_ORDINAL_INDICATOR:
551
- return (const char *)"$^{\\underbar{a}}$";
552
- case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
553
- return (const char *)"\\flqq{}";
554
- case NOT_SIGN:
555
- return (const char *)"$\\lnot$";
556
- case SOFT_HYPHEN:
557
- return (const char *)"\\-";
558
- case REGISTERED_SIGN:
559
- return (const char *)"\\textregistered";/* \usepackage{textcomp} */
560
- case MACRON:
561
- return (const char *)"\\textasciimacron";/* \usepackage{textcomp} */
562
- case DEGREE_SIGN:
563
- return (const char *)"$^{o}$";
564
- case PLUS_MINUS_SIGN:
565
- return (const char *)"$\\pm$";
566
- case SUPERSCRIPT_TWO:
567
- return (const char *)"$^{2}$";
568
- case SUPERSCRIPT_THREE:
569
- return (const char *)"$^{3}$";
570
- case ACUTE_ACCENT:
571
- return (const char *)"\\( \\prime \\)";
572
- case MICRO_SIGN:
573
- return (const char *)"$\\mu$";
574
- case PILCROW_SIGN:
575
- return (const char *)"\\P";
576
- case MIDDLE_DOT:
577
- return (const char *)"$\\cdot$";
578
- case CEDILLA:
579
- return (const char *)"\\,";
580
- case SUPERSCRIPT_ONE:
581
- return (const char *)"$^{1}$";
582
- case MASCULINE_ORDINAL_INDICATOR:
583
- return (const char *)"$^{\\underbar{o}}$";
584
- case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
585
- return (const char *)"\\frqq{}";
586
- case VULGAR_FRACTION_ONE_QUARTER: /* these fractions are not good*/
587
- return (const char *)"\\( 1\\over 4 \\)";
588
- case VULGAR_FRACTION_ONE_HALF:
589
- return (const char *)"\\( 1\\over 2 \\)";
590
- case VULGAR_FRACTION_THREE_QUARTERS:
591
- return (const char *)"\\( 3\\over 4 \\)";
592
- case INVERTED_QUESTION_MARK:
593
- return (const char *)"?'";
594
- case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
595
- return (const char *)"\\`A";
596
- case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
597
- return (const char *)"\\'A";
598
- case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
599
- return (const char *)"\\^A";
600
- case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
601
- return (const char *)"\\~A";
602
- case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
603
- return (const char *)"\\\"A";
604
- case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
605
- return (const char *)"\\AA";
606
- case LATIN_CAPITAL_LETTER_AE:
607
- return (const char *)"\\AE";
608
- case LATIN_CAPITAL_LETTER_C_WITH_CARON:
609
- return (const char *)"\\v{C}";
610
- case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
611
- return (const char *)"\\C";
612
- case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
613
- return (const char *)"\\`E";
614
- case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
615
- return (const char *)"\\'E";
616
- case LATIN_CAPITAL_LETTER_E_WITH_CARON:
617
- return (const char *)"\\v{E}";
618
- case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
619
- return (const char *)"\\^E";
620
- case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
621
- return (const char *)"\\\"E";
622
- case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
623
- return (const char *)"\\`I";
624
- case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
625
- return (const char *)"\\'I";
626
- case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
627
- return (const char *)"\\^I";
628
- case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
629
- return (const char *)"\\\"I";
630
- case LATIN_CAPITAL_LETTER_ETH:
631
- return (const char *)UNDEFINED;
632
- case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
633
- return (const char *)"\\~N";
634
- case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
635
- return (const char *)"\\`O";
636
- case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
637
- return (const char *)"\\'O";
638
- case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
639
- return (const char *)"\\^O";
640
- case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
641
- return (const char *)"\\~O";
642
- case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
643
- return (const char *)"\\\"O";
644
- case MULTIPLICATION_SIGN:
645
- return (const char *)"$\\times$";
646
- case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
647
- return (const char *)"\\O";
648
- case LATIN_CAPITAL_LETTER_S_WITH_CARON:
649
- return (const char *)"\\v{S}";
650
- case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
651
- return (const char *)"\\`U";
652
- case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
653
- return (const char *)"\\'U";
654
- case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
655
- return (const char *)"\\^U";
656
- case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
657
- return (const char *)"\\\"U";
658
- case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
659
- return (const char *)"\\'Y";
660
- case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
661
- return (const char *)"\\v{Z}";
662
- case LATIN_CAPITAL_LETTER_THORN:
663
- return (const char *)UNDEFINED;
664
- case LATIN_SMALL_LETTER_SHARP_S:
665
- return (const char *)"\\ss";
666
- case LATIN_SMALL_LETTER_A_WITH_GRAVE:
667
- return (const char *)"\\`a";
668
- case LATIN_SMALL_LETTER_A_WITH_ACUTE:
669
- return (const char *)"\\'a";
670
- case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
671
- return (const char *)"\\^a";
672
- case LATIN_SMALL_LETTER_A_WITH_TILDE:
673
- return (const char *)"\\~a";
674
- case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
675
- return (const char *)"\\\"a";
676
- case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
677
- return (const char *)"\\aa";
678
- case LATIN_SMALL_LETTER_AE:
679
- return (const char *)"\\ae";
680
- case LATIN_SMALL_LETTER_C_WITH_CARON:
681
- return (const char *)"\\v{c}";
682
- case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
683
- return (const char *)"\\c";
684
- case LATIN_SMALL_LETTER_E_WITH_GRAVE:
685
- return (const char *)"\\`e";
686
- case LATIN_SMALL_LETTER_E_WITH_ACUTE:
687
- return (const char *)"\\'e";
688
- case LATIN_SMALL_LETTER_E_WITH_CARON:
689
- return (const char *)"\\v{e}";
690
- case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
691
- return (const char *)"\\^e";
692
- case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
693
- return (const char *)"\\\"e";
694
- case LATIN_SMALL_LETTER_I_WITH_GRAVE:
695
- return (const char *)"\\`i";
696
- case LATIN_SMALL_LETTER_I_WITH_ACUTE:
697
- return (const char *)"\\'i";
698
- case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
699
- return (const char *)"\\^i";
700
- case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
701
- return (const char *)"\\\"i";
702
- case LATIN_SMALL_LETTER_ETH:
703
- return (const char *)UNDEFINED;
704
- case LATIN_SMALL_LETTER_N_WITH_TILDE:
705
- return (const char *)"\\~n";
706
- case LATIN_SMALL_LETTER_O_WITH_GRAVE:
707
- return (const char *)"\\`o";
708
- case LATIN_SMALL_LETTER_O_WITH_ACUTE:
709
- return (const char *)"\\'o";
710
- case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
711
- return (const char *)"\\^o";
712
- case LATIN_SMALL_LETTER_O_WITH_TILDE:
713
- return (const char *)"\\~o";
714
- case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
715
- return (const char *)"\\\"o";
716
- case DIVISION_SIGN:
717
- return (const char *)"$\\div$";
718
- case LATIN_SMALL_LETTER_O_WITH_STROKE:
719
- return (const char *)"\\o";
720
- case LATIN_SMALL_LETTER_S_WITH_CARON:
721
- return (const char *)"\\v{s}";
722
- case LATIN_SMALL_LETTER_U_WITH_GRAVE:
723
- return (const char *)"\\`u";
724
- case LATIN_SMALL_LETTER_U_WITH_ACUTE:
725
- return (const char *)"\\'u";
726
- case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
727
- return (const char *)"\\^u";
728
- case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
729
- return (const char *)"\\\"u";
730
- case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
731
- return (const char *)"\\'y";
732
- case LATIN_SMALL_LETTER_THORN:
733
- return (const char *)UNDEFINED;
734
- case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
735
- return (const char *)"\\\"y";
736
- case LATIN_SMALL_LETTER_Z_WITH_CARON:
737
- return (const char *)"\\v{z}";
738
-
739
- /* greek */
740
- /* some (punctuation, accents, accented capital) greek letters missing*/
741
- case GREEK_CAPITAL_LETTER_ALPHA:
742
- return (const char *)"A";
743
- case GREEK_CAPITAL_LETTER_BETA:
744
- return (const char *)"B";
745
- case GREEK_CAPITAL_LETTER_GAMMA:
746
- return (const char *)"\\( \\Gamma \\)";
747
- case GREEK_CAPITAL_LETTER_DELTA:
748
- return (const char *)"\\( \\Delta \\)";
749
- case GREEK_CAPITAL_LETTER_EPSILON:
750
- return (const char *)"E";
751
- case GREEK_CAPITAL_LETTER_ZETA:
752
- return (const char *)"Z";
753
- case GREEK_CAPITAL_LETTER_ETA:
754
- return (const char *)"H";
755
- case GREEK_CAPITAL_LETTER_THETA:
756
- return (const char *)"\\( \\Theta \\)";
757
- case GREEK_CAPITAL_LETTER_IOTA:
758
- return (const char *)"I";
759
- case GREEK_CAPITAL_LETTER_KAPPA:
760
- return (const char *)"K";
761
- case GREEK_CAPITAL_LETTER_LAMDA:
762
- return (const char *)"\\( \\Lambda \\)";
763
- case GREEK_CAPITAL_LETTER_MU:
764
- return (const char *)"M";
765
- case GREEK_CAPITAL_LETTER_NU:
766
- return (const char *)"N";
767
- case GREEK_CAPITAL_LETTER_XI:
768
- return (const char *)"\\( \\Xi \\)";
769
- case GREEK_CAPITAL_LETTER_OMICRON:
770
- return (const char *)"O";
771
- case GREEK_CAPITAL_LETTER_PI:
772
- return (const char *)"\\( \\Pi \\)";
773
- case GREEK_CAPITAL_LETTER_RHO:
774
- return (const char *)"P";
775
- case GREEK_CAPITAL_LETTER_SIGMA:
776
- return (const char *)"\\( \\Sigma \\)";
777
- case GREEK_CAPITAL_LETTER_TAU:
778
- return (const char *)"T";
779
- case GREEK_CAPITAL_LETTER_UPSILON:
780
- return (const char *)"\\( \\Upsilon \\)";
781
- case GREEK_CAPITAL_LETTER_PHI:
782
- return (const char *)"\\( \\Phi \\)";
783
- case GREEK_CAPITAL_LETTER_CHI:
784
- return (const char *)"\\( \\Chi \\)";
785
- case GREEK_CAPITAL_LETTER_PSI:
786
- return (const char *)"\\( \\Psi \\)";
787
- case GREEK_CAPITAL_LETTER_OMEGA:
788
- return (const char *)"\\( \\Omega \\)";
789
- case GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA:
790
- return (const char *)UNDEFINED;
791
- case GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA:
792
- return (const char *)UNDEFINED;
793
- case GREEK_SMALL_LETTER_ALPHA_WITH_TONOS:
794
- return (const char *)UNDEFINED;
795
- case GREEK_SMALL_LETTER_EPSILON_WITH_TONOS:
796
- return (const char *)UNDEFINED;
797
- case GREEK_SMALL_LETTER_ETA_WITH_TONOS:
798
- return (const char *)UNDEFINED;
799
- case GREEK_SMALL_LETTER_IOTA_WITH_TONOS:
800
- return (const char *)UNDEFINED;
801
- case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS:
802
- return (const char *)UNDEFINED;
803
- case GREEK_SMALL_LETTER_ALPHA:
804
- return (const char *)"\\( \\alpha \\)";
805
- case GREEK_SMALL_LETTER_BETA:
806
- return (const char *)"\\( \\beta \\)";
807
- case GREEK_SMALL_LETTER_GAMMA:
808
- return (const char *)"\\( \\gamma \\)";
809
- case GREEK_SMALL_LETTER_DELTA:
810
- return (const char *)"\\( \\delta \\)";
811
- case GREEK_SMALL_LETTER_EPSILON:
812
- return (const char *)"\\( \\epsilon \\)";
813
- case GREEK_SMALL_LETTER_ZETA:
814
- return (const char *)"\\( \\zeta \\)";
815
- case GREEK_SMALL_LETTER_ETA:
816
- return (const char *)"\\( \\eta \\)";
817
- case GREEK_SMALL_LETTER_THETA:
818
- return (const char *)"\\( \\theta \\)";
819
- case GREEK_SMALL_LETTER_IOTA:
820
- return (const char *)"\\( \\iota \\)";
821
- case GREEK_SMALL_LETTER_KAPPA:
822
- return (const char *)"\\( \\kappa \\)";
823
- case GREEK_SMALL_LETTER_LAMDA:
824
- return (const char *)"\\( \\lambda \\)";
825
- case GREEK_SMALL_LETTER_MU:
826
- return (const char *)"\\( \\mu \\)";
827
- case GREEK_SMALL_LETTER_NU:
828
- return (const char *)"\\( \\nu \\)";
829
- case GREEK_SMALL_LETTER_XI:
830
- return (const char *)"\\( \\xi \\)";
831
- case GREEK_SMALL_LETTER_OMICRON:
832
- return (const char *)"\\( \\omicron \\)";
833
- case GREEK_SMALL_LETTER_PI:
834
- return (const char *)"\\( \\pi \\)";
835
- case GREEK_SMALL_LETTER_RHO:
836
- return (const char *)"\\( \\rho \\)";
837
- case GREEK_SMALL_LETTER_FINAL_SIGMA:
838
- return (const char *)"\\( \\varsigma \\)";
839
- case GREEK_SMALL_LETTER_SIGMA:
840
- return (const char *)"\\( \\sigma \\)";
841
- case GREEK_SMALL_LETTER_TAU:
842
- return (const char *)"\\( \\tau \\)";
843
- case GREEK_SMALL_LETTER_UPSILON:
844
- return (const char *)"\\( \\upsilon \\)";
845
- case GREEK_SMALL_LETTER_PHI:
846
- return (const char *)"\\( \\varphi \\)";
847
- case GREEK_SMALL_LETTER_CHI:
848
- return (const char *)"\\( \\chi \\)";
849
- case GREEK_SMALL_LETTER_PSI:
850
- return (const char *)"\\( \\psi \\)";
851
- case GREEK_SMALL_LETTER_OMEGA:
852
- return (const char *)"\\( \\omega \\)";
853
- case GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA:
854
- return (const char *)UNDEFINED;
855
- case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA:
856
- return (const char *)UNDEFINED;
857
- case GREEK_SMALL_LETTER_OMICRON_WITH_TONOS:
858
- return (const char *)UNDEFINED;
859
- case GREEK_SMALL_LETTER_UPSILON_WITH_TONOS:
860
- return (const char *)UNDEFINED;
861
- case GREEK_SMALL_LETTER_OMEGA_WITH_TONOS:
862
- return (const char *)UNDEFINED;
863
- case GREEK_BETA_SYMBOL:
864
- return (const char *)UNDEFINED;
865
- case GREEK_THETA_SYMBOL:
866
- return (const char *)"\\( \\vartheta \\)";
867
- case GREEK_UPSILON_WITH_HOOK_SYMBOL:
868
- return (const char *)UNDEFINED;
869
- case GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL:
870
- return (const char *)UNDEFINED;
871
- case GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL:
872
- return (const char *)UNDEFINED;
873
- case GREEK_PHI_SYMBOL:
874
- return (const char *)"\\( \\phi \\)";
875
- case GREEK_PI_SYMBOL:
876
- return (const char *)"\\( \\varpi \\)";
877
- /* and some greek letters missing*/
878
-
879
- /* punctuation (partial) */
880
- case HYPHEN:
881
- return (const char *)"-";
882
- case NON_BREAKING_HYPHEN:
883
- return (const char *)UNDEFINED;
884
- case FIGURE_DASH:
885
- case EN_DASH:
886
- return (const char *)"--";
887
- case EM_DASH:
888
- return (const char *)"---";
889
- case HORIZONTAL_BAR:
890
- return (const char *)UNDEFINED;
891
- case LEFT_SINGLE_QUOTATION_MARK:
892
- return (const char *)"`";
893
- case RIGHT_SINGLE_QUOTATION_MARK:
894
- return (const char *)"'";
895
- case SINGLE_LOW_9_QUOTATION_MARK:
896
- return (const char *)"\\glq{}";
897
- case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
898
- return (const char *)UNDEFINED;
899
- case LEFT_DOUBLE_QUOTATION_MARK:
900
- return (const char *)"``";
901
- case RIGHT_DOUBLE_QUOTATION_MARK:
902
- return (const char *)"''";
903
- case DOUBLE_LOW_9_QUOTATION_MARK:
904
- return (const char *)"\\glqq{}";
905
- case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
906
- return (const char *)UNDEFINED;
907
- case DAGGER:
908
- return (const char *)"\\dag";
909
- case DOUBLE_DAGGER:
910
- return (const char *)"\\ddag";
911
- case BULLET:
912
- return (const char *)"$\\bullet$";
913
- case TRIANGULAR_BULLET:
914
- return (const char *)"$\\blacktriangleright";
915
- case HYPHENATION_POINT:
916
- return (const char *)"\\-";
917
- case HORIZONTAL_ELLIPSIS:
918
- return (const char *)"\\ldots";
919
- case PER_MILLE_SIGN:
920
- return (const char *)UNDEFINED;
921
- case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
922
- return (const char *)"\\flq{}";
923
- case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
924
- return (const char *)"\\frq{}";
925
- /* ligatures */
926
- case LATIN_SMALL_LIGATURE_FF:
927
- return (const char *)"ff";
928
- case LATIN_SMALL_LIGATURE_FI:
929
- return (const char *)"fi";
930
- case LATIN_SMALL_LIGATURE_FL:
931
- return (const char *)"fl";
932
- case LATIN_SMALL_LIGATURE_FFI:
933
- return (const char *)"ffi";
934
- case LATIN_SMALL_LIGATURE_FFL:
935
- return (const char *)"ffl";
936
- case LATIN_SMALL_LIGATURE_LONG_S_T:
937
- case LATIN_SMALL_LIGATURE_ST:
938
- return (const char *)"st";
939
- /* reserved */
940
- case 0:
941
- return (const char *)"";
942
- case UNKNOWN:
943
- return (const char *)"\\_";
944
- case PICTURE:
945
- return (const char *)"(PICTURE)";
946
- default:
947
- /* snprintf seems to be no standard, so I use insecure sprintf */
948
- sprintf(buf,"\\symbol{%u}",(unsigned)c);
949
- return buf; /* UNDEFINED; */
950
- }
951
- case HTML:
952
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
953
- switch (c) {
954
- case '&':
955
- return (const char *)"&amp;";
956
- /* semicolon must not be coded */
957
- case '\'':
958
- return (const char *)"&apos;";
959
- case '"':
960
- return (const char *)"&quot;";
961
- case '<':
962
- return (const char *)"&lt;";
963
- case '>':
964
- return (const char *)"&gt;";
965
- }
966
- buf[0] = (char)c;
967
- return buf;
968
- }
969
- switch (c) {
970
- case PICTURE:
971
- return (const char *)"<!--PICTURE-->";
972
- case UNKNOWN:
973
- return (const char *)"_"; /* better use colored symbol? */
974
- case LINE_FEED:
975
- return (const char *)"<br />"; /* \n handled somewhere else? */
976
- case FORM_FEED:
977
- case CARRIAGE_RETURN:
978
- return (const char *)"<br />";
979
- case NO_BREAK_SPACE:
980
- return (const char *)"<nobr />";
981
- case INVERTED_EXCLAMATION_MARK:
982
- return (const char *)"&iexcl;";
983
- case CENT_SIGN:
984
- return (const char *)"&cent;";
985
- case POUND_SIGN:
986
- return (const char *)"&pound;";
987
- case CURRENCY_SIGN:
988
- return (const char *)"&curren;";
989
- case YEN_SIGN:
990
- return (const char *)"&yen;";
991
- case BROKEN_BAR:
992
- return (const char *)"&brvbar;";
993
- case SECTION_SIGN:
994
- return (const char *)"&sect;";
995
- case DIAERESIS:
996
- return (const char *)"&uml;";
997
- case COPYRIGHT_SIGN:
998
- return (const char *)"&copy;";
999
- case FEMININE_ORDINAL_INDICATOR:
1000
- return (const char *)"&ordfem;";
1001
- case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1002
- return (const char *)"&laquo;";
1003
- case NOT_SIGN:
1004
- return (const char *)"&not;";
1005
- case SOFT_HYPHEN:
1006
- return (const char *)"&shy;";
1007
- case REGISTERED_SIGN:
1008
- return (const char *)"&reg;";
1009
- case MACRON:
1010
- return (const char *)"&macr;";
1011
- case DEGREE_SIGN:
1012
- return (const char *)"&deg;";
1013
- case PLUS_MINUS_SIGN:
1014
- return (const char *)"&plusmn;";
1015
- case SUPERSCRIPT_TWO:
1016
- return (const char *)"&sup2;";
1017
- case SUPERSCRIPT_THREE:
1018
- return (const char *)"&sup3;";
1019
- case ACUTE_ACCENT:
1020
- return (const char *)"&acute;";
1021
- case MICRO_SIGN:
1022
- return (const char *)"&micro;";
1023
- case PILCROW_SIGN:
1024
- return (const char *)"&para;";
1025
- case MIDDLE_DOT:
1026
- return (const char *)"&middot;";
1027
- case CEDILLA:
1028
- return (const char *)"&cedil;";
1029
- case SUPERSCRIPT_ONE:
1030
- return (const char *)"&sup1;";
1031
- case MASCULINE_ORDINAL_INDICATOR:
1032
- return (const char *)"&ordm;";
1033
- case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1034
- return (const char *)"&raquo;";
1035
- case VULGAR_FRACTION_ONE_QUARTER:
1036
- return (const char *)"&frac14;";
1037
- case VULGAR_FRACTION_ONE_HALF:
1038
- return (const char *)"&frac12;";
1039
- case VULGAR_FRACTION_THREE_QUARTERS:
1040
- return (const char *)"&frac34;";
1041
- case INVERTED_QUESTION_MARK:
1042
- return (const char *)"&iquest;";
1043
- case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
1044
- return (const char *)"&Agrave;";
1045
- case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
1046
- return (const char *)"&Aacute;";
1047
- case LATIN_CAPITAL_LETTER_A_WITH_BREVE:
1048
- return (const char *)"&Abreve;";
1049
- case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
1050
- return (const char *)"&Acirc;";
1051
- case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
1052
- return (const char *)"&Atilde;";
1053
- case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
1054
- return (const char *)"&Auml;";
1055
- case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
1056
- return (const char *)"&Aring;";
1057
- case LATIN_CAPITAL_LETTER_AE:
1058
- return (const char *)"&AElig;";
1059
- case LATIN_CAPITAL_LETTER_C_WITH_CARON:
1060
- return (const char *)"&Ccaron;";
1061
- case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
1062
- return (const char *)"&Ccedil;";
1063
- case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
1064
- return (const char *)"&Egrave;";
1065
- case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
1066
- return (const char *)"&Eacute;";
1067
- case LATIN_CAPITAL_LETTER_E_WITH_CARON:
1068
- return (const char *)"&Ecaron;";
1069
- case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
1070
- return (const char *)"&Ecirc;";
1071
- case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
1072
- return (const char *)"&Euml;";
1073
- case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
1074
- return (const char *)"&Igrave;";
1075
- case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
1076
- return (const char *)"&Iacute;";
1077
- case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
1078
- return (const char *)"&Icirc;";
1079
- case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
1080
- return (const char *)"&Iuml;";
1081
- case LATIN_CAPITAL_LETTER_ETH:
1082
- return (const char *)"&ETH;";
1083
- case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
1084
- return (const char *)"&Ntilde;";
1085
- case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
1086
- return (const char *)"&Ograve;";
1087
- case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
1088
- return (const char *)"&Oacute;";
1089
- case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
1090
- return (const char *)"&Ocirc;";
1091
- case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
1092
- return (const char *)"&Otilde;";
1093
- case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
1094
- return (const char *)"&Ouml;";
1095
- case MULTIPLICATION_SIGN:
1096
- return (const char *)"&times";
1097
- case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
1098
- return (const char *)"&Oslash;";
1099
- case LATIN_CAPITAL_LETTER_S_WITH_CARON:
1100
- return (const char *)"&Scaron;";
1101
- case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
1102
- return (const char *)"&Ugrave;";
1103
- case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
1104
- return (const char *)"&Uacute;";
1105
- case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
1106
- return (const char *)"&Ucirc;";
1107
- case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
1108
- return (const char *)"&Uuml;";
1109
- case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
1110
- return (const char *)"&Yacute;";
1111
- case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
1112
- return (const char *)"&Zcaron;";
1113
- case LATIN_CAPITAL_LETTER_THORN:
1114
- return (const char *)"&THORN;";
1115
- case LATIN_SMALL_LETTER_SHARP_S:
1116
- return (const char *)"&szlig;";
1117
- case LATIN_SMALL_LETTER_A_WITH_GRAVE:
1118
- return (const char *)"&agrave;";
1119
- case LATIN_SMALL_LETTER_A_WITH_ACUTE:
1120
- return (const char *)"&aacute;";
1121
- case LATIN_SMALL_LETTER_A_WITH_BREVE:
1122
- return (const char *)"&abreve;";
1123
- case LATIN_SMALL_LETTER_A_WITH_CARON:
1124
- return (const char *)"&acaron;";
1125
- case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
1126
- return (const char *)"&acirc;";
1127
- case LATIN_SMALL_LETTER_A_WITH_TILDE:
1128
- return (const char *)"&atilde;";
1129
- case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
1130
- return (const char *)"&auml;";
1131
- case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
1132
- return (const char *)"&aring;";
1133
- case LATIN_SMALL_LETTER_AE:
1134
- return (const char *)"&aelig;";
1135
- case LATIN_SMALL_LETTER_C_WITH_CARON:
1136
- return (const char *)"&ccaron;";
1137
- case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
1138
- return (const char *)"&ccedil;";
1139
- case LATIN_SMALL_LETTER_E_WITH_GRAVE:
1140
- return (const char *)"&egrave;";
1141
- case LATIN_SMALL_LETTER_E_WITH_ACUTE:
1142
- return (const char *)"&eacute;";
1143
- case LATIN_SMALL_LETTER_E_WITH_CARON:
1144
- return (const char *)"&ecaron;";
1145
- case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
1146
- return (const char *)"&ecirc;";
1147
- case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
1148
- return (const char *)"&euml;";
1149
- case LATIN_SMALL_LETTER_I_WITH_GRAVE:
1150
- return (const char *)"&igrave;";
1151
- case LATIN_SMALL_LETTER_I_WITH_ACUTE:
1152
- return (const char *)"&iacute;";
1153
- case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
1154
- return (const char *)"&icirc;";
1155
- case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
1156
- return (const char *)"&iuml;";
1157
- case LATIN_SMALL_LETTER_ETH:
1158
- return (const char *)"&eth;";
1159
- case LATIN_SMALL_LETTER_N_WITH_TILDE:
1160
- return (const char *)"&ntilde;";
1161
- case LATIN_SMALL_LETTER_O_WITH_GRAVE:
1162
- return (const char *)"&ograve;";
1163
- case LATIN_SMALL_LETTER_O_WITH_ACUTE:
1164
- return (const char *)"&oacute;";
1165
- case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
1166
- return (const char *)"&ocirc;";
1167
- case LATIN_SMALL_LETTER_O_WITH_TILDE:
1168
- return (const char *)"&otilde;";
1169
- case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
1170
- return (const char *)"&ouml;";
1171
- case DIVISION_SIGN:
1172
- return (const char *)"&divide;";
1173
- case LATIN_SMALL_LETTER_O_WITH_STROKE:
1174
- return (const char *)"&oslash;";
1175
- case LATIN_SMALL_LETTER_S_WITH_CARON:
1176
- return (const char *)"&scaron;";
1177
- case LATIN_SMALL_LETTER_U_WITH_GRAVE:
1178
- return (const char *)"&ugrave;";
1179
- case LATIN_SMALL_LETTER_U_WITH_ACUTE:
1180
- return (const char *)"&uacute;";
1181
- case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
1182
- return (const char *)"&ucirc;";
1183
- case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
1184
- return (const char *)"&uuml;";
1185
- case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
1186
- return (const char *)"&yacute;";
1187
- case LATIN_SMALL_LETTER_THORN:
1188
- return (const char *)"&thorn;";
1189
- case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
1190
- return (const char *)"&yuml;";
1191
- case LATIN_SMALL_LETTER_Z_WITH_CARON:
1192
- return (const char *)"&zcaron;";
1193
- case EURO_CURRENCY_SIGN:
1194
- return (const char *)"&euro;";
1195
- case 0:
1196
- return (const char *)"";
1197
- default:
1198
- sprintf(buf,"&#%u;",(unsigned)c);
1199
- return buf; /* undefined */
1200
- }
1201
- /* break; unreachable code */
1202
- case XML: /* only 5 &xxx;-ENTITIES ar defined by default */
1203
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
1204
- switch (c) {
1205
- case '&':
1206
- return (const char *)"&amp;";
1207
- case '\'':
1208
- return (const char *)"&apos;";
1209
- case '"':
1210
- return (const char *)"&quot;";
1211
- case '<':
1212
- return (const char *)"&lt;";
1213
- case '>':
1214
- return (const char *)"&gt;";
1215
- }
1216
- buf[0] = (char)c;
1217
- return buf;
1218
- }
1219
- switch (c) { /* subject of change! */
1220
- case PICTURE:
1221
- return (const char *)"(PICTURE)";
1222
- case UNKNOWN:
1223
- return (const char *)"_"; /* better use colored symbol? */
1224
- case LINE_FEED: /* \n handled somwhere else? */
1225
- case FORM_FEED:
1226
- case CARRIAGE_RETURN:
1227
- return (const char *)"<br />";
1228
- case NO_BREAK_SPACE:
1229
- return (const char *)"<nobr />";
1230
- case 0:
1231
- return (const char *)"";
1232
- default:
1233
- sprintf(buf,"&#x%03x;",(unsigned)c);
1234
- return buf; /* undefined */
1235
- }
1236
- /* break; unreachable code */
1237
- case SGML:
1238
- switch (c) {
1239
- default:
1240
- sprintf(buf,"&#%u;",(unsigned)c);
1241
- return buf; /* UNDEFINED */
1242
- }
1243
- /* break; unreachable code */
1244
- case ASCII: /* mainly used for debugging */
1245
- if ( c=='\n' || (c>= 0x20 && c <= 0x7F) ) {
1246
- buf[0] = (char)c;
1247
- return buf;
1248
- }
1249
- switch (c) {
1250
- /* extra */
1251
- case UNKNOWN:
1252
- return (const char *)"(?)";
1253
- case PICTURE:
1254
- return (const char *)"(?)";
1255
-
1256
- default:
1257
- /* snprintf seems to be no standard, so I use insecure sprintf */
1258
- if ((unsigned)c>255) sprintf(buf,"(0x%04x)",(unsigned)c);
1259
- else sprintf(buf,"(0x%02x)",(unsigned)c);
1260
- return buf; /* UNDEFINED; */
1261
- }
1262
- /* break; unreachable code */
1263
- default: /* use UTF8 as default, test with xterm -u8 */
1264
- /* extra */
1265
- if ( c == UNKNOWN ) return (const char *)"_";
1266
- if ( c == PICTURE ) return (const char *)"_"; /* Due to Mobile OCR */
1267
- if ( c <= (wchar_t)0x0000007F ) { /* UTF8 == 7bit ASCII */
1268
- buf[0] = (char)c;
1269
- return buf;
1270
- }
1271
- if ( c <= (wchar_t)0x000007FF ) { /* UTF8 == 11bit */
1272
- buf[0] = (char)(0xc0|((c>> 6) & 0x1f)); /* 110xxxxx */
1273
- buf[1] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1274
- buf[2] = (char)0; /* terminate string */
1275
- return buf;
1276
- }
1277
- /* wchar_t is 16bit for Borland-C !? Jan07 */
1278
- if ( c <= (wchar_t)0x0000FFFF ) { /* UTF8 == 16bit */
1279
- buf[0] = (char)(0xe0|((c>>12) & 0x0f)); /* 1110xxxx */
1280
- buf[1] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1281
- buf[2] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1282
- buf[3] = (char)0; /* terminate string */
1283
- return buf;
1284
- }
1285
- if ( c <= (wchar_t)0x001FFFFF ) { /* UTF8 == 21bit */
1286
- buf[0] = (char)(0xf0|((c>>18) & 0x07)); /* 11110xxx */
1287
- buf[1] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1288
- buf[2] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1289
- buf[3] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1290
- buf[4] = (char)0; /* terminate string */
1291
- return buf;
1292
- }
1293
- if ( c <= (wchar_t)0x03FFFFFF ) { /* UTF8 == 26bit */
1294
- buf[0] = (char)(0xf8|((c>>24) & 0x03)); /* 111110xx */
1295
- buf[1] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1296
- buf[2] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1297
- buf[3] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1298
- buf[4] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1299
- buf[5] = (char)0; /* terminate string */
1300
- return buf;
1301
- }
1302
- if ( c <= (wchar_t)0x7FFFFFFF ) { /* UTF8 == 31bit */
1303
- buf[0] = (char)(0xfc|((c>>30) & 0x01)); /* 1111110x */
1304
- buf[1] = (char)(0x80|((c>>24) & 0x3f)); /* 10xxxxxx */
1305
- buf[2] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1306
- buf[3] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1307
- buf[4] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1308
- buf[5] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1309
- buf[6] = (char)0; /* terminate string */
1310
- return buf;
1311
- }
1312
- return (const char *)UNDEFINED;
1313
- }
1314
- }