isbn 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. data/{README → README.md} +5 -11
  2. data/Rakefile +20 -14
  3. data/isbn.gemspec +23 -0
  4. data/lib/isbn.rb +2 -0
  5. data/test/isbn_spec.rb +1 -1
  6. metadata +29 -316
  7. data/VERSION +0 -1
  8. data/src/gocr-0.48/.cvsignore +0 -6
  9. data/src/gocr-0.48/AUTHORS +0 -7
  10. data/src/gocr-0.48/BUGS +0 -55
  11. data/src/gocr-0.48/CREDITS +0 -17
  12. data/src/gocr-0.48/HISTORY +0 -243
  13. data/src/gocr-0.48/INSTALL +0 -83
  14. data/src/gocr-0.48/Makefile +0 -193
  15. data/src/gocr-0.48/Makefile.in +0 -193
  16. data/src/gocr-0.48/README +0 -165
  17. data/src/gocr-0.48/READMEde.txt +0 -80
  18. data/src/gocr-0.48/REMARK.txt +0 -18
  19. data/src/gocr-0.48/REVIEW +0 -538
  20. data/src/gocr-0.48/TODO +0 -65
  21. data/src/gocr-0.48/bin/.cvsignore +0 -2
  22. data/src/gocr-0.48/bin/create_db +0 -38
  23. data/src/gocr-0.48/bin/gocr.tcl +0 -527
  24. data/src/gocr-0.48/bin/gocr_chk.sh +0 -44
  25. data/src/gocr-0.48/configure +0 -4689
  26. data/src/gocr-0.48/configure.in +0 -71
  27. data/src/gocr-0.48/doc/.#Makefile.1.6 +0 -39
  28. data/src/gocr-0.48/doc/.cvsignore +0 -2
  29. data/src/gocr-0.48/doc/Makefile +0 -39
  30. data/src/gocr-0.48/doc/Makefile.in +0 -39
  31. data/src/gocr-0.48/doc/example.dtd +0 -53
  32. data/src/gocr-0.48/doc/example.xml +0 -21
  33. data/src/gocr-0.48/doc/examples.txt +0 -67
  34. data/src/gocr-0.48/doc/gocr.html +0 -578
  35. data/src/gocr-0.48/doc/unicode.txt +0 -57
  36. data/src/gocr-0.48/examples/.#Makefile.1.22 +0 -166
  37. data/src/gocr-0.48/examples/4x6.png +0 -0
  38. data/src/gocr-0.48/examples/4x6.txt +0 -2
  39. data/src/gocr-0.48/examples/5x7.png +0 -0
  40. data/src/gocr-0.48/examples/5x7.png.txt +0 -2
  41. data/src/gocr-0.48/examples/5x8.png +0 -0
  42. data/src/gocr-0.48/examples/5x8.png.txt +0 -2
  43. data/src/gocr-0.48/examples/Makefile +0 -166
  44. data/src/gocr-0.48/examples/color.fig +0 -20
  45. data/src/gocr-0.48/examples/ex.fig +0 -16
  46. data/src/gocr-0.48/examples/font.tex +0 -22
  47. data/src/gocr-0.48/examples/font1.tex +0 -46
  48. data/src/gocr-0.48/examples/font2.fig +0 -27
  49. data/src/gocr-0.48/examples/font_nw.tex +0 -24
  50. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  51. data/src/gocr-0.48/examples/handwrt1.txt +0 -10
  52. data/src/gocr-0.48/examples/inverse.fig +0 -20
  53. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  54. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +0 -4
  56. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-a.txt +0 -6
  58. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  59. data/src/gocr-0.48/examples/ocr-b.png.txt +0 -4
  60. data/src/gocr-0.48/examples/polish.tex +0 -28
  61. data/src/gocr-0.48/examples/rotate45.fig +0 -14
  62. data/src/gocr-0.48/examples/score +0 -36
  63. data/src/gocr-0.48/examples/text.tex +0 -28
  64. data/src/gocr-0.48/gpl.html +0 -537
  65. data/src/gocr-0.48/include/.cvsignore +0 -2
  66. data/src/gocr-0.48/include/config.h +0 -36
  67. data/src/gocr-0.48/include/config.h.in +0 -36
  68. data/src/gocr-0.48/include/version.h +0 -2
  69. data/src/gocr-0.48/install-sh +0 -3
  70. data/src/gocr-0.48/make.bat +0 -57
  71. data/src/gocr-0.48/man/.cvsignore +0 -2
  72. data/src/gocr-0.48/man/Makefile +0 -29
  73. data/src/gocr-0.48/man/Makefile.in +0 -29
  74. data/src/gocr-0.48/man/man1/gocr.1 +0 -166
  75. data/src/gocr-0.48/src/.cvsignore +0 -4
  76. data/src/gocr-0.48/src/Makefile +0 -132
  77. data/src/gocr-0.48/src/Makefile.in +0 -132
  78. data/src/gocr-0.48/src/amiga.h +0 -31
  79. data/src/gocr-0.48/src/barcode.c +0 -846
  80. data/src/gocr-0.48/src/barcode.c.orig +0 -593
  81. data/src/gocr-0.48/src/barcode.h +0 -11
  82. data/src/gocr-0.48/src/box.c +0 -372
  83. data/src/gocr-0.48/src/database.c +0 -462
  84. data/src/gocr-0.48/src/detect.c +0 -943
  85. data/src/gocr-0.48/src/gocr.c +0 -373
  86. data/src/gocr-0.48/src/gocr.h +0 -288
  87. data/src/gocr-0.48/src/jconv.c +0 -168
  88. data/src/gocr-0.48/src/job.c +0 -84
  89. data/src/gocr-0.48/src/lines.c +0 -350
  90. data/src/gocr-0.48/src/list.c +0 -334
  91. data/src/gocr-0.48/src/list.h +0 -90
  92. data/src/gocr-0.48/src/ocr0.c +0 -6756
  93. data/src/gocr-0.48/src/ocr0.h +0 -63
  94. data/src/gocr-0.48/src/ocr0n.c +0 -1475
  95. data/src/gocr-0.48/src/ocr1.c +0 -85
  96. data/src/gocr-0.48/src/ocr1.h +0 -3
  97. data/src/gocr-0.48/src/otsu.c +0 -289
  98. data/src/gocr-0.48/src/otsu.h +0 -23
  99. data/src/gocr-0.48/src/output.c +0 -289
  100. data/src/gocr-0.48/src/output.h +0 -37
  101. data/src/gocr-0.48/src/pcx.c +0 -153
  102. data/src/gocr-0.48/src/pcx.h +0 -9
  103. data/src/gocr-0.48/src/pgm2asc.c +0 -2893
  104. data/src/gocr-0.48/src/pgm2asc.h +0 -105
  105. data/src/gocr-0.48/src/pixel.c +0 -537
  106. data/src/gocr-0.48/src/pnm.c +0 -533
  107. data/src/gocr-0.48/src/pnm.h +0 -35
  108. data/src/gocr-0.48/src/progress.c +0 -87
  109. data/src/gocr-0.48/src/progress.h +0 -42
  110. data/src/gocr-0.48/src/remove.c +0 -703
  111. data/src/gocr-0.48/src/tga.c +0 -87
  112. data/src/gocr-0.48/src/tga.h +0 -6
  113. data/src/gocr-0.48/src/unicode.c +0 -1314
  114. data/src/gocr-0.48/src/unicode.h +0 -1257
  115. data/src/jpeg-7/Makefile.am +0 -133
  116. data/src/jpeg-7/Makefile.in +0 -1089
  117. data/src/jpeg-7/README +0 -322
  118. data/src/jpeg-7/aclocal.m4 +0 -8990
  119. data/src/jpeg-7/ansi2knr.1 +0 -36
  120. data/src/jpeg-7/ansi2knr.c +0 -739
  121. data/src/jpeg-7/cderror.h +0 -132
  122. data/src/jpeg-7/cdjpeg.c +0 -181
  123. data/src/jpeg-7/cdjpeg.h +0 -187
  124. data/src/jpeg-7/change.log +0 -270
  125. data/src/jpeg-7/cjpeg.1 +0 -325
  126. data/src/jpeg-7/cjpeg.c +0 -616
  127. data/src/jpeg-7/ckconfig.c +0 -402
  128. data/src/jpeg-7/coderules.txt +0 -118
  129. data/src/jpeg-7/config.guess +0 -1561
  130. data/src/jpeg-7/config.sub +0 -1686
  131. data/src/jpeg-7/configure +0 -17139
  132. data/src/jpeg-7/configure.ac +0 -317
  133. data/src/jpeg-7/depcomp +0 -630
  134. data/src/jpeg-7/djpeg.1 +0 -251
  135. data/src/jpeg-7/djpeg.c +0 -617
  136. data/src/jpeg-7/example.c +0 -433
  137. data/src/jpeg-7/filelist.txt +0 -215
  138. data/src/jpeg-7/install-sh +0 -520
  139. data/src/jpeg-7/install.txt +0 -1097
  140. data/src/jpeg-7/jaricom.c +0 -148
  141. data/src/jpeg-7/jcapimin.c +0 -282
  142. data/src/jpeg-7/jcapistd.c +0 -161
  143. data/src/jpeg-7/jcarith.c +0 -921
  144. data/src/jpeg-7/jccoefct.c +0 -453
  145. data/src/jpeg-7/jccolor.c +0 -459
  146. data/src/jpeg-7/jcdctmgr.c +0 -482
  147. data/src/jpeg-7/jchuff.c +0 -1612
  148. data/src/jpeg-7/jcinit.c +0 -65
  149. data/src/jpeg-7/jcmainct.c +0 -293
  150. data/src/jpeg-7/jcmarker.c +0 -667
  151. data/src/jpeg-7/jcmaster.c +0 -770
  152. data/src/jpeg-7/jcomapi.c +0 -106
  153. data/src/jpeg-7/jconfig.bcc +0 -48
  154. data/src/jpeg-7/jconfig.cfg +0 -45
  155. data/src/jpeg-7/jconfig.dj +0 -38
  156. data/src/jpeg-7/jconfig.mac +0 -43
  157. data/src/jpeg-7/jconfig.manx +0 -43
  158. data/src/jpeg-7/jconfig.mc6 +0 -52
  159. data/src/jpeg-7/jconfig.sas +0 -43
  160. data/src/jpeg-7/jconfig.st +0 -42
  161. data/src/jpeg-7/jconfig.txt +0 -155
  162. data/src/jpeg-7/jconfig.vc +0 -45
  163. data/src/jpeg-7/jconfig.vms +0 -37
  164. data/src/jpeg-7/jconfig.wat +0 -38
  165. data/src/jpeg-7/jcparam.c +0 -632
  166. data/src/jpeg-7/jcprepct.c +0 -358
  167. data/src/jpeg-7/jcsample.c +0 -545
  168. data/src/jpeg-7/jctrans.c +0 -381
  169. data/src/jpeg-7/jdapimin.c +0 -396
  170. data/src/jpeg-7/jdapistd.c +0 -275
  171. data/src/jpeg-7/jdarith.c +0 -762
  172. data/src/jpeg-7/jdatadst.c +0 -151
  173. data/src/jpeg-7/jdatasrc.c +0 -212
  174. data/src/jpeg-7/jdcoefct.c +0 -736
  175. data/src/jpeg-7/jdcolor.c +0 -396
  176. data/src/jpeg-7/jdct.h +0 -393
  177. data/src/jpeg-7/jddctmgr.c +0 -382
  178. data/src/jpeg-7/jdhuff.c +0 -1309
  179. data/src/jpeg-7/jdinput.c +0 -384
  180. data/src/jpeg-7/jdmainct.c +0 -512
  181. data/src/jpeg-7/jdmarker.c +0 -1360
  182. data/src/jpeg-7/jdmaster.c +0 -663
  183. data/src/jpeg-7/jdmerge.c +0 -400
  184. data/src/jpeg-7/jdpostct.c +0 -290
  185. data/src/jpeg-7/jdsample.c +0 -361
  186. data/src/jpeg-7/jdtrans.c +0 -136
  187. data/src/jpeg-7/jerror.c +0 -252
  188. data/src/jpeg-7/jerror.h +0 -304
  189. data/src/jpeg-7/jfdctflt.c +0 -174
  190. data/src/jpeg-7/jfdctfst.c +0 -230
  191. data/src/jpeg-7/jfdctint.c +0 -4348
  192. data/src/jpeg-7/jidctflt.c +0 -242
  193. data/src/jpeg-7/jidctfst.c +0 -368
  194. data/src/jpeg-7/jidctint.c +0 -5137
  195. data/src/jpeg-7/jinclude.h +0 -91
  196. data/src/jpeg-7/jmemansi.c +0 -167
  197. data/src/jpeg-7/jmemdos.c +0 -638
  198. data/src/jpeg-7/jmemdosa.asm +0 -379
  199. data/src/jpeg-7/jmemmac.c +0 -289
  200. data/src/jpeg-7/jmemmgr.c +0 -1118
  201. data/src/jpeg-7/jmemname.c +0 -276
  202. data/src/jpeg-7/jmemnobs.c +0 -109
  203. data/src/jpeg-7/jmemsys.h +0 -198
  204. data/src/jpeg-7/jmorecfg.h +0 -369
  205. data/src/jpeg-7/jpegint.h +0 -395
  206. data/src/jpeg-7/jpeglib.h +0 -1135
  207. data/src/jpeg-7/jpegtran.1 +0 -272
  208. data/src/jpeg-7/jpegtran.c +0 -546
  209. data/src/jpeg-7/jquant1.c +0 -856
  210. data/src/jpeg-7/jquant2.c +0 -1310
  211. data/src/jpeg-7/jutils.c +0 -179
  212. data/src/jpeg-7/jversion.h +0 -14
  213. data/src/jpeg-7/libjpeg.map +0 -4
  214. data/src/jpeg-7/libjpeg.txt +0 -3067
  215. data/src/jpeg-7/ltmain.sh +0 -8406
  216. data/src/jpeg-7/makcjpeg.st +0 -36
  217. data/src/jpeg-7/makdjpeg.st +0 -36
  218. data/src/jpeg-7/makeadsw.vc6 +0 -77
  219. data/src/jpeg-7/makeasln.vc9 +0 -33
  220. data/src/jpeg-7/makecdep.vc6 +0 -82
  221. data/src/jpeg-7/makecdsp.vc6 +0 -130
  222. data/src/jpeg-7/makecmak.vc6 +0 -159
  223. data/src/jpeg-7/makecvcp.vc9 +0 -186
  224. data/src/jpeg-7/makeddep.vc6 +0 -82
  225. data/src/jpeg-7/makeddsp.vc6 +0 -130
  226. data/src/jpeg-7/makedmak.vc6 +0 -159
  227. data/src/jpeg-7/makedvcp.vc9 +0 -186
  228. data/src/jpeg-7/makefile.ansi +0 -220
  229. data/src/jpeg-7/makefile.bcc +0 -291
  230. data/src/jpeg-7/makefile.dj +0 -226
  231. data/src/jpeg-7/makefile.manx +0 -220
  232. data/src/jpeg-7/makefile.mc6 +0 -255
  233. data/src/jpeg-7/makefile.mms +0 -224
  234. data/src/jpeg-7/makefile.sas +0 -258
  235. data/src/jpeg-7/makefile.unix +0 -234
  236. data/src/jpeg-7/makefile.vc +0 -217
  237. data/src/jpeg-7/makefile.vms +0 -142
  238. data/src/jpeg-7/makefile.wat +0 -239
  239. data/src/jpeg-7/makejdep.vc6 +0 -423
  240. data/src/jpeg-7/makejdsp.vc6 +0 -285
  241. data/src/jpeg-7/makejdsw.vc6 +0 -29
  242. data/src/jpeg-7/makejmak.vc6 +0 -425
  243. data/src/jpeg-7/makejsln.vc9 +0 -17
  244. data/src/jpeg-7/makejvcp.vc9 +0 -328
  245. data/src/jpeg-7/makeproj.mac +0 -213
  246. data/src/jpeg-7/makerdep.vc6 +0 -6
  247. data/src/jpeg-7/makerdsp.vc6 +0 -78
  248. data/src/jpeg-7/makermak.vc6 +0 -110
  249. data/src/jpeg-7/makervcp.vc9 +0 -133
  250. data/src/jpeg-7/maketdep.vc6 +0 -43
  251. data/src/jpeg-7/maketdsp.vc6 +0 -122
  252. data/src/jpeg-7/maketmak.vc6 +0 -131
  253. data/src/jpeg-7/maketvcp.vc9 +0 -178
  254. data/src/jpeg-7/makewdep.vc6 +0 -6
  255. data/src/jpeg-7/makewdsp.vc6 +0 -78
  256. data/src/jpeg-7/makewmak.vc6 +0 -110
  257. data/src/jpeg-7/makewvcp.vc9 +0 -133
  258. data/src/jpeg-7/makljpeg.st +0 -68
  259. data/src/jpeg-7/maktjpeg.st +0 -30
  260. data/src/jpeg-7/makvms.opt +0 -4
  261. data/src/jpeg-7/missing +0 -376
  262. data/src/jpeg-7/rdbmp.c +0 -439
  263. data/src/jpeg-7/rdcolmap.c +0 -253
  264. data/src/jpeg-7/rdgif.c +0 -38
  265. data/src/jpeg-7/rdjpgcom.1 +0 -63
  266. data/src/jpeg-7/rdjpgcom.c +0 -515
  267. data/src/jpeg-7/rdppm.c +0 -459
  268. data/src/jpeg-7/rdrle.c +0 -387
  269. data/src/jpeg-7/rdswitch.c +0 -365
  270. data/src/jpeg-7/rdtarga.c +0 -500
  271. data/src/jpeg-7/structure.txt +0 -945
  272. data/src/jpeg-7/testimg.bmp +0 -0
  273. data/src/jpeg-7/testimg.jpg +0 -0
  274. data/src/jpeg-7/testimg.ppm +0 -4
  275. data/src/jpeg-7/testimgp.jpg +0 -0
  276. data/src/jpeg-7/testorig.jpg +0 -0
  277. data/src/jpeg-7/testprog.jpg +0 -0
  278. data/src/jpeg-7/transupp.c +0 -1533
  279. data/src/jpeg-7/transupp.h +0 -205
  280. data/src/jpeg-7/usage.txt +0 -605
  281. data/src/jpeg-7/wizard.txt +0 -211
  282. data/src/jpeg-7/wrbmp.c +0 -442
  283. data/src/jpeg-7/wrgif.c +0 -399
  284. data/src/jpeg-7/wrjpgcom.1 +0 -103
  285. data/src/jpeg-7/wrjpgcom.c +0 -583
  286. data/src/jpeg-7/wrppm.c +0 -269
  287. data/src/jpeg-7/wrrle.c +0 -305
  288. data/src/jpeg-7/wrtarga.c +0 -253
@@ -1,87 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 1999 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
- */
21
-
22
- #include <stdio.h>
23
- #include <stdlib.h>
24
- #include <assert.h>
25
-
26
- #include "tga.h"
27
-
28
- typedef unsigned char byte;
29
-
30
- // --- needed for reading TGA-files
31
- #if 0
32
- char read_b(FILE *f1){ // filter #-comments
33
- char c;
34
- c=fgetc(f1); assert(!feof(f1)); assert(!ferror(f1));
35
- return c;
36
- }
37
- #endif
38
-
39
- //byte tga[18]={ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,24,32};
40
- /* header_hex= 00 00 02 00 00 00 00 00 00 00 00 00 xl xh yl yh
41
- * 18 20 -- -- -- -- -- -- -- -- -- -- -- -- -- -- */
42
-
43
- void readtga(char *name,pix *p,int mode){ // see pcx.format.txt
44
- // mode: 0=gray,1=RGB
45
- int nx,ny,i,x,y;
46
- FILE *f1;
47
- unsigned char *pic,h[18];
48
-
49
- f1=fopen(name,"rb"); if(!f1) fprintf(stderr," error opening file\n");
50
- assert(f1); // open-error
51
- assert(fread(h,1,18,f1)==18); /* 18 Byte lesen -> h[] */
52
- assert(h[ 0]== 0); // TGA0
53
- assert(h[ 1]== 0); // TGA1
54
- assert(h[ 2]== 2); // TGA2 no run length encoding
55
- for(i=3;i<12;i++)
56
- assert(h[ i]== 0); // ???
57
- assert(h[16]==0x18); // TGA16
58
- assert(h[17]==0x20); // TGA17
59
- nx = h[12] + (h[13]<<8); /* x-dimension low high */
60
- ny = h[14] + (h[15]<<8); /* y-dimension low high */
61
- fprintf(stderr,"# TGA version=%d x=%d y=%d", h[2],nx,ny );
62
- fflush(stdout);
63
- pic=(unsigned char *)malloc( 3*nx*ny );
64
- assert(pic!=NULL); // no memory
65
- assert(ny==(int)fread(pic,3*nx,ny,f1)); // read all lines BGR
66
- if(mode==0)
67
- {
68
- for(y=0;y<ny;y++) /* BGR => gray */
69
- for(x=0;x<nx;x++)
70
- { i=x+y*nx; pic[i]=(pic[i*3+0]+pic[i*3+1]+pic[i*3+2])/3; }
71
- }
72
- else
73
- if(mode==1)
74
- {
75
- byte b;
76
- for(y=0;y<ny;y++) /* BGR => RGB */
77
- for(x=0;x<nx;x++)
78
- { i=x+y*nx; b=pic[i*3+0]; pic[i*3+0]=pic[i*3+2]; pic[i*3+2]=b; }
79
- }
80
- else assert(0); // wrong mode
81
- fclose(f1);
82
- p->p=pic; p->x=nx; p->y=ny; p->bpp=1+2*mode;
83
- fprintf(stderr," mode=%d\n",mode);
84
- }
85
-
86
- // ------------------------------------------------------------------------
87
-
@@ -1,6 +0,0 @@
1
-
2
- #include "pnm.h"
3
-
4
- void readtga(char *name,pix *p,int mode); // mode: 0=gray 1=RGB
5
-
6
- // ------------------------------------------------------------------------
@@ -1,1314 +0,0 @@
1
- /*
2
- This is a Optical-Character-Recognition program
3
- Copyright (C) 2000-2007 Joerg Schulenburg
4
-
5
- This program is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU General Public License
7
- as published by the Free Software Foundation; either version 2
8
- of the License, or (at your option) any later version.
9
-
10
- This program is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- GNU General Public License for more details.
14
-
15
- You should have received a copy of the GNU General Public License
16
- along with this program; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
-
19
- see README for EMAIL-address
20
- */
21
-
22
- #include "unicode.h"
23
- #include <stdio.h>
24
-
25
- /* FIXME jb global */
26
- int warn=0; /* if 1 a message is generated if composition is not defined */
27
-
28
- /* Arguments: the character (main), and the modifier (accent, etc). See the
29
- function if you want to know the modifiers.
30
- Description: This function intends to be a small helper, to avoid having
31
- to write switches in functions. It's therefore mainly to accents, and
32
- specially for the most usual ones. It supports the basic greek
33
- characters too, which is actually not very helpful.
34
- Returns: the unicode character corresponding to the composed character.
35
-
36
- ToDo:
37
- - It seems to me, that tables should be more effectiv.
38
- So we should use tables in future? (js)
39
- */
40
- wchar_t compose(wchar_t main, wchar_t modifier) {
41
- /* supported by now: part of ISO8859-1, basic greek characters */
42
- if( main == UNKNOWN || main == PICTURE ) return main;
43
- #ifdef DEBUG
44
- if(modifier!=UNICODE_NULL && modifier!=SPACE)
45
- printf(" compose(%c,%d)",(char)main,(int)modifier);
46
- #endif
47
- if(main>127 && modifier!=0 && modifier!=SPACE && warn)
48
- fprintf(stderr,"# Warning compose %04x + %04x>127\n",
49
- (int)modifier,(int)main);
50
- switch (modifier) {
51
- case UNICODE_NULL:
52
- case SPACE:
53
- return (wchar_t)main;
54
-
55
- case APOSTROPHE: /* do NOT USE this. It's here for compatibility only.
56
- Use ACUTE_ACCENT instead. */
57
- fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
58
-
59
- case ACUTE_ACCENT: /* acute/cedilla */
60
- switch (main) {
61
- case 'a': return LATIN_SMALL_LETTER_A_WITH_ACUTE;
62
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_ACUTE;
63
- case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_ACUTE;
64
- case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_ACUTE;
65
- case 'c': return LATIN_SMALL_LETTER_C_WITH_ACUTE;
66
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_ACUTE;
67
- case 'e': return LATIN_SMALL_LETTER_E_WITH_ACUTE;
68
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_ACUTE;
69
- case 'g': return LATIN_SMALL_LETTER_G_WITH_ACUTE;
70
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_ACUTE;
71
- case 'i': return LATIN_SMALL_LETTER_I_WITH_ACUTE;
72
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_ACUTE;
73
- case 'l': return LATIN_SMALL_LETTER_L_WITH_ACUTE;
74
- case 'L': return LATIN_CAPITAL_LETTER_L_WITH_ACUTE;
75
- case 'n': return LATIN_SMALL_LETTER_N_WITH_ACUTE;
76
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_ACUTE;
77
- case 'o': return LATIN_SMALL_LETTER_O_WITH_ACUTE;
78
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
79
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
80
- case 'r': return LATIN_SMALL_LETTER_R_WITH_ACUTE;
81
- case 'R': return LATIN_CAPITAL_LETTER_R_WITH_ACUTE;
82
- case 's': return LATIN_SMALL_LETTER_S_WITH_ACUTE;
83
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_ACUTE;
84
- case 'u': return LATIN_SMALL_LETTER_U_WITH_ACUTE;
85
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_ACUTE;
86
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_ACUTE;
87
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_ACUTE;
88
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_ACUTE;
89
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_ACUTE;
90
- default:
91
- if(warn)fprintf( stderr, " COMPOSE: ACUTE_ACCENT+%04x not defined\n",(int)main);
92
- }
93
- break;
94
-
95
- case BREVE: /* caron (latin2) "u"-above-... (small bow) */
96
- switch (main) {
97
- /* FIXME write separate heuristics for breve */
98
- case 'a': return LATIN_SMALL_LETTER_A_WITH_BREVE;
99
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_BREVE;
100
- case 'e': return LATIN_SMALL_LETTER_E_WITH_BREVE;
101
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_BREVE;
102
- case 'g': return LATIN_SMALL_LETTER_G_WITH_BREVE;
103
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_BREVE;
104
- case 'i': return LATIN_SMALL_LETTER_I_WITH_BREVE;
105
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_BREVE;
106
- case 'o': return LATIN_SMALL_LETTER_O_WITH_BREVE;
107
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_BREVE;
108
- case 'u': return LATIN_SMALL_LETTER_U_WITH_BREVE;
109
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_BREVE;
110
- default:
111
- if(warn)fprintf( stderr, " COMPOSE: BREVE+%04x not defined\n",(int)main);
112
- }
113
- break;
114
-
115
- case CARON: /* caron (latin2) "v"-above-... */
116
- switch (main) {
117
- case 'a': return LATIN_SMALL_LETTER_A_WITH_CARON;
118
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CARON;
119
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CARON;
120
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CARON;
121
- case 'e': return LATIN_SMALL_LETTER_E_WITH_CARON;
122
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CARON;
123
- case 'i': return LATIN_SMALL_LETTER_I_WITH_CARON;
124
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CARON;
125
- case 'o': return LATIN_SMALL_LETTER_O_WITH_CARON;
126
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
127
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
128
- case 's': return LATIN_SMALL_LETTER_S_WITH_CARON;
129
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CARON;
130
- case 'u': return LATIN_SMALL_LETTER_U_WITH_CARON;
131
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CARON;
132
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_CARON;
133
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_CARON;
134
- default:
135
- if(warn)fprintf( stderr, " COMPOSE: CARON+%04x not defined\n",(int)main);
136
- }
137
- break;
138
-
139
- case CEDILLA:
140
- switch (main) {
141
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CEDILLA;
142
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
143
- default:
144
- if(warn)fprintf( stderr, " COMPOSE: CEDILLA+%04x not defined\n",(int)main);
145
- }
146
- break;
147
-
148
- case TILDE:
149
- switch (main) {
150
- case 'a': return LATIN_SMALL_LETTER_A_WITH_TILDE;
151
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_TILDE;
152
- case 'i': return LATIN_SMALL_LETTER_I_WITH_TILDE;
153
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_TILDE;
154
- case 'n': return LATIN_SMALL_LETTER_N_WITH_TILDE;
155
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_TILDE;
156
- case 'o': return LATIN_SMALL_LETTER_O_WITH_TILDE;
157
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
158
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
159
- case 'u': return LATIN_SMALL_LETTER_U_WITH_TILDE;
160
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_TILDE;
161
- default:
162
- if(warn)fprintf( stderr, " COMPOSE: TILDE+%04x not defined\n",(int)main);
163
- }
164
- break;
165
-
166
- case GRAVE_ACCENT:
167
- switch (main) {
168
- case 'a': return LATIN_SMALL_LETTER_A_WITH_GRAVE;
169
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_GRAVE;
170
- case 'e': return LATIN_SMALL_LETTER_E_WITH_GRAVE;
171
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_GRAVE;
172
- case 'i': return LATIN_SMALL_LETTER_I_WITH_GRAVE;
173
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_GRAVE;
174
- case 'n': return LATIN_SMALL_LETTER_N_WITH_GRAVE;
175
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_GRAVE;
176
- case 'o': return LATIN_SMALL_LETTER_O_WITH_GRAVE;
177
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
178
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
179
- case 'u': return LATIN_SMALL_LETTER_U_WITH_GRAVE;
180
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_GRAVE;
181
- default:
182
- if(warn)fprintf( stderr, " COMPOSE: GRAVE_ACCENT+%04x not defined\n",(int)main);
183
- }
184
- break;
185
-
186
- case QUOTATION_MARK: /* do NOT USE this. It's here for compatibility only.
187
- Use DIAERESIS instead. */
188
- fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
189
-
190
- case DIAERESIS:
191
- switch (main) {
192
- case 'a': return LATIN_SMALL_LETTER_A_WITH_DIAERESIS;
193
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS;
194
- case 'e': return LATIN_SMALL_LETTER_E_WITH_DIAERESIS;
195
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS;
196
- case 'i': return LATIN_SMALL_LETTER_I_WITH_DIAERESIS;
197
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS;
198
- case 'o': return LATIN_SMALL_LETTER_O_WITH_DIAERESIS;
199
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
200
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
201
- case 'u': return LATIN_SMALL_LETTER_U_WITH_DIAERESIS;
202
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS;
203
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
204
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
205
- default:
206
- if(warn)fprintf( stderr, " COMPOSE: DIAERESIS+%04x (%c) not defined\n",(int)main,(char)main);
207
- }
208
- break;
209
-
210
- case CIRCUMFLEX_ACCENT: /* ^ */
211
- switch (main) {
212
- case 'a': return LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX;
213
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX;
214
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX;
215
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX;
216
- case 'e': return LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX;
217
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX;
218
- case 'g': return LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX;
219
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX;
220
- case 'h': return LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX;
221
- case 'H': return LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX;
222
- case 'i': return LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX;
223
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX;
224
- case 'j': return LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX;
225
- case 'J': return LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX;
226
- case 'o': return LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX;
227
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
228
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
229
- case 's': return LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX;
230
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX;
231
- case 'u': return LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX;
232
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX;
233
- case 'w': return LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX;
234
- case 'W': return LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX;
235
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX;
236
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX;
237
- default:
238
- if(warn)fprintf( stderr, " COMPOSE: CIRCUMFLEX_ACCENT+%04x not defined\n",(int)main);
239
- }
240
- break;
241
-
242
- case MACRON: /* a minus sign above the char (latin2) */
243
- switch (main) {
244
- case 'a': return LATIN_SMALL_LETTER_A_WITH_MACRON;
245
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_MACRON;
246
- case 'e': return LATIN_SMALL_LETTER_E_WITH_MACRON;
247
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_MACRON;
248
- case 'i': return LATIN_SMALL_LETTER_I_WITH_MACRON;
249
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_MACRON;
250
- case 'o': return LATIN_SMALL_LETTER_O_WITH_MACRON;
251
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_MACRON;
252
- case 'u': return LATIN_SMALL_LETTER_U_WITH_MACRON;
253
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_MACRON;
254
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_MACRON;
255
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_MACRON;
256
- case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_MACRON;
257
- case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_MACRON;
258
- case '=': return IDENTICAL_TO;
259
- case '-': return '=';
260
- case ' ': return MODIFIER_LETTER_MACRON;
261
- default:
262
- if(warn)fprintf( stderr, " COMPOSE: MACRON+%04x not defined\n",(int)main);
263
- }
264
- break;
265
-
266
- case DOT_ABOVE: /* latin2 */
267
- switch (main) {
268
- case 'a': return LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE;
269
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE;
270
- case 'c': return LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE;
271
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE;
272
- case 'e': return LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE;
273
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE;
274
- case 'g': return LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE;
275
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE;
276
- case 'l': return 'i'; /* correct wrong recognition */
277
- case 'i': return 'i';
278
- case LATIN_SMALL_LETTER_DOTLESS_I: return 'i';
279
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
280
- case 'j': return 'j';
281
- case 'o': return LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE;
282
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE;
283
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE;
284
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE;
285
- case ',': return ';';
286
- case '.': return ':';
287
- default:
288
- if(warn)fprintf( stderr, " COMPOSE: DOT_ABOVE+%04x not defined\n",(int)main);
289
- }
290
- break;
291
-
292
- case RING_ABOVE:
293
- switch (main) {
294
- case 'a': return LATIN_SMALL_LETTER_A_WITH_RING_ABOVE;
295
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE;
296
- case 'u': return LATIN_SMALL_LETTER_U_WITH_RING_ABOVE;
297
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE;
298
- default:
299
- if(warn)fprintf( stderr, " COMPOSE: RING_ABOVE+%04x not defined\n",(int)main);
300
- }
301
- break;
302
-
303
- case 'e': /* e ligatures: ae, oe. */
304
- case 'E':
305
- switch (main) {
306
- case 'a': return LATIN_SMALL_LETTER_AE;
307
- case 'A': return LATIN_CAPITAL_LETTER_AE;
308
- case 'o': return LATIN_SMALL_LIGATURE_OE;
309
- case 'O': return LATIN_CAPITAL_LIGATURE_OE;
310
- case '0': return LATIN_CAPITAL_LIGATURE_OE;
311
- default:
312
- if(warn)fprintf( stderr, " COMPOSE: %04x+e/E not defined\n",(int)main);
313
- }
314
- break;
315
-
316
- case 'g': /* greek */
317
- switch (main) {
318
- /* missing 0x37A-0x390 */
319
- /* weird cases: Q -> theta (it resembles a little, doesn't it?)
320
- V -> psi (what can I do?) */
321
- case 'A': return GREEK_CAPITAL_LETTER_ALPHA;
322
- case 'B': return GREEK_CAPITAL_LETTER_BETA;
323
- case 'G': return GREEK_CAPITAL_LETTER_GAMMA;
324
- case 'D': return GREEK_CAPITAL_LETTER_DELTA;
325
- case 'E': return GREEK_CAPITAL_LETTER_EPSILON;
326
- case 'Z': return GREEK_CAPITAL_LETTER_ZETA;
327
- case 'H': return GREEK_CAPITAL_LETTER_ETA;
328
- case 'Q': return GREEK_CAPITAL_LETTER_THETA;
329
- case 'I': return GREEK_CAPITAL_LETTER_IOTA;
330
- case 'K': return GREEK_CAPITAL_LETTER_KAPPA;
331
- case 'L': return GREEK_CAPITAL_LETTER_LAMDA;
332
- case 'M': return GREEK_CAPITAL_LETTER_MU;
333
- case 'N': return GREEK_CAPITAL_LETTER_NU;
334
- case 'X': return GREEK_CAPITAL_LETTER_XI;
335
- case 'O': return GREEK_CAPITAL_LETTER_OMICRON;
336
- case 'P': return GREEK_CAPITAL_LETTER_PI;
337
- case 'R': return GREEK_CAPITAL_LETTER_RHO;
338
- case 'S': return GREEK_CAPITAL_LETTER_SIGMA;
339
- case 'T': return GREEK_CAPITAL_LETTER_TAU;
340
- case 'Y': return GREEK_CAPITAL_LETTER_UPSILON;
341
- case 'F': return GREEK_CAPITAL_LETTER_PHI;
342
- case 'C': return GREEK_CAPITAL_LETTER_CHI;
343
- case 'V': return GREEK_CAPITAL_LETTER_PSI;
344
- case 'W': return GREEK_CAPITAL_LETTER_OMEGA;
345
- /*
346
- case '': return GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA;
347
- case '': return GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA;
348
- case '': return GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
349
- case '': return GREEK_SMALL_LETTER_EPSILON_WITH_TONOS;
350
- case '': return GREEK_SMALL_LETTER_ETA_WITH_TONOS;
351
- case '': return GREEK_SMALL_LETTER_IOTA_WITH_TONOS;
352
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS;
353
- */
354
- case 'a': return GREEK_SMALL_LETTER_ALPHA;
355
- case 'b': return GREEK_SMALL_LETTER_BETA;
356
- case 'g': return GREEK_SMALL_LETTER_GAMMA;
357
- case 'd': return GREEK_SMALL_LETTER_DELTA;
358
- case 'e': return GREEK_SMALL_LETTER_EPSILON;
359
- case 'z': return GREEK_SMALL_LETTER_ZETA;
360
- case 'h': return GREEK_SMALL_LETTER_ETA;
361
- case 'q': return GREEK_SMALL_LETTER_THETA;
362
- case 'i': return GREEK_SMALL_LETTER_IOTA;
363
- case 'k': return GREEK_SMALL_LETTER_KAPPA;
364
- case 'l': return GREEK_SMALL_LETTER_LAMDA;
365
- case 'm': return GREEK_SMALL_LETTER_MU;
366
- case 'n': return GREEK_SMALL_LETTER_NU;
367
- case 'x': return GREEK_SMALL_LETTER_XI;
368
- case 'o': return GREEK_SMALL_LETTER_OMICRON;
369
- case 'p': return GREEK_SMALL_LETTER_PI;
370
- case 'r': return GREEK_SMALL_LETTER_RHO;
371
- case '&': return GREEK_SMALL_LETTER_FINAL_SIGMA;
372
- case 's': return GREEK_SMALL_LETTER_SIGMA;
373
- case 't': return GREEK_SMALL_LETTER_TAU;
374
- case 'y': return GREEK_SMALL_LETTER_UPSILON;
375
- case 'f': return GREEK_SMALL_LETTER_PHI;
376
- case 'c': return GREEK_SMALL_LETTER_CHI;
377
- case 'v': return GREEK_SMALL_LETTER_PSI;
378
- case 'w': return GREEK_SMALL_LETTER_OMEGA;
379
- /*
380
- case '': return GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA;
381
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA;
382
- case '': return GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
383
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
384
- case '': return GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
385
- case '': return GREEK_BETA_SYMBOL;
386
- case '': return GREEK_THETA_SYMBOL;
387
- case '': return GREEK_UPSILON_WITH_HOOK_SYMBOL;
388
- case '': return GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL;
389
- case '': return GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL;
390
- case '': return GREEK_PHI_SYMBOL;
391
- case '': return GREEK_PI_SYMBOL;
392
- */
393
- default:
394
- if(warn)fprintf( stderr, " COMPOSE: GREEK %04x not defined\n",(int)main);
395
- }
396
- break;
397
-
398
- default:
399
- fprintf( stderr, " COMPOSE: modifier %04x not defined\n",(int)modifier);
400
- }
401
- return (wchar_t)main;
402
- }
403
-
404
- #define UNDEFINED "~" /* placeholder string decode() returns for characters that have no representation in the requested output format */
405
-
406
- /* Arguments: character in Unicode format, type of format to convert to.
406
- Returns: a string containing the Unicode character converted to the chosen
407
- format. This string is statically allocated (either a string literal or one of 8 rotating internal buffers that later calls overwrite) and must not be freed.
408
- ToDo: would a table-driven lookup be better?
409
- */
411
- const char *decode(wchar_t c, FORMAT type) {
412
- /* static char d; --- js: big bug (missing \0) if &d returned */
413
- /*FIXME jb static*/ static char bbuf[8*32]; /* space for 8 buffers, rotating */
414
- /*FIXME jb static*/ static char *buf=bbuf; /* used for UTF8 sequences and undefined codes */
415
- buf+=32; if(buf>=bbuf+8*32) buf=bbuf;
416
- buf[0]=buf[1]=buf[2]=0;
417
- switch (type) {
418
- case ISO8859_1:
419
- if ( c <= 0xFF ) { /* UNICODE == ISO8859-1 */
420
- buf[0] = (char)c;
421
- return buf;
422
- }
423
- switch (c) { /* not found in list, but perhaps we can describe it */
424
- /* todo: add greek. GREEK_SMALL_LETTER_ALPHA = alpha */
425
-
426
- /* general puctuation */
427
- case HYPHEN:
428
- return (const char *)"-";
429
- case FIGURE_DASH:
430
- case EN_DASH:
431
- return (const char *)"--";
432
- case EM_DASH:
433
- return (const char *)"---";
434
- case LEFT_SINGLE_QUOTATION_MARK:
435
- return (const char *)"`";
436
- case RIGHT_SINGLE_QUOTATION_MARK:
437
- return (const char *)"'";
438
- case SINGLE_LOW_9_QUOTATION_MARK:
439
- return (const char *)",";
440
- case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
441
- return (const char *)UNDEFINED;
442
- case LEFT_DOUBLE_QUOTATION_MARK:
443
- return (const char *)"``";
444
- case RIGHT_DOUBLE_QUOTATION_MARK:
445
- return (const char *)"''";
446
- case DOUBLE_LOW_9_QUOTATION_MARK:
447
- return (const char *)",,";
448
- case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
449
- return (const char *)UNDEFINED;
450
- case DAGGER:
451
- return (const char *)"+";
452
- case DOUBLE_DAGGER:
453
- return (const char *)"*";
454
- case BULLET:
455
- return (const char *)"*";
456
- case TRIANGULAR_BULLET:
457
- return (const char *)"*";
458
- case HYPHENATION_POINT:
459
- return (const char *)"-";
460
- case HORIZONTAL_ELLIPSIS:
461
- return (const char *)"...";
462
- case PER_MILLE_SIGN:
463
- return (const char *)"%%"; /* awk! */
464
- case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
465
- return (const char *)"<";
466
- case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
467
- return (const char *)">";
468
- case EURO_CURRENCY_SIGN:
469
- return (const char *)"EUR"; /* change it! */
470
-
471
- /* ligatures */
472
- case LATIN_SMALL_LIGATURE_FF:
473
- return (const char *)"ff";
474
- case LATIN_SMALL_LIGATURE_FI:
475
- return (const char *)"fi";
476
- case LATIN_SMALL_LIGATURE_FL:
477
- return (const char *)"fl";
478
- case LATIN_SMALL_LIGATURE_FFI:
479
- return (const char *)"ffi";
480
- case LATIN_SMALL_LIGATURE_FFL:
481
- return (const char *)"ffl";
482
- case LATIN_SMALL_LIGATURE_LONG_S_T:
483
- case LATIN_SMALL_LIGATURE_ST:
484
- return (const char *)"st";
485
-
486
- /* extra */
487
- case UNKNOWN:
488
- return (const char *)"_";
489
- case PICTURE:
490
- return (const char *)"_"; /* Due to Mobile OCR */
491
-
492
- default:
493
- /* snprintf seems to be no standard, so I use insecure sprintf */
494
- sprintf(buf,"\\code(%04x)",(unsigned)c);
495
- return buf; /* UNDEFINED; */
496
- }
497
- break;
498
- case TeX:
499
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
500
- switch (c) {
501
- case '$':
502
- return (const char *)"\\$";
503
- case '&':
504
- return (const char *)"\\&";
505
- case '%':
506
- return (const char *)"\\%";
507
- case '#':
508
- return (const char *)"\\#";
509
- case '_':
510
- return (const char *)"\\_";
511
- case '{':
512
- return (const char *)"\\{";
513
- case '}':
514
- return (const char *)"\\}";
515
- case '\\':
516
- return (const char *)"$\\backslash$";
517
- case '~':
518
- return (const char *)"\\~{}";
519
- case '^':
520
- return (const char *)"\\^{}";
521
- default:
522
- buf[0] = (char)c;
523
- return (const char *)buf;
524
- }
525
- }
526
- switch (c) {
527
- /* ISO8859_1 */
528
- case NO_BREAK_SPACE:
529
- return (const char *)"~";
530
- case INVERTED_EXCLAMATION_MARK:
531
- return (const char *)"!'";
532
- case CENT_SIGN:
533
- return (const char *)"\\textcent"; /* \usepackage{textcomp} */
534
- case POUND_SIGN:
535
- return (const char *)"\\pounds";
536
- case EURO_CURRENCY_SIGN:
537
- return (const char *)"\\euro"; /* \usepackage{eurosans} */
538
- case CURRENCY_SIGN:
539
- return (const char *)"\\textcurrency"; /* \usepackage{textcomp} */
540
- case YEN_SIGN:
541
- return (const char *)"\\textyen"; /* \usepackage{textcomp} */
542
- case BROKEN_BAR:
543
- return (const char *)"\\textbrokenbar"; /* \usepackage{textcomp} */
544
- case SECTION_SIGN:
545
- return (const char *)"\\S";
546
- case DIAERESIS:
547
- return (const char *)"\"";
548
- case COPYRIGHT_SIGN:
549
- return (const char *)"\\copyright";
550
- case FEMININE_ORDINAL_INDICATOR:
551
- return (const char *)"$^{\\underbar{a}}$";
552
- case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
553
- return (const char *)"\\flqq{}";
554
- case NOT_SIGN:
555
- return (const char *)"$\\lnot$";
556
- case SOFT_HYPHEN:
557
- return (const char *)"\\-";
558
- case REGISTERED_SIGN:
559
- return (const char *)"\\textregistered";/* \usepackage{textcomp} */
560
- case MACRON:
561
- return (const char *)"\\textasciimacron";/* \usepackage{textcomp} */
562
- case DEGREE_SIGN:
563
- return (const char *)"$^{o}$";
564
- case PLUS_MINUS_SIGN:
565
- return (const char *)"$\\pm$";
566
- case SUPERSCRIPT_TWO:
567
- return (const char *)"$^{2}$";
568
- case SUPERSCRIPT_THREE:
569
- return (const char *)"$^{3}$";
570
- case ACUTE_ACCENT:
571
- return (const char *)"\\( \\prime \\)";
572
- case MICRO_SIGN:
573
- return (const char *)"$\\mu$";
574
- case PILCROW_SIGN:
575
- return (const char *)"\\P";
576
- case MIDDLE_DOT:
577
- return (const char *)"$\\cdot$";
578
- case CEDILLA:
579
- return (const char *)"\\,";
580
- case SUPERSCRIPT_ONE:
581
- return (const char *)"$^{1}$";
582
- case MASCULINE_ORDINAL_INDICATOR:
583
- return (const char *)"$^{\\underbar{o}}$";
584
- case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
585
- return (const char *)"\\frqq{}";
586
- case VULGAR_FRACTION_ONE_QUARTER: /* these fractions are not good*/
587
- return (const char *)"\\( 1\\over 4 \\)";
588
- case VULGAR_FRACTION_ONE_HALF:
589
- return (const char *)"\\( 1\\over 2 \\)";
590
- case VULGAR_FRACTION_THREE_QUARTERS:
591
- return (const char *)"\\( 3\\over 4 \\)";
592
- case INVERTED_QUESTION_MARK:
593
- return (const char *)"?'";
594
- case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
595
- return (const char *)"\\`A";
596
- case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
597
- return (const char *)"\\'A";
598
- case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
599
- return (const char *)"\\^A";
600
- case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
601
- return (const char *)"\\~A";
602
- case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
603
- return (const char *)"\\\"A";
604
- case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
605
- return (const char *)"\\AA";
606
- case LATIN_CAPITAL_LETTER_AE:
607
- return (const char *)"\\AE";
608
- case LATIN_CAPITAL_LETTER_C_WITH_CARON:
609
- return (const char *)"\\v{C}";
610
- case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
611
- return (const char *)"\\C";
612
- case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
613
- return (const char *)"\\`E";
614
- case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
615
- return (const char *)"\\'E";
616
- case LATIN_CAPITAL_LETTER_E_WITH_CARON:
617
- return (const char *)"\\v{E}";
618
- case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
619
- return (const char *)"\\^E";
620
- case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
621
- return (const char *)"\\\"E";
622
- case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
623
- return (const char *)"\\`I";
624
- case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
625
- return (const char *)"\\'I";
626
- case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
627
- return (const char *)"\\^I";
628
- case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
629
- return (const char *)"\\\"I";
630
- case LATIN_CAPITAL_LETTER_ETH:
631
- return (const char *)UNDEFINED;
632
- case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
633
- return (const char *)"\\~N";
634
- case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
635
- return (const char *)"\\`O";
636
- case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
637
- return (const char *)"\\'O";
638
- case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
639
- return (const char *)"\\^O";
640
- case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
641
- return (const char *)"\\~O";
642
- case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
643
- return (const char *)"\\\"O";
644
- case MULTIPLICATION_SIGN:
645
- return (const char *)"$\\times$";
646
- case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
647
- return (const char *)"\\O";
648
- case LATIN_CAPITAL_LETTER_S_WITH_CARON:
649
- return (const char *)"\\v{S}";
650
- case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
651
- return (const char *)"\\`U";
652
- case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
653
- return (const char *)"\\'U";
654
- case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
655
- return (const char *)"\\^U";
656
- case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
657
- return (const char *)"\\\"U";
658
- case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
659
- return (const char *)"\\'Y";
660
- case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
661
- return (const char *)"\\v{Z}";
662
- case LATIN_CAPITAL_LETTER_THORN:
663
- return (const char *)UNDEFINED;
664
- case LATIN_SMALL_LETTER_SHARP_S:
665
- return (const char *)"\\ss";
666
- case LATIN_SMALL_LETTER_A_WITH_GRAVE:
667
- return (const char *)"\\`a";
668
- case LATIN_SMALL_LETTER_A_WITH_ACUTE:
669
- return (const char *)"\\'a";
670
- case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
671
- return (const char *)"\\^a";
672
- case LATIN_SMALL_LETTER_A_WITH_TILDE:
673
- return (const char *)"\\~a";
674
- case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
675
- return (const char *)"\\\"a";
676
- case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
677
- return (const char *)"\\aa";
678
- case LATIN_SMALL_LETTER_AE:
679
- return (const char *)"\\ae";
680
- case LATIN_SMALL_LETTER_C_WITH_CARON:
681
- return (const char *)"\\v{c}";
682
- case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
683
- return (const char *)"\\c";
684
- case LATIN_SMALL_LETTER_E_WITH_GRAVE:
685
- return (const char *)"\\`e";
686
- case LATIN_SMALL_LETTER_E_WITH_ACUTE:
687
- return (const char *)"\\'e";
688
- case LATIN_SMALL_LETTER_E_WITH_CARON:
689
- return (const char *)"\\v{e}";
690
- case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
691
- return (const char *)"\\^e";
692
- case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
693
- return (const char *)"\\\"e";
694
- case LATIN_SMALL_LETTER_I_WITH_GRAVE:
695
- return (const char *)"\\`i";
696
- case LATIN_SMALL_LETTER_I_WITH_ACUTE:
697
- return (const char *)"\\'i";
698
- case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
699
- return (const char *)"\\^i";
700
- case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
701
- return (const char *)"\\\"i";
702
- case LATIN_SMALL_LETTER_ETH:
703
- return (const char *)UNDEFINED;
704
- case LATIN_SMALL_LETTER_N_WITH_TILDE:
705
- return (const char *)"\\~n";
706
- case LATIN_SMALL_LETTER_O_WITH_GRAVE:
707
- return (const char *)"\\`o";
708
- case LATIN_SMALL_LETTER_O_WITH_ACUTE:
709
- return (const char *)"\\'o";
710
- case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
711
- return (const char *)"\\^o";
712
- case LATIN_SMALL_LETTER_O_WITH_TILDE:
713
- return (const char *)"\\~o";
714
- case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
715
- return (const char *)"\\\"o";
716
- case DIVISION_SIGN:
717
- return (const char *)"$\\div$";
718
- case LATIN_SMALL_LETTER_O_WITH_STROKE:
719
- return (const char *)"\\o";
720
- case LATIN_SMALL_LETTER_S_WITH_CARON:
721
- return (const char *)"\\v{s}";
722
- case LATIN_SMALL_LETTER_U_WITH_GRAVE:
723
- return (const char *)"\\`u";
724
- case LATIN_SMALL_LETTER_U_WITH_ACUTE:
725
- return (const char *)"\\'u";
726
- case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
727
- return (const char *)"\\^u";
728
- case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
729
- return (const char *)"\\\"u";
730
- case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
731
- return (const char *)"\\'y";
732
- case LATIN_SMALL_LETTER_THORN:
733
- return (const char *)UNDEFINED;
734
- case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
735
- return (const char *)"\\\"y";
736
- case LATIN_SMALL_LETTER_Z_WITH_CARON:
737
- return (const char *)"\\v{z}";
738
-
739
- /* greek */
740
- /* some (punctuation, accents, accented capital) greek letters missing*/
741
- case GREEK_CAPITAL_LETTER_ALPHA:
742
- return (const char *)"A";
743
- case GREEK_CAPITAL_LETTER_BETA:
744
- return (const char *)"B";
745
- case GREEK_CAPITAL_LETTER_GAMMA:
746
- return (const char *)"\\( \\Gamma \\)";
747
- case GREEK_CAPITAL_LETTER_DELTA:
748
- return (const char *)"\\( \\Delta \\)";
749
- case GREEK_CAPITAL_LETTER_EPSILON:
750
- return (const char *)"E";
751
- case GREEK_CAPITAL_LETTER_ZETA:
752
- return (const char *)"Z";
753
- case GREEK_CAPITAL_LETTER_ETA:
754
- return (const char *)"H";
755
- case GREEK_CAPITAL_LETTER_THETA:
756
- return (const char *)"\\( \\Theta \\)";
757
- case GREEK_CAPITAL_LETTER_IOTA:
758
- return (const char *)"I";
759
- case GREEK_CAPITAL_LETTER_KAPPA:
760
- return (const char *)"K";
761
- case GREEK_CAPITAL_LETTER_LAMDA:
762
- return (const char *)"\\( \\Lambda \\)";
763
- case GREEK_CAPITAL_LETTER_MU:
764
- return (const char *)"M";
765
- case GREEK_CAPITAL_LETTER_NU:
766
- return (const char *)"N";
767
- case GREEK_CAPITAL_LETTER_XI:
768
- return (const char *)"\\( \\Xi \\)";
769
- case GREEK_CAPITAL_LETTER_OMICRON:
770
- return (const char *)"O";
771
- case GREEK_CAPITAL_LETTER_PI:
772
- return (const char *)"\\( \\Pi \\)";
773
- case GREEK_CAPITAL_LETTER_RHO:
774
- return (const char *)"P";
775
- case GREEK_CAPITAL_LETTER_SIGMA:
776
- return (const char *)"\\( \\Sigma \\)";
777
- case GREEK_CAPITAL_LETTER_TAU:
778
- return (const char *)"T";
779
- case GREEK_CAPITAL_LETTER_UPSILON:
780
- return (const char *)"\\( \\Upsilon \\)";
781
- case GREEK_CAPITAL_LETTER_PHI:
782
- return (const char *)"\\( \\Phi \\)";
783
- case GREEK_CAPITAL_LETTER_CHI:
784
- return (const char *)"\\( \\Chi \\)";
785
- case GREEK_CAPITAL_LETTER_PSI:
786
- return (const char *)"\\( \\Psi \\)";
787
- case GREEK_CAPITAL_LETTER_OMEGA:
788
- return (const char *)"\\( \\Omega \\)";
789
- case GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA:
790
- return (const char *)UNDEFINED;
791
- case GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA:
792
- return (const char *)UNDEFINED;
793
- case GREEK_SMALL_LETTER_ALPHA_WITH_TONOS:
794
- return (const char *)UNDEFINED;
795
- case GREEK_SMALL_LETTER_EPSILON_WITH_TONOS:
796
- return (const char *)UNDEFINED;
797
- case GREEK_SMALL_LETTER_ETA_WITH_TONOS:
798
- return (const char *)UNDEFINED;
799
- case GREEK_SMALL_LETTER_IOTA_WITH_TONOS:
800
- return (const char *)UNDEFINED;
801
- case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS:
802
- return (const char *)UNDEFINED;
803
- case GREEK_SMALL_LETTER_ALPHA:
804
- return (const char *)"\\( \\alpha \\)";
805
- case GREEK_SMALL_LETTER_BETA:
806
- return (const char *)"\\( \\beta \\)";
807
- case GREEK_SMALL_LETTER_GAMMA:
808
- return (const char *)"\\( \\gamma \\)";
809
- case GREEK_SMALL_LETTER_DELTA:
810
- return (const char *)"\\( \\delta \\)";
811
- case GREEK_SMALL_LETTER_EPSILON:
812
- return (const char *)"\\( \\epsilon \\)";
813
- case GREEK_SMALL_LETTER_ZETA:
814
- return (const char *)"\\( \\zeta \\)";
815
- case GREEK_SMALL_LETTER_ETA:
816
- return (const char *)"\\( \\eta \\)";
817
- case GREEK_SMALL_LETTER_THETA:
818
- return (const char *)"\\( \\theta \\)";
819
- case GREEK_SMALL_LETTER_IOTA:
820
- return (const char *)"\\( \\iota \\)";
821
- case GREEK_SMALL_LETTER_KAPPA:
822
- return (const char *)"\\( \\kappa \\)";
823
- case GREEK_SMALL_LETTER_LAMDA:
824
- return (const char *)"\\( \\lambda \\)";
825
- case GREEK_SMALL_LETTER_MU:
826
- return (const char *)"\\( \\mu \\)";
827
- case GREEK_SMALL_LETTER_NU:
828
- return (const char *)"\\( \\nu \\)";
829
- case GREEK_SMALL_LETTER_XI:
830
- return (const char *)"\\( \\xi \\)";
831
- case GREEK_SMALL_LETTER_OMICRON:
832
- return (const char *)"\\( \\omicron \\)";
833
- case GREEK_SMALL_LETTER_PI:
834
- return (const char *)"\\( \\pi \\)";
835
- case GREEK_SMALL_LETTER_RHO:
836
- return (const char *)"\\( \\rho \\)";
837
- case GREEK_SMALL_LETTER_FINAL_SIGMA:
838
- return (const char *)"\\( \\varsigma \\)";
839
- case GREEK_SMALL_LETTER_SIGMA:
840
- return (const char *)"\\( \\sigma \\)";
841
- case GREEK_SMALL_LETTER_TAU:
842
- return (const char *)"\\( \\tau \\)";
843
- case GREEK_SMALL_LETTER_UPSILON:
844
- return (const char *)"\\( \\upsilon \\)";
845
- case GREEK_SMALL_LETTER_PHI:
846
- return (const char *)"\\( \\varphi \\)";
847
- case GREEK_SMALL_LETTER_CHI:
848
- return (const char *)"\\( \\chi \\)";
849
- case GREEK_SMALL_LETTER_PSI:
850
- return (const char *)"\\( \\psi \\)";
851
- case GREEK_SMALL_LETTER_OMEGA:
852
- return (const char *)"\\( \\omega \\)";
853
- case GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA:
854
- return (const char *)UNDEFINED;
855
- case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA:
856
- return (const char *)UNDEFINED;
857
- case GREEK_SMALL_LETTER_OMICRON_WITH_TONOS:
858
- return (const char *)UNDEFINED;
859
- case GREEK_SMALL_LETTER_UPSILON_WITH_TONOS:
860
- return (const char *)UNDEFINED;
861
- case GREEK_SMALL_LETTER_OMEGA_WITH_TONOS:
862
- return (const char *)UNDEFINED;
863
- case GREEK_BETA_SYMBOL:
864
- return (const char *)UNDEFINED;
865
- case GREEK_THETA_SYMBOL:
866
- return (const char *)"\\( \\vartheta \\)";
867
- case GREEK_UPSILON_WITH_HOOK_SYMBOL:
868
- return (const char *)UNDEFINED;
869
- case GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL:
870
- return (const char *)UNDEFINED;
871
- case GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL:
872
- return (const char *)UNDEFINED;
873
- case GREEK_PHI_SYMBOL:
874
- return (const char *)"\\( \\phi \\)";
875
- case GREEK_PI_SYMBOL:
876
- return (const char *)"\\( \\varpi \\)";
877
- /* and some greek letters missing*/
878
-
879
- /* punctuation (partial) */
880
- case HYPHEN:
881
- return (const char *)"-";
882
- case NON_BREAKING_HYPHEN:
883
- return (const char *)UNDEFINED;
884
- case FIGURE_DASH:
885
- case EN_DASH:
886
- return (const char *)"--";
887
- case EM_DASH:
888
- return (const char *)"---";
889
- case HORIZONTAL_BAR:
890
- return (const char *)UNDEFINED;
891
- case LEFT_SINGLE_QUOTATION_MARK:
892
- return (const char *)"`";
893
- case RIGHT_SINGLE_QUOTATION_MARK:
894
- return (const char *)"'";
895
- case SINGLE_LOW_9_QUOTATION_MARK:
896
- return (const char *)"\\glq{}";
897
- case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
898
- return (const char *)UNDEFINED;
899
- case LEFT_DOUBLE_QUOTATION_MARK:
900
- return (const char *)"``";
901
- case RIGHT_DOUBLE_QUOTATION_MARK:
902
- return (const char *)"''";
903
- case DOUBLE_LOW_9_QUOTATION_MARK:
904
- return (const char *)"\\glqq{}";
905
- case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
906
- return (const char *)UNDEFINED;
907
- case DAGGER:
908
- return (const char *)"\\dag";
909
- case DOUBLE_DAGGER:
910
- return (const char *)"\\ddag";
911
- case BULLET:
912
- return (const char *)"$\\bullet$";
913
- case TRIANGULAR_BULLET:
914
- return (const char *)"$\\blacktriangleright";
915
- case HYPHENATION_POINT:
916
- return (const char *)"\\-";
917
- case HORIZONTAL_ELLIPSIS:
918
- return (const char *)"\\ldots";
919
- case PER_MILLE_SIGN:
920
- return (const char *)UNDEFINED;
921
- case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
922
- return (const char *)"\\flq{}";
923
- case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
924
- return (const char *)"\\frq{}";
925
- /* ligatures */
926
- case LATIN_SMALL_LIGATURE_FF:
927
- return (const char *)"ff";
928
- case LATIN_SMALL_LIGATURE_FI:
929
- return (const char *)"fi";
930
- case LATIN_SMALL_LIGATURE_FL:
931
- return (const char *)"fl";
932
- case LATIN_SMALL_LIGATURE_FFI:
933
- return (const char *)"ffi";
934
- case LATIN_SMALL_LIGATURE_FFL:
935
- return (const char *)"ffl";
936
- case LATIN_SMALL_LIGATURE_LONG_S_T:
937
- case LATIN_SMALL_LIGATURE_ST:
938
- return (const char *)"st";
939
- /* reserved */
940
- case 0:
941
- return (const char *)"";
942
- case UNKNOWN:
943
- return (const char *)"\\_";
944
- case PICTURE:
945
- return (const char *)"(PICTURE)";
946
- default:
947
- /* snprintf seems to be no standard, so I use insecure sprintf */
948
- sprintf(buf,"\\symbol{%u}",(unsigned)c);
949
- return buf; /* UNDEFINED; */
950
- }
951
- case HTML:
952
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
953
- switch (c) {
954
- case '&':
955
- return (const char *)"&amp;";
956
- /* semicolon must not be coded */
957
- case '\'':
958
- return (const char *)"&apos;";
959
- case '"':
960
- return (const char *)"&quot;";
961
- case '<':
962
- return (const char *)"&lt;";
963
- case '>':
964
- return (const char *)"&gt;";
965
- }
966
- buf[0] = (char)c;
967
- return buf;
968
- }
969
- switch (c) {
970
- case PICTURE:
971
- return (const char *)"<!--PICTURE-->";
972
- case UNKNOWN:
973
- return (const char *)"_"; /* better use colored symbol? */
974
- case LINE_FEED:
975
- return (const char *)"<br />"; /* \n handled somwhere else? */
976
- case FORM_FEED:
977
- case CARRIAGE_RETURN:
978
- return (const char *)"<br />";
979
- case NO_BREAK_SPACE:
980
- return (const char *)"<nobr />";
981
- case INVERTED_EXCLAMATION_MARK:
982
- return (const char *)"&iexcl;";
983
- case CENT_SIGN:
984
- return (const char *)"&cent;";
985
- case POUND_SIGN:
986
- return (const char *)"&pound;";
987
- case CURRENCY_SIGN:
988
- return (const char *)"&curren;";
989
- case YEN_SIGN:
990
- return (const char *)"&yen;";
991
- case BROKEN_BAR:
992
- return (const char *)"&brvbar;";
993
- case SECTION_SIGN:
994
- return (const char *)"&sect;";
995
- case DIAERESIS:
996
- return (const char *)"&uml;";
997
- case COPYRIGHT_SIGN:
998
- return (const char *)"&copy;";
999
- case FEMININE_ORDINAL_INDICATOR:
1000
- return (const char *)"&ordfem;";
1001
- case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1002
- return (const char *)"&laquo;";
1003
- case NOT_SIGN:
1004
- return (const char *)"&not;";
1005
- case SOFT_HYPHEN:
1006
- return (const char *)"&shy;";
1007
- case REGISTERED_SIGN:
1008
- return (const char *)"&reg;";
1009
- case MACRON:
1010
- return (const char *)"&macr;";
1011
- case DEGREE_SIGN:
1012
- return (const char *)"&deg;";
1013
- case PLUS_MINUS_SIGN:
1014
- return (const char *)"&plusmn;";
1015
- case SUPERSCRIPT_TWO:
1016
- return (const char *)"&sup2;";
1017
- case SUPERSCRIPT_THREE:
1018
- return (const char *)"&sup3;";
1019
- case ACUTE_ACCENT:
1020
- return (const char *)"&acute;";
1021
- case MICRO_SIGN:
1022
- return (const char *)"&micro;";
1023
- case PILCROW_SIGN:
1024
- return (const char *)"&para;";
1025
- case MIDDLE_DOT:
1026
- return (const char *)"&middot;";
1027
- case CEDILLA:
1028
- return (const char *)"&cedil;";
1029
- case SUPERSCRIPT_ONE:
1030
- return (const char *)"&sup1;";
1031
- case MASCULINE_ORDINAL_INDICATOR:
1032
- return (const char *)"&ordm;";
1033
- case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1034
- return (const char *)"&raquo;";
1035
- case VULGAR_FRACTION_ONE_QUARTER:
1036
- return (const char *)"&frac14;";
1037
- case VULGAR_FRACTION_ONE_HALF:
1038
- return (const char *)"&frac12;";
1039
- case VULGAR_FRACTION_THREE_QUARTERS:
1040
- return (const char *)"&frac34;";
1041
- case INVERTED_QUESTION_MARK:
1042
- return (const char *)"&iquest;";
1043
- case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
1044
- return (const char *)"&Agrave;";
1045
- case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
1046
- return (const char *)"&Aacute;";
1047
- case LATIN_CAPITAL_LETTER_A_WITH_BREVE:
1048
- return (const char *)"&Abreve;";
1049
- case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
1050
- return (const char *)"&Acirc;";
1051
- case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
1052
- return (const char *)"&Atilde;";
1053
- case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
1054
- return (const char *)"&Auml;";
1055
- case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
1056
- return (const char *)"&Aring;";
1057
- case LATIN_CAPITAL_LETTER_AE:
1058
- return (const char *)"&AElig;";
1059
- case LATIN_CAPITAL_LETTER_C_WITH_CARON:
1060
- return (const char *)"&Ccaron;";
1061
- case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
1062
- return (const char *)"&Ccedil;";
1063
- case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
1064
- return (const char *)"&Egrave;";
1065
- case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
1066
- return (const char *)"&Eacute;";
1067
- case LATIN_CAPITAL_LETTER_E_WITH_CARON:
1068
- return (const char *)"&Ecaron;";
1069
- case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
1070
- return (const char *)"&Ecirc;";
1071
- case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
1072
- return (const char *)"&Euml;";
1073
- case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
1074
- return (const char *)"&Igrave;";
1075
- case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
1076
- return (const char *)"&Iacute;";
1077
- case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
1078
- return (const char *)"&Icirc;";
1079
- case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
1080
- return (const char *)"&Iuml;";
1081
- case LATIN_CAPITAL_LETTER_ETH:
1082
- return (const char *)"&ETH;";
1083
- case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
1084
- return (const char *)"&Ntilde;";
1085
- case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
1086
- return (const char *)"&Ograve;";
1087
- case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
1088
- return (const char *)"&Oacute;";
1089
- case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
1090
- return (const char *)"&Ocirc;";
1091
- case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
1092
- return (const char *)"&Otilde;";
1093
- case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
1094
- return (const char *)"&Ouml;";
1095
- case MULTIPLICATION_SIGN:
1096
- return (const char *)"&times";
1097
- case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
1098
- return (const char *)"&Oslash;";
1099
- case LATIN_CAPITAL_LETTER_S_WITH_CARON:
1100
- return (const char *)"&Scaron;";
1101
- case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
1102
- return (const char *)"&Ugrave;";
1103
- case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
1104
- return (const char *)"&Uacute;";
1105
- case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
1106
- return (const char *)"&Ucirc;";
1107
- case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
1108
- return (const char *)"&Uuml;";
1109
- case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
1110
- return (const char *)"&Yacute;";
1111
- case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
1112
- return (const char *)"&Zcaron;";
1113
- case LATIN_CAPITAL_LETTER_THORN:
1114
- return (const char *)"&THORN;";
1115
- case LATIN_SMALL_LETTER_SHARP_S:
1116
- return (const char *)"&szlig;";
1117
- case LATIN_SMALL_LETTER_A_WITH_GRAVE:
1118
- return (const char *)"&agrave;";
1119
- case LATIN_SMALL_LETTER_A_WITH_ACUTE:
1120
- return (const char *)"&aacute;";
1121
- case LATIN_SMALL_LETTER_A_WITH_BREVE:
1122
- return (const char *)"&abreve;";
1123
- case LATIN_SMALL_LETTER_A_WITH_CARON:
1124
- return (const char *)"&acaron;";
1125
- case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
1126
- return (const char *)"&acirc;";
1127
- case LATIN_SMALL_LETTER_A_WITH_TILDE:
1128
- return (const char *)"&atilde;";
1129
- case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
1130
- return (const char *)"&auml;";
1131
- case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
1132
- return (const char *)"&aring;";
1133
- case LATIN_SMALL_LETTER_AE:
1134
- return (const char *)"&aelig;";
1135
- case LATIN_SMALL_LETTER_C_WITH_CARON:
1136
- return (const char *)"&ccaron;";
1137
- case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
1138
- return (const char *)"&ccedil;";
1139
- case LATIN_SMALL_LETTER_E_WITH_GRAVE:
1140
- return (const char *)"&egrave;";
1141
- case LATIN_SMALL_LETTER_E_WITH_ACUTE:
1142
- return (const char *)"&eacute;";
1143
- case LATIN_SMALL_LETTER_E_WITH_CARON:
1144
- return (const char *)"&ecaron;";
1145
- case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
1146
- return (const char *)"&ecirc;";
1147
- case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
1148
- return (const char *)"&euml;";
1149
- case LATIN_SMALL_LETTER_I_WITH_GRAVE:
1150
- return (const char *)"&igrave;";
1151
- case LATIN_SMALL_LETTER_I_WITH_ACUTE:
1152
- return (const char *)"&iacute;";
1153
- case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
1154
- return (const char *)"&icirc;";
1155
- case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
1156
- return (const char *)"&iuml;";
1157
- case LATIN_SMALL_LETTER_ETH:
1158
- return (const char *)"&eth;";
1159
- case LATIN_SMALL_LETTER_N_WITH_TILDE:
1160
- return (const char *)"&ntilde;";
1161
- case LATIN_SMALL_LETTER_O_WITH_GRAVE:
1162
- return (const char *)"&ograve;";
1163
- case LATIN_SMALL_LETTER_O_WITH_ACUTE:
1164
- return (const char *)"&oacute;";
1165
- case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
1166
- return (const char *)"&ocirc;";
1167
- case LATIN_SMALL_LETTER_O_WITH_TILDE:
1168
- return (const char *)"&otilde;";
1169
- case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
1170
- return (const char *)"&ouml;";
1171
- case DIVISION_SIGN:
1172
- return (const char *)"&divide;";
1173
- case LATIN_SMALL_LETTER_O_WITH_STROKE:
1174
- return (const char *)"&oslash;";
1175
- case LATIN_SMALL_LETTER_S_WITH_CARON:
1176
- return (const char *)"&scaron;";
1177
- case LATIN_SMALL_LETTER_U_WITH_GRAVE:
1178
- return (const char *)"&ugrave;";
1179
- case LATIN_SMALL_LETTER_U_WITH_ACUTE:
1180
- return (const char *)"&uacute;";
1181
- case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
1182
- return (const char *)"&ucirc;";
1183
- case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
1184
- return (const char *)"&uuml;";
1185
- case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
1186
- return (const char *)"&yacute;";
1187
- case LATIN_SMALL_LETTER_THORN:
1188
- return (const char *)"&thorn;";
1189
- case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
1190
- return (const char *)"&yuml;";
1191
- case LATIN_SMALL_LETTER_Z_WITH_CARON:
1192
- return (const char *)"&zcaron;";
1193
- case EURO_CURRENCY_SIGN:
1194
- return (const char *)"&euro;";
1195
- case 0:
1196
- return (const char *)"";
1197
- default:
1198
- sprintf(buf,"&#%u;",(unsigned)c);
1199
- return buf; /* undefined */
1200
- }
1201
- /* break; unreachable code */
1202
- case XML: /* only 5 &xxx;-ENTITIES ar defined by default */
1203
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
1204
- switch (c) {
1205
- case '&':
1206
- return (const char *)"&amp;";
1207
- case '\'':
1208
- return (const char *)"&apos;";
1209
- case '"':
1210
- return (const char *)"&quot;";
1211
- case '<':
1212
- return (const char *)"&lt;";
1213
- case '>':
1214
- return (const char *)"&gt;";
1215
- }
1216
- buf[0] = (char)c;
1217
- return buf;
1218
- }
1219
- switch (c) { /* subject of change! */
1220
- case PICTURE:
1221
- return (const char *)"(PICTURE)";
1222
- case UNKNOWN:
1223
- return (const char *)"_"; /* better use colored symbol? */
1224
- case LINE_FEED: /* \n handled somwhere else? */
1225
- case FORM_FEED:
1226
- case CARRIAGE_RETURN:
1227
- return (const char *)"<br />";
1228
- case NO_BREAK_SPACE:
1229
- return (const char *)"<nobr />";
1230
- case 0:
1231
- return (const char *)"";
1232
- default:
1233
- sprintf(buf,"&#x%03x;",(unsigned)c);
1234
- return buf; /* undefined */
1235
- }
1236
- /* break; unreachable code */
1237
- case SGML:
1238
- switch (c) {
1239
- default:
1240
- sprintf(buf,"&#%u;",(unsigned)c);
1241
- return buf; /* UNDEFINED */
1242
- }
1243
- /* break; unreachable code */
1244
- case ASCII: /* mainly used for debugging */
1245
- if ( c=='\n' || (c>= 0x20 && c <= 0x7F) ) {
1246
- buf[0] = (char)c;
1247
- return buf;
1248
- }
1249
- switch (c) {
1250
- /* extra */
1251
- case UNKNOWN:
1252
- return (const char *)"(?)";
1253
- case PICTURE:
1254
- return (const char *)"(?)";
1255
-
1256
- default:
1257
- /* snprintf seems to be no standard, so I use insecure sprintf */
1258
- if ((unsigned)c>255) sprintf(buf,"(0x%04x)",(unsigned)c);
1259
- else sprintf(buf,"(0x%02x)",(unsigned)c);
1260
- return buf; /* UNDEFINED; */
1261
- }
1262
- /* break; unreachable code */
1263
- default: /* use UTF8 as default, test with xterm -u8 */
1264
- /* extra */
1265
- if ( c == UNKNOWN ) return (const char *)"_";
1266
- if ( c == PICTURE ) return (const char *)"_"; /* Due to Mobile OCR */
1267
- if ( c <= (wchar_t)0x0000007F ) { /* UTF8 == 7bit ASCII */
1268
- buf[0] = (char)c;
1269
- return buf;
1270
- }
1271
- if ( c <= (wchar_t)0x000007FF ) { /* UTF8 == 11bit */
1272
- buf[0] = (char)(0xc0|((c>> 6) & 0x1f)); /* 110xxxxx */
1273
- buf[1] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1274
- buf[2] = (char)0; /* terminate string */
1275
- return buf;
1276
- }
1277
- /* wchar_t is 16bit for Borland-C !? Jan07 */
1278
- if ( c <= (wchar_t)0x0000FFFF ) { /* UTF8 == 16bit */
1279
- buf[0] = (char)(0xe0|((c>>12) & 0x0f)); /* 1110xxxx */
1280
- buf[1] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1281
- buf[2] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1282
- buf[3] = (char)0; /* terminate string */
1283
- return buf;
1284
- }
1285
- if ( c <= (wchar_t)0x001FFFFF ) { /* UTF8 == 21bit */
1286
- buf[0] = (char)(0xf0|((c>>18) & 0x07)); /* 11110xxx */
1287
- buf[1] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1288
- buf[2] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1289
- buf[3] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1290
- buf[4] = (char)0; /* terminate string */
1291
- return buf;
1292
- }
1293
- if ( c <= (wchar_t)0x03FFFFFF ) { /* UTF8 == 26bit */
1294
- buf[0] = (char)(0xf8|((c>>24) & 0x03)); /* 111110xx */
1295
- buf[1] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1296
- buf[2] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1297
- buf[3] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1298
- buf[4] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1299
- buf[5] = (char)0; /* terminate string */
1300
- return buf;
1301
- }
1302
- if ( c <= (wchar_t)0x7FFFFFFF ) { /* UTF8 == 31bit */
1303
- buf[0] = (char)(0xfc|((c>>30) & 0x01)); /* 1111110x */
1304
- buf[1] = (char)(0x80|((c>>24) & 0x3f)); /* 10xxxxxx */
1305
- buf[2] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1306
- buf[3] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1307
- buf[4] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1308
- buf[5] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1309
- buf[6] = (char)0; /* terminate string */
1310
- return buf;
1311
- }
1312
- return (const char *)UNDEFINED;
1313
- }
1314
- }