entangledstate-isbn 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (290) hide show
  1. data/README +1 -1
  2. data/Rakefile +0 -18
  3. data/VERSION +1 -0
  4. data/isbn.gemspec +290 -7
  5. data/lib/isbn.rb +6 -6
  6. data/src/gocr-0.48/.cvsignore +6 -0
  7. data/src/gocr-0.48/AUTHORS +7 -0
  8. data/src/gocr-0.48/BUGS +55 -0
  9. data/src/gocr-0.48/CREDITS +17 -0
  10. data/src/gocr-0.48/HISTORY +243 -0
  11. data/src/gocr-0.48/INSTALL +83 -0
  12. data/src/gocr-0.48/Makefile +193 -0
  13. data/src/gocr-0.48/Makefile.in +193 -0
  14. data/src/gocr-0.48/README +165 -0
  15. data/src/gocr-0.48/READMEde.txt +80 -0
  16. data/src/gocr-0.48/REMARK.txt +18 -0
  17. data/src/gocr-0.48/REVIEW +538 -0
  18. data/src/gocr-0.48/TODO +65 -0
  19. data/src/gocr-0.48/bin/.cvsignore +2 -0
  20. data/src/gocr-0.48/bin/create_db +38 -0
  21. data/src/gocr-0.48/bin/gocr.tcl +527 -0
  22. data/src/gocr-0.48/bin/gocr_chk.sh +44 -0
  23. data/src/gocr-0.48/configure +4689 -0
  24. data/src/gocr-0.48/configure.in +71 -0
  25. data/src/gocr-0.48/doc/.#Makefile.1.6 +39 -0
  26. data/src/gocr-0.48/doc/.cvsignore +2 -0
  27. data/src/gocr-0.48/doc/Makefile +39 -0
  28. data/src/gocr-0.48/doc/Makefile.in +39 -0
  29. data/src/gocr-0.48/doc/example.dtd +53 -0
  30. data/src/gocr-0.48/doc/example.xml +21 -0
  31. data/src/gocr-0.48/doc/examples.txt +67 -0
  32. data/src/gocr-0.48/doc/gocr.html +578 -0
  33. data/src/gocr-0.48/doc/unicode.txt +57 -0
  34. data/src/gocr-0.48/examples/.#Makefile.1.22 +166 -0
  35. data/src/gocr-0.48/examples/4x6.png +0 -0
  36. data/src/gocr-0.48/examples/4x6.txt +2 -0
  37. data/src/gocr-0.48/examples/5x7.png +0 -0
  38. data/src/gocr-0.48/examples/5x7.png.txt +2 -0
  39. data/src/gocr-0.48/examples/5x8.png +0 -0
  40. data/src/gocr-0.48/examples/5x8.png.txt +2 -0
  41. data/src/gocr-0.48/examples/Makefile +166 -0
  42. data/src/gocr-0.48/examples/color.fig +20 -0
  43. data/src/gocr-0.48/examples/ex.fig +16 -0
  44. data/src/gocr-0.48/examples/font.tex +22 -0
  45. data/src/gocr-0.48/examples/font1.tex +46 -0
  46. data/src/gocr-0.48/examples/font2.fig +27 -0
  47. data/src/gocr-0.48/examples/font_nw.tex +24 -0
  48. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  49. data/src/gocr-0.48/examples/handwrt1.txt +10 -0
  50. data/src/gocr-0.48/examples/inverse.fig +20 -0
  51. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  52. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  53. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +4 -0
  54. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a.txt +6 -0
  56. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-b.png.txt +4 -0
  58. data/src/gocr-0.48/examples/polish.tex +28 -0
  59. data/src/gocr-0.48/examples/rotate45.fig +14 -0
  60. data/src/gocr-0.48/examples/score +36 -0
  61. data/src/gocr-0.48/examples/text.tex +28 -0
  62. data/src/gocr-0.48/gocr.spec +143 -0
  63. data/src/gocr-0.48/gpl.html +537 -0
  64. data/src/gocr-0.48/include/.cvsignore +2 -0
  65. data/src/gocr-0.48/include/config.h +36 -0
  66. data/src/gocr-0.48/include/config.h.in +36 -0
  67. data/src/gocr-0.48/include/version.h +2 -0
  68. data/src/gocr-0.48/install-sh +3 -0
  69. data/src/gocr-0.48/make.bat +57 -0
  70. data/src/gocr-0.48/man/.cvsignore +2 -0
  71. data/src/gocr-0.48/man/Makefile +29 -0
  72. data/src/gocr-0.48/man/Makefile.in +29 -0
  73. data/src/gocr-0.48/man/man1/gocr.1 +166 -0
  74. data/src/gocr-0.48/src/.cvsignore +4 -0
  75. data/src/gocr-0.48/src/Makefile +132 -0
  76. data/src/gocr-0.48/src/Makefile.in +132 -0
  77. data/src/gocr-0.48/src/amiga.h +31 -0
  78. data/src/gocr-0.48/src/barcode.c +846 -0
  79. data/src/gocr-0.48/src/barcode.c.orig +593 -0
  80. data/src/gocr-0.48/src/barcode.h +11 -0
  81. data/src/gocr-0.48/src/box.c +372 -0
  82. data/src/gocr-0.48/src/database.c +462 -0
  83. data/src/gocr-0.48/src/detect.c +943 -0
  84. data/src/gocr-0.48/src/gocr.c +373 -0
  85. data/src/gocr-0.48/src/gocr.h +288 -0
  86. data/src/gocr-0.48/src/jconv.c +168 -0
  87. data/src/gocr-0.48/src/job.c +84 -0
  88. data/src/gocr-0.48/src/lines.c +350 -0
  89. data/src/gocr-0.48/src/list.c +334 -0
  90. data/src/gocr-0.48/src/list.h +90 -0
  91. data/src/gocr-0.48/src/ocr0.c +6756 -0
  92. data/src/gocr-0.48/src/ocr0.h +63 -0
  93. data/src/gocr-0.48/src/ocr0n.c +1475 -0
  94. data/src/gocr-0.48/src/ocr1.c +85 -0
  95. data/src/gocr-0.48/src/ocr1.h +3 -0
  96. data/src/gocr-0.48/src/otsu.c +289 -0
  97. data/src/gocr-0.48/src/otsu.h +23 -0
  98. data/src/gocr-0.48/src/output.c +289 -0
  99. data/src/gocr-0.48/src/output.h +37 -0
  100. data/src/gocr-0.48/src/pcx.c +153 -0
  101. data/src/gocr-0.48/src/pcx.h +9 -0
  102. data/src/gocr-0.48/src/pgm2asc.c +2893 -0
  103. data/src/gocr-0.48/src/pgm2asc.h +105 -0
  104. data/src/gocr-0.48/src/pixel.c +537 -0
  105. data/src/gocr-0.48/src/pnm.c +533 -0
  106. data/src/gocr-0.48/src/pnm.h +35 -0
  107. data/src/gocr-0.48/src/progress.c +87 -0
  108. data/src/gocr-0.48/src/progress.h +42 -0
  109. data/src/gocr-0.48/src/remove.c +703 -0
  110. data/src/gocr-0.48/src/tga.c +87 -0
  111. data/src/gocr-0.48/src/tga.h +6 -0
  112. data/src/gocr-0.48/src/unicode.c +1314 -0
  113. data/src/gocr-0.48/src/unicode.h +1257 -0
  114. data/src/jpeg-7/Makefile.am +133 -0
  115. data/src/jpeg-7/Makefile.in +1089 -0
  116. data/src/jpeg-7/README +322 -0
  117. data/src/jpeg-7/aclocal.m4 +8990 -0
  118. data/src/jpeg-7/ansi2knr.1 +36 -0
  119. data/src/jpeg-7/ansi2knr.c +739 -0
  120. data/src/jpeg-7/cderror.h +132 -0
  121. data/src/jpeg-7/cdjpeg.c +181 -0
  122. data/src/jpeg-7/cdjpeg.h +187 -0
  123. data/src/jpeg-7/change.log +270 -0
  124. data/src/jpeg-7/cjpeg.1 +325 -0
  125. data/src/jpeg-7/cjpeg.c +616 -0
  126. data/src/jpeg-7/ckconfig.c +402 -0
  127. data/src/jpeg-7/coderules.txt +118 -0
  128. data/src/jpeg-7/config.guess +1561 -0
  129. data/src/jpeg-7/config.sub +1686 -0
  130. data/src/jpeg-7/configure +17139 -0
  131. data/src/jpeg-7/configure.ac +317 -0
  132. data/src/jpeg-7/depcomp +630 -0
  133. data/src/jpeg-7/djpeg.1 +251 -0
  134. data/src/jpeg-7/djpeg.c +617 -0
  135. data/src/jpeg-7/example.c +433 -0
  136. data/src/jpeg-7/filelist.txt +215 -0
  137. data/src/jpeg-7/install-sh +520 -0
  138. data/src/jpeg-7/install.txt +1097 -0
  139. data/src/jpeg-7/jaricom.c +148 -0
  140. data/src/jpeg-7/jcapimin.c +282 -0
  141. data/src/jpeg-7/jcapistd.c +161 -0
  142. data/src/jpeg-7/jcarith.c +921 -0
  143. data/src/jpeg-7/jccoefct.c +453 -0
  144. data/src/jpeg-7/jccolor.c +459 -0
  145. data/src/jpeg-7/jcdctmgr.c +482 -0
  146. data/src/jpeg-7/jchuff.c +1612 -0
  147. data/src/jpeg-7/jcinit.c +65 -0
  148. data/src/jpeg-7/jcmainct.c +293 -0
  149. data/src/jpeg-7/jcmarker.c +667 -0
  150. data/src/jpeg-7/jcmaster.c +770 -0
  151. data/src/jpeg-7/jcomapi.c +106 -0
  152. data/src/jpeg-7/jconfig.bcc +48 -0
  153. data/src/jpeg-7/jconfig.cfg +45 -0
  154. data/src/jpeg-7/jconfig.dj +38 -0
  155. data/src/jpeg-7/jconfig.mac +43 -0
  156. data/src/jpeg-7/jconfig.manx +43 -0
  157. data/src/jpeg-7/jconfig.mc6 +52 -0
  158. data/src/jpeg-7/jconfig.sas +43 -0
  159. data/src/jpeg-7/jconfig.st +42 -0
  160. data/src/jpeg-7/jconfig.txt +155 -0
  161. data/src/jpeg-7/jconfig.vc +45 -0
  162. data/src/jpeg-7/jconfig.vms +37 -0
  163. data/src/jpeg-7/jconfig.wat +38 -0
  164. data/src/jpeg-7/jcparam.c +632 -0
  165. data/src/jpeg-7/jcprepct.c +358 -0
  166. data/src/jpeg-7/jcsample.c +545 -0
  167. data/src/jpeg-7/jctrans.c +381 -0
  168. data/src/jpeg-7/jdapimin.c +396 -0
  169. data/src/jpeg-7/jdapistd.c +275 -0
  170. data/src/jpeg-7/jdarith.c +762 -0
  171. data/src/jpeg-7/jdatadst.c +151 -0
  172. data/src/jpeg-7/jdatasrc.c +212 -0
  173. data/src/jpeg-7/jdcoefct.c +736 -0
  174. data/src/jpeg-7/jdcolor.c +396 -0
  175. data/src/jpeg-7/jdct.h +393 -0
  176. data/src/jpeg-7/jddctmgr.c +382 -0
  177. data/src/jpeg-7/jdhuff.c +1309 -0
  178. data/src/jpeg-7/jdinput.c +384 -0
  179. data/src/jpeg-7/jdmainct.c +512 -0
  180. data/src/jpeg-7/jdmarker.c +1360 -0
  181. data/src/jpeg-7/jdmaster.c +663 -0
  182. data/src/jpeg-7/jdmerge.c +400 -0
  183. data/src/jpeg-7/jdpostct.c +290 -0
  184. data/src/jpeg-7/jdsample.c +361 -0
  185. data/src/jpeg-7/jdtrans.c +136 -0
  186. data/src/jpeg-7/jerror.c +252 -0
  187. data/src/jpeg-7/jerror.h +304 -0
  188. data/src/jpeg-7/jfdctflt.c +174 -0
  189. data/src/jpeg-7/jfdctfst.c +230 -0
  190. data/src/jpeg-7/jfdctint.c +4348 -0
  191. data/src/jpeg-7/jidctflt.c +242 -0
  192. data/src/jpeg-7/jidctfst.c +368 -0
  193. data/src/jpeg-7/jidctint.c +5137 -0
  194. data/src/jpeg-7/jinclude.h +91 -0
  195. data/src/jpeg-7/jmemansi.c +167 -0
  196. data/src/jpeg-7/jmemdos.c +638 -0
  197. data/src/jpeg-7/jmemdosa.asm +379 -0
  198. data/src/jpeg-7/jmemmac.c +289 -0
  199. data/src/jpeg-7/jmemmgr.c +1118 -0
  200. data/src/jpeg-7/jmemname.c +276 -0
  201. data/src/jpeg-7/jmemnobs.c +109 -0
  202. data/src/jpeg-7/jmemsys.h +198 -0
  203. data/src/jpeg-7/jmorecfg.h +369 -0
  204. data/src/jpeg-7/jpegint.h +395 -0
  205. data/src/jpeg-7/jpeglib.h +1135 -0
  206. data/src/jpeg-7/jpegtran.1 +272 -0
  207. data/src/jpeg-7/jpegtran.c +546 -0
  208. data/src/jpeg-7/jquant1.c +856 -0
  209. data/src/jpeg-7/jquant2.c +1310 -0
  210. data/src/jpeg-7/jutils.c +179 -0
  211. data/src/jpeg-7/jversion.h +14 -0
  212. data/src/jpeg-7/libjpeg.map +4 -0
  213. data/src/jpeg-7/libjpeg.txt +3067 -0
  214. data/src/jpeg-7/ltmain.sh +8406 -0
  215. data/src/jpeg-7/makcjpeg.st +36 -0
  216. data/src/jpeg-7/makdjpeg.st +36 -0
  217. data/src/jpeg-7/makeadsw.vc6 +77 -0
  218. data/src/jpeg-7/makeasln.vc9 +33 -0
  219. data/src/jpeg-7/makecdep.vc6 +82 -0
  220. data/src/jpeg-7/makecdsp.vc6 +130 -0
  221. data/src/jpeg-7/makecmak.vc6 +159 -0
  222. data/src/jpeg-7/makecvcp.vc9 +186 -0
  223. data/src/jpeg-7/makeddep.vc6 +82 -0
  224. data/src/jpeg-7/makeddsp.vc6 +130 -0
  225. data/src/jpeg-7/makedmak.vc6 +159 -0
  226. data/src/jpeg-7/makedvcp.vc9 +186 -0
  227. data/src/jpeg-7/makefile.ansi +220 -0
  228. data/src/jpeg-7/makefile.bcc +291 -0
  229. data/src/jpeg-7/makefile.dj +226 -0
  230. data/src/jpeg-7/makefile.manx +220 -0
  231. data/src/jpeg-7/makefile.mc6 +255 -0
  232. data/src/jpeg-7/makefile.mms +224 -0
  233. data/src/jpeg-7/makefile.sas +258 -0
  234. data/src/jpeg-7/makefile.unix +234 -0
  235. data/src/jpeg-7/makefile.vc +217 -0
  236. data/src/jpeg-7/makefile.vms +142 -0
  237. data/src/jpeg-7/makefile.wat +239 -0
  238. data/src/jpeg-7/makejdep.vc6 +423 -0
  239. data/src/jpeg-7/makejdsp.vc6 +285 -0
  240. data/src/jpeg-7/makejdsw.vc6 +29 -0
  241. data/src/jpeg-7/makejmak.vc6 +425 -0
  242. data/src/jpeg-7/makejsln.vc9 +17 -0
  243. data/src/jpeg-7/makejvcp.vc9 +328 -0
  244. data/src/jpeg-7/makeproj.mac +213 -0
  245. data/src/jpeg-7/makerdep.vc6 +6 -0
  246. data/src/jpeg-7/makerdsp.vc6 +78 -0
  247. data/src/jpeg-7/makermak.vc6 +110 -0
  248. data/src/jpeg-7/makervcp.vc9 +133 -0
  249. data/src/jpeg-7/maketdep.vc6 +43 -0
  250. data/src/jpeg-7/maketdsp.vc6 +122 -0
  251. data/src/jpeg-7/maketmak.vc6 +131 -0
  252. data/src/jpeg-7/maketvcp.vc9 +178 -0
  253. data/src/jpeg-7/makewdep.vc6 +6 -0
  254. data/src/jpeg-7/makewdsp.vc6 +78 -0
  255. data/src/jpeg-7/makewmak.vc6 +110 -0
  256. data/src/jpeg-7/makewvcp.vc9 +133 -0
  257. data/src/jpeg-7/makljpeg.st +68 -0
  258. data/src/jpeg-7/maktjpeg.st +30 -0
  259. data/src/jpeg-7/makvms.opt +4 -0
  260. data/src/jpeg-7/missing +376 -0
  261. data/src/jpeg-7/rdbmp.c +439 -0
  262. data/src/jpeg-7/rdcolmap.c +253 -0
  263. data/src/jpeg-7/rdgif.c +38 -0
  264. data/src/jpeg-7/rdjpgcom.1 +63 -0
  265. data/src/jpeg-7/rdjpgcom.c +515 -0
  266. data/src/jpeg-7/rdppm.c +459 -0
  267. data/src/jpeg-7/rdrle.c +387 -0
  268. data/src/jpeg-7/rdswitch.c +365 -0
  269. data/src/jpeg-7/rdtarga.c +500 -0
  270. data/src/jpeg-7/structure.txt +945 -0
  271. data/src/jpeg-7/testimg.bmp +0 -0
  272. data/src/jpeg-7/testimg.jpg +0 -0
  273. data/src/jpeg-7/testimg.ppm +4 -0
  274. data/src/jpeg-7/testimgp.jpg +0 -0
  275. data/src/jpeg-7/testorig.jpg +0 -0
  276. data/src/jpeg-7/testprog.jpg +0 -0
  277. data/src/jpeg-7/transupp.c +1533 -0
  278. data/src/jpeg-7/transupp.h +205 -0
  279. data/src/jpeg-7/usage.txt +605 -0
  280. data/src/jpeg-7/wizard.txt +211 -0
  281. data/src/jpeg-7/wrbmp.c +442 -0
  282. data/src/jpeg-7/wrgif.c +399 -0
  283. data/src/jpeg-7/wrjpgcom.1 +103 -0
  284. data/src/jpeg-7/wrjpgcom.c +583 -0
  285. data/src/jpeg-7/wrppm.c +269 -0
  286. data/src/jpeg-7/wrrle.c +305 -0
  287. data/src/jpeg-7/wrtarga.c +253 -0
  288. metadata +287 -6
  289. data/LICENSE +0 -20
  290. data/VERSION.yml +0 -4
@@ -0,0 +1,87 @@
1
+ /*
2
+ This is an Optical-Character-Recognition program
3
+ Copyright (C) 1999 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+ */
21
+
22
+ #include <stdio.h>
23
+ #include <stdlib.h>
24
+ #include <assert.h>
25
+
26
+ #include "tga.h"
27
+
28
+ typedef unsigned char byte;
29
+
30
+ // --- needed for reading TGA-files
31
+ #if 0
32
+ char read_b(FILE *f1){ // read one char from f1; asserts that neither EOF nor a read error occurred (no comment filtering is done here)
33
+ char c;
34
+ c=fgetc(f1); assert(!feof(f1)); assert(!ferror(f1));
35
+ return c;
36
+ }
37
+ #endif
38
+
39
+ //byte tga[18]={ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,24,32};
40
+ /* header_hex= 00 00 02 00 00 00 00 00 00 00 00 00 xl xh yl yh
41
+ * 18 20 -- -- -- -- -- -- -- -- -- -- -- -- -- -- */
42
+
43
+ void readtga(char *name,pix *p,int mode){ // see pcx.format.txt
44
+ // mode: 0=gray,1=RGB
45
+ int nx,ny,i,x,y;
46
+ FILE *f1;
47
+ unsigned char *pic,h[18];
48
+
49
+ f1=fopen(name,"rb"); if(!f1) fprintf(stderr," error opening file\n");
50
+ assert(f1); // open-error
51
+ assert(fread(h,1,18,f1)==18); /* 18 Byte lesen -> h[] */
52
+ assert(h[ 0]== 0); // TGA0
53
+ assert(h[ 1]== 0); // TGA1
54
+ assert(h[ 2]== 2); // TGA2 no run length encoding
55
+ for(i=3;i<12;i++)
56
+ assert(h[ i]== 0); // ???
57
+ assert(h[16]==0x18); // TGA16
58
+ assert(h[17]==0x20); // TGA17
59
+ nx = h[12] + (h[13]<<8); /* x-dimension low high */
60
+ ny = h[14] + (h[15]<<8); /* y-dimension low high */
61
+ fprintf(stderr,"# TGA version=%d x=%d y=%d", h[2],nx,ny );
62
+ fflush(stdout);
63
+ pic=(unsigned char *)malloc( 3*nx*ny );
64
+ assert(pic!=NULL); // no memory
65
+ assert(ny==(int)fread(pic,3*nx,ny,f1)); // read all lines BGR
66
+ if(mode==0)
67
+ {
68
+ for(y=0;y<ny;y++) /* BGR => gray */
69
+ for(x=0;x<nx;x++)
70
+ { i=x+y*nx; pic[i]=(pic[i*3+0]+pic[i*3+1]+pic[i*3+2])/3; }
71
+ }
72
+ else
73
+ if(mode==1)
74
+ {
75
+ byte b;
76
+ for(y=0;y<ny;y++) /* BGR => RGB */
77
+ for(x=0;x<nx;x++)
78
+ { i=x+y*nx; b=pic[i*3+0]; pic[i*3+0]=pic[i*3+2]; pic[i*3+2]=b; }
79
+ }
80
+ else assert(0); // wrong mode
81
+ fclose(f1);
82
+ p->p=pic; p->x=nx; p->y=ny; p->bpp=1+2*mode;
83
+ fprintf(stderr," mode=%d\n",mode);
84
+ }
85
+
86
+ // ------------------------------------------------------------------------
87
+
@@ -0,0 +1,6 @@
1
+
2
+ #include "pnm.h"
3
+
4
+ /* Read the TGA file `name` into *p (sets p->p, p->x, p->y, p->bpp); bpp is 1 for mode 0 and 3 for mode 1. */
+ void readtga(char *name,pix *p,int mode); // mode: 0=gray 1=RGB
5
+
6
+ // ------------------------------------------------------------------------
@@ -0,0 +1,1314 @@
1
+ /*
2
+ This is an Optical-Character-Recognition program
3
+ Copyright (C) 2000-2007 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+ */
21
+
22
+ #include "unicode.h"
23
+ #include <stdio.h>
24
+
25
+ /* FIXME jb global */
26
+ int warn=0; /* if 1 a message is generated if composition is not defined */
27
+
28
+ /* Arguments: the character (main), and the modifier (accent, etc). See the
29
+ function if you want to know the modifiers.
30
+ Description: This function intends to be a small helper, to avoid having
31
+ to write switches in functions. It is therefore mainly for accents, and
32
+ especially for the most common ones. It supports the basic Greek
33
+ characters too, which is actually not very helpful.
34
+ Returns: the unicode character corresponding to the composed character.
35
+
36
+ ToDo:
37
+ - It seems to me that tables would be more effective.
38
+ So we should use tables in future? (js)
39
+ */
40
+ wchar_t compose(wchar_t main, wchar_t modifier) {
41
+ /* supported by now: part of ISO8859-1, basic greek characters */
42
+ if( main == UNKNOWN || main == PICTURE ) return main;
43
+ #ifdef DEBUG
44
+ if(modifier!=UNICODE_NULL && modifier!=SPACE)
45
+ printf(" compose(%c,%d)",(char)main,(int)modifier);
46
+ #endif
47
+ if(main>127 && modifier!=0 && modifier!=SPACE && warn)
48
+ fprintf(stderr,"# Warning compose %04x + %04x>127\n",
49
+ (int)modifier,(int)main);
50
+ switch (modifier) {
51
+ case UNICODE_NULL:
52
+ case SPACE:
53
+ return (wchar_t)main;
54
+
55
+ case APOSTROPHE: /* do NOT USE this. It's here for compatibility only.
56
+ Use ACUTE_ACCENT instead. */
57
+ fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
58
+
59
+ case ACUTE_ACCENT: /* acute/cedilla */
60
+ switch (main) {
61
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_ACUTE;
62
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_ACUTE;
63
+ case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_ACUTE;
64
+ case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_ACUTE;
65
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_ACUTE;
66
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_ACUTE;
67
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_ACUTE;
68
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_ACUTE;
69
+ case 'g': return LATIN_SMALL_LETTER_G_WITH_ACUTE;
70
+ case 'G': return LATIN_CAPITAL_LETTER_G_WITH_ACUTE;
71
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_ACUTE;
72
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_ACUTE;
73
+ case 'l': return LATIN_SMALL_LETTER_L_WITH_ACUTE;
74
+ case 'L': return LATIN_CAPITAL_LETTER_L_WITH_ACUTE;
75
+ case 'n': return LATIN_SMALL_LETTER_N_WITH_ACUTE;
76
+ case 'N': return LATIN_CAPITAL_LETTER_N_WITH_ACUTE;
77
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_ACUTE;
78
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
79
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
80
+ case 'r': return LATIN_SMALL_LETTER_R_WITH_ACUTE;
81
+ case 'R': return LATIN_CAPITAL_LETTER_R_WITH_ACUTE;
82
+ case 's': return LATIN_SMALL_LETTER_S_WITH_ACUTE;
83
+ case 'S': return LATIN_CAPITAL_LETTER_S_WITH_ACUTE;
84
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_ACUTE;
85
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_ACUTE;
86
+ case 'y': return LATIN_SMALL_LETTER_Y_WITH_ACUTE;
87
+ case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_ACUTE;
88
+ case 'z': return LATIN_SMALL_LETTER_Z_WITH_ACUTE;
89
+ case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_ACUTE;
90
+ default:
91
+ if(warn)fprintf( stderr, " COMPOSE: ACUTE_ACCENT+%04x not defined\n",(int)main);
92
+ }
93
+ break;
94
+
95
+ case BREVE: /* caron (latin2) "u"-above-... (small bow) */
96
+ switch (main) {
97
+ /* FIXME write separate heuristics for breve */
98
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_BREVE;
99
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_BREVE;
100
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_BREVE;
101
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_BREVE;
102
+ case 'g': return LATIN_SMALL_LETTER_G_WITH_BREVE;
103
+ case 'G': return LATIN_CAPITAL_LETTER_G_WITH_BREVE;
104
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_BREVE;
105
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_BREVE;
106
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_BREVE;
107
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_BREVE;
108
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_BREVE;
109
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_BREVE;
110
+ default:
111
+ if(warn)fprintf( stderr, " COMPOSE: BREVE+%04x not defined\n",(int)main);
112
+ }
113
+ break;
114
+
115
+ case CARON: /* caron (latin2) "v"-above-... */
116
+ switch (main) {
117
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_CARON;
118
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CARON;
119
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_CARON;
120
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CARON;
121
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_CARON;
122
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CARON;
123
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_CARON;
124
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CARON;
125
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_CARON;
126
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
127
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
128
+ case 's': return LATIN_SMALL_LETTER_S_WITH_CARON;
129
+ case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CARON;
130
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_CARON;
131
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CARON;
132
+ case 'z': return LATIN_SMALL_LETTER_Z_WITH_CARON;
133
+ case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_CARON;
134
+ default:
135
+ if(warn)fprintf( stderr, " COMPOSE: CARON+%04x not defined\n",(int)main);
136
+ }
137
+ break;
138
+
139
+ case CEDILLA:
140
+ switch (main) {
141
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_CEDILLA;
142
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
143
+ default:
144
+ if(warn)fprintf( stderr, " COMPOSE: CEDILLA+%04x not defined\n",(int)main);
145
+ }
146
+ break;
147
+
148
+ case TILDE:
149
+ switch (main) {
150
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_TILDE;
151
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_TILDE;
152
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_TILDE;
153
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_TILDE;
154
+ case 'n': return LATIN_SMALL_LETTER_N_WITH_TILDE;
155
+ case 'N': return LATIN_CAPITAL_LETTER_N_WITH_TILDE;
156
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_TILDE;
157
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
158
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
159
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_TILDE;
160
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_TILDE;
161
+ default:
162
+ if(warn)fprintf( stderr, " COMPOSE: TILDE+%04x not defined\n",(int)main);
163
+ }
164
+ break;
165
+
166
+ case GRAVE_ACCENT:
167
+ switch (main) {
168
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_GRAVE;
169
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_GRAVE;
170
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_GRAVE;
171
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_GRAVE;
172
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_GRAVE;
173
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_GRAVE;
174
+ case 'n': return LATIN_SMALL_LETTER_N_WITH_GRAVE;
175
+ case 'N': return LATIN_CAPITAL_LETTER_N_WITH_GRAVE;
176
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_GRAVE;
177
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
178
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
179
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_GRAVE;
180
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_GRAVE;
181
+ default:
182
+ if(warn)fprintf( stderr, " COMPOSE: GRAVE_ACCENT+%04x not defined\n",(int)main);
183
+ }
184
+ break;
185
+
186
+ case QUOTATION_MARK: /* do NOT USE this. It's here for compatibility only.
187
+ Use DIAERESIS instead. */
188
+ fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
189
+
190
+ case DIAERESIS:
191
+ switch (main) {
192
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_DIAERESIS;
193
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS;
194
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_DIAERESIS;
195
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS;
196
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_DIAERESIS;
197
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS;
198
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_DIAERESIS;
199
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
200
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
201
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_DIAERESIS;
202
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS;
203
+ case 'y': return LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
204
+ case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
205
+ default:
206
+ if(warn)fprintf( stderr, " COMPOSE: DIAERESIS+%04x (%c) not defined\n",(int)main,(char)main);
207
+ }
208
+ break;
209
+
210
+ case CIRCUMFLEX_ACCENT: /* ^ */
211
+ switch (main) {
212
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX;
213
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX;
214
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX;
215
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX;
216
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX;
217
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX;
218
+ case 'g': return LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX;
219
+ case 'G': return LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX;
220
+ case 'h': return LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX;
221
+ case 'H': return LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX;
222
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX;
223
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX;
224
+ case 'j': return LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX;
225
+ case 'J': return LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX;
226
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX;
227
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
228
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
229
+ case 's': return LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX;
230
+ case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX;
231
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX;
232
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX;
233
+ case 'w': return LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX;
234
+ case 'W': return LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX;
235
+ case 'y': return LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX;
236
+ case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX;
237
+ default:
238
+ if(warn)fprintf( stderr, " COMPOSE: CIRCUMFLEX_ACCENT+%04x not defined\n",(int)main);
239
+ }
240
+ break;
241
+
242
+ case MACRON: /* a minus sign above the char (latin2) */
243
+ switch (main) {
244
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_MACRON;
245
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_MACRON;
246
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_MACRON;
247
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_MACRON;
248
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_MACRON;
249
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_MACRON;
250
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_MACRON;
251
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_MACRON;
252
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_MACRON;
253
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_MACRON;
254
+ case 'y': return LATIN_SMALL_LETTER_Y_WITH_MACRON;
255
+ case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_MACRON;
256
+ case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_MACRON;
257
+ case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_MACRON;
258
+ case '=': return IDENTICAL_TO;
259
+ case '-': return '=';
260
+ case ' ': return MODIFIER_LETTER_MACRON;
261
+ default:
262
+ if(warn)fprintf( stderr, " COMPOSE: MACRON+%04x not defined\n",(int)main);
263
+ }
264
+ break;
265
+
266
+ case DOT_ABOVE: /* latin2 */
267
+ switch (main) {
268
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE;
269
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE;
270
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE;
271
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE;
272
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE;
273
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE;
274
+ case 'g': return LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE;
275
+ case 'G': return LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE;
276
+ case 'l': return 'i'; /* correct wrong recognition */
277
+ case 'i': return 'i';
278
+ case LATIN_SMALL_LETTER_DOTLESS_I: return 'i';
279
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
280
+ case 'j': return 'j';
281
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE;
282
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE;
283
+ case 'z': return LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE;
284
+ case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE;
285
+ case ',': return ';';
286
+ case '.': return ':';
287
+ default:
288
+ if(warn)fprintf( stderr, " COMPOSE: DOT_ABOVE+%04x not defined\n",(int)main);
289
+ }
290
+ break;
291
+
292
+ case RING_ABOVE:
293
+ switch (main) {
294
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_RING_ABOVE;
295
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE;
296
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_RING_ABOVE;
297
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE;
298
+ default:
299
+ if(warn)fprintf( stderr, " COMPOSE: RING_ABOVE+%04x not defined\n",(int)main);
300
+ }
301
+ break;
302
+
303
+ case 'e': /* e ligatures: ae, oe. */
304
+ case 'E':
305
+ switch (main) {
306
+ case 'a': return LATIN_SMALL_LETTER_AE;
307
+ case 'A': return LATIN_CAPITAL_LETTER_AE;
308
+ case 'o': return LATIN_SMALL_LIGATURE_OE;
309
+ case 'O': return LATIN_CAPITAL_LIGATURE_OE;
310
+ case '0': return LATIN_CAPITAL_LIGATURE_OE;
311
+ default:
312
+ if(warn)fprintf( stderr, " COMPOSE: %04x+e/E not defined\n",(int)main);
313
+ }
314
+ break;
315
+
316
+ case 'g': /* greek */
317
+ switch (main) {
318
+ /* missing 0x37A-0x390 */
319
+ /* weird cases: Q -> theta (it resembles a little, doesn't it?)
320
+ V -> psi (what can I do?) */
321
+ case 'A': return GREEK_CAPITAL_LETTER_ALPHA;
322
+ case 'B': return GREEK_CAPITAL_LETTER_BETA;
323
+ case 'G': return GREEK_CAPITAL_LETTER_GAMMA;
324
+ case 'D': return GREEK_CAPITAL_LETTER_DELTA;
325
+ case 'E': return GREEK_CAPITAL_LETTER_EPSILON;
326
+ case 'Z': return GREEK_CAPITAL_LETTER_ZETA;
327
+ case 'H': return GREEK_CAPITAL_LETTER_ETA;
328
+ case 'Q': return GREEK_CAPITAL_LETTER_THETA;
329
+ case 'I': return GREEK_CAPITAL_LETTER_IOTA;
330
+ case 'K': return GREEK_CAPITAL_LETTER_KAPPA;
331
+ case 'L': return GREEK_CAPITAL_LETTER_LAMDA;
332
+ case 'M': return GREEK_CAPITAL_LETTER_MU;
333
+ case 'N': return GREEK_CAPITAL_LETTER_NU;
334
+ case 'X': return GREEK_CAPITAL_LETTER_XI;
335
+ case 'O': return GREEK_CAPITAL_LETTER_OMICRON;
336
+ case 'P': return GREEK_CAPITAL_LETTER_PI;
337
+ case 'R': return GREEK_CAPITAL_LETTER_RHO;
338
+ case 'S': return GREEK_CAPITAL_LETTER_SIGMA;
339
+ case 'T': return GREEK_CAPITAL_LETTER_TAU;
340
+ case 'Y': return GREEK_CAPITAL_LETTER_UPSILON;
341
+ case 'F': return GREEK_CAPITAL_LETTER_PHI;
342
+ case 'C': return GREEK_CAPITAL_LETTER_CHI;
343
+ case 'V': return GREEK_CAPITAL_LETTER_PSI;
344
+ case 'W': return GREEK_CAPITAL_LETTER_OMEGA;
345
+ /*
346
+ case '': return GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA;
347
+ case '': return GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA;
348
+ case '': return GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
349
+ case '': return GREEK_SMALL_LETTER_EPSILON_WITH_TONOS;
350
+ case '': return GREEK_SMALL_LETTER_ETA_WITH_TONOS;
351
+ case '': return GREEK_SMALL_LETTER_IOTA_WITH_TONOS;
352
+ case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS;
353
+ */
354
+ case 'a': return GREEK_SMALL_LETTER_ALPHA;
355
+ case 'b': return GREEK_SMALL_LETTER_BETA;
356
+ case 'g': return GREEK_SMALL_LETTER_GAMMA;
357
+ case 'd': return GREEK_SMALL_LETTER_DELTA;
358
+ case 'e': return GREEK_SMALL_LETTER_EPSILON;
359
+ case 'z': return GREEK_SMALL_LETTER_ZETA;
360
+ case 'h': return GREEK_SMALL_LETTER_ETA;
361
+ case 'q': return GREEK_SMALL_LETTER_THETA;
362
+ case 'i': return GREEK_SMALL_LETTER_IOTA;
363
+ case 'k': return GREEK_SMALL_LETTER_KAPPA;
364
+ case 'l': return GREEK_SMALL_LETTER_LAMDA;
365
+ case 'm': return GREEK_SMALL_LETTER_MU;
366
+ case 'n': return GREEK_SMALL_LETTER_NU;
367
+ case 'x': return GREEK_SMALL_LETTER_XI;
368
+ case 'o': return GREEK_SMALL_LETTER_OMICRON;
369
+ case 'p': return GREEK_SMALL_LETTER_PI;
370
+ case 'r': return GREEK_SMALL_LETTER_RHO;
371
+ case '&': return GREEK_SMALL_LETTER_FINAL_SIGMA;
372
+ case 's': return GREEK_SMALL_LETTER_SIGMA;
373
+ case 't': return GREEK_SMALL_LETTER_TAU;
374
+ case 'y': return GREEK_SMALL_LETTER_UPSILON;
375
+ case 'f': return GREEK_SMALL_LETTER_PHI;
376
+ case 'c': return GREEK_SMALL_LETTER_CHI;
377
+ case 'v': return GREEK_SMALL_LETTER_PSI;
378
+ case 'w': return GREEK_SMALL_LETTER_OMEGA;
379
+ /*
380
+ case '': return GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA;
381
+ case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA;
382
+ case '': return GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
383
+ case '': return GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
384
+ case '': return GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
385
+ case '': return GREEK_BETA_SYMBOL;
386
+ case '': return GREEK_THETA_SYMBOL;
387
+ case '': return GREEK_UPSILON_WITH_HOOK_SYMBOL;
388
+ case '': return GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL;
389
+ case '': return GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL;
390
+ case '': return GREEK_PHI_SYMBOL;
391
+ case '': return GREEK_PI_SYMBOL;
392
+ */
393
+ default:
394
+ if(warn)fprintf( stderr, " COMPOSE: GREEK %04x not defined\n",(int)main);
395
+ }
396
+ break;
397
+
398
+ default:
399
+ fprintf( stderr, " COMPOSE: modifier %04x not defined\n",(int)modifier);
400
+ }
401
+ return (wchar_t)main;
402
+ }
403
+
404
+ #define UNDEFINED "~" /* fallback string returned by decode() for characters that have no rendering in the requested output format */
405
+
406
+ /* Arguments: character in Unicode format, type of format to convert to.
407
+ Returns: a string containing the Unicode character converted to the chosen
408
+ format. The result is either a string literal or one of 8 rotating static buffers (recycled on the 8th later call); it must not be freed.
409
+ ToDo: better using tables?
410
+ */
411
+ const char *decode(wchar_t c, FORMAT type) {
412
+ /* static char d; --- js: big bug (missing \0) if &d returned */
413
+ /*FIXME jb static*/ static char bbuf[8*32]; /* space for 8 buffers, rotating */
414
+ /*FIXME jb static*/ static char *buf=bbuf; /* used for UTF8 sequences and undefined codes */
415
+ buf+=32; if(buf>=bbuf+8*32) buf=bbuf;
416
+ buf[0]=buf[1]=buf[2]=0;
417
+ switch (type) {
418
+ case ISO8859_1:
419
+ if ( c <= 0xFF ) { /* UNICODE == ISO8859-1 */
420
+ buf[0] = (char)c;
421
+ return buf;
422
+ }
423
+ switch (c) { /* not found in list, but perhaps we can describe it */
424
+ /* todo: add greek. GREEK_SMALL_LETTER_ALPHA = alpha */
425
+
426
+ /* general puctuation */
427
+ case HYPHEN:
428
+ return (const char *)"-";
429
+ case FIGURE_DASH:
430
+ case EN_DASH:
431
+ return (const char *)"--";
432
+ case EM_DASH:
433
+ return (const char *)"---";
434
+ case LEFT_SINGLE_QUOTATION_MARK:
435
+ return (const char *)"`";
436
+ case RIGHT_SINGLE_QUOTATION_MARK:
437
+ return (const char *)"'";
438
+ case SINGLE_LOW_9_QUOTATION_MARK:
439
+ return (const char *)",";
440
+ case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
441
+ return (const char *)UNDEFINED;
442
+ case LEFT_DOUBLE_QUOTATION_MARK:
443
+ return (const char *)"``";
444
+ case RIGHT_DOUBLE_QUOTATION_MARK:
445
+ return (const char *)"''";
446
+ case DOUBLE_LOW_9_QUOTATION_MARK:
447
+ return (const char *)",,";
448
+ case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
449
+ return (const char *)UNDEFINED;
450
+ case DAGGER:
451
+ return (const char *)"+";
452
+ case DOUBLE_DAGGER:
453
+ return (const char *)"*";
454
+ case BULLET:
455
+ return (const char *)"*";
456
+ case TRIANGULAR_BULLET:
457
+ return (const char *)"*";
458
+ case HYPHENATION_POINT:
459
+ return (const char *)"-";
460
+ case HORIZONTAL_ELLIPSIS:
461
+ return (const char *)"...";
462
+ case PER_MILLE_SIGN:
463
+ return (const char *)"%%"; /* awk! */
464
+ case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
465
+ return (const char *)"<";
466
+ case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
467
+ return (const char *)">";
468
+ case EURO_CURRENCY_SIGN:
469
+ return (const char *)"EUR"; /* change it! */
470
+
471
+ /* ligatures */
472
+ case LATIN_SMALL_LIGATURE_FF:
473
+ return (const char *)"ff";
474
+ case LATIN_SMALL_LIGATURE_FI:
475
+ return (const char *)"fi";
476
+ case LATIN_SMALL_LIGATURE_FL:
477
+ return (const char *)"fl";
478
+ case LATIN_SMALL_LIGATURE_FFI:
479
+ return (const char *)"ffi";
480
+ case LATIN_SMALL_LIGATURE_FFL:
481
+ return (const char *)"ffl";
482
+ case LATIN_SMALL_LIGATURE_LONG_S_T:
483
+ case LATIN_SMALL_LIGATURE_ST:
484
+ return (const char *)"st";
485
+
486
+ /* extra */
487
+ case UNKNOWN:
488
+ return (const char *)"_";
489
+ case PICTURE:
490
+ return (const char *)"_"; /* Due to Mobile OCR */
491
+
492
+ default:
493
+ /* snprintf seems to be no standard, so I use insecure sprintf */
494
+ sprintf(buf,"\\code(%04x)",(unsigned)c);
495
+ return buf; /* UNDEFINED; */
496
+ }
497
+ break;
498
+ case TeX:
499
+ if ( c >= SPACE && c <= TILDE ) { /* ASCII */
500
+ switch (c) {
501
+ case '$':
502
+ return (const char *)"\\$";
503
+ case '&':
504
+ return (const char *)"\\&";
505
+ case '%':
506
+ return (const char *)"\\%";
507
+ case '#':
508
+ return (const char *)"\\#";
509
+ case '_':
510
+ return (const char *)"\\_";
511
+ case '{':
512
+ return (const char *)"\\{";
513
+ case '}':
514
+ return (const char *)"\\}";
515
+ case '\\':
516
+ return (const char *)"$\\backslash$";
517
+ case '~':
518
+ return (const char *)"\\~{}";
519
+ case '^':
520
+ return (const char *)"\\^{}";
521
+ default:
522
+ buf[0] = (char)c;
523
+ return (const char *)buf;
524
+ }
525
+ }
526
+ switch (c) {
527
+ /* ISO8859_1 */
528
+ case NO_BREAK_SPACE:
529
+ return (const char *)"~";
530
+ case INVERTED_EXCLAMATION_MARK:
531
+ return (const char *)"!'";
532
+ case CENT_SIGN:
533
+ return (const char *)"\\textcent"; /* \usepackage{textcomp} */
534
+ case POUND_SIGN:
535
+ return (const char *)"\\pounds";
536
+ case EURO_CURRENCY_SIGN:
537
+ return (const char *)"\\euro"; /* \usepackage{eurosans} */
538
+ case CURRENCY_SIGN:
539
+ return (const char *)"\\textcurrency"; /* \usepackage{textcomp} */
540
+ case YEN_SIGN:
541
+ return (const char *)"\\textyen"; /* \usepackage{textcomp} */
542
+ case BROKEN_BAR:
543
+ return (const char *)"\\textbrokenbar"; /* \usepackage{textcomp} */
544
+ case SECTION_SIGN:
545
+ return (const char *)"\\S";
546
+ case DIAERESIS:
547
+ return (const char *)"\"";
548
+ case COPYRIGHT_SIGN:
549
+ return (const char *)"\\copyright";
550
+ case FEMININE_ORDINAL_INDICATOR:
551
+ return (const char *)"$^{\\underbar{a}}$";
552
+ case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
553
+ return (const char *)"\\flqq{}";
554
+ case NOT_SIGN:
555
+ return (const char *)"$\\lnot$";
556
+ case SOFT_HYPHEN:
557
+ return (const char *)"\\-";
558
+ case REGISTERED_SIGN:
559
+ return (const char *)"\\textregistered";/* \usepackage{textcomp} */
560
+ case MACRON:
561
+ return (const char *)"\\textasciimacron";/* \usepackage{textcomp} */
562
+ case DEGREE_SIGN:
563
+ return (const char *)"$^{o}$";
564
+ case PLUS_MINUS_SIGN:
565
+ return (const char *)"$\\pm$";
566
+ case SUPERSCRIPT_TWO:
567
+ return (const char *)"$^{2}$";
568
+ case SUPERSCRIPT_THREE:
569
+ return (const char *)"$^{3}$";
570
+ case ACUTE_ACCENT:
571
+ return (const char *)"\\( \\prime \\)";
572
+ case MICRO_SIGN:
573
+ return (const char *)"$\\mu$";
574
+ case PILCROW_SIGN:
575
+ return (const char *)"\\P";
576
+ case MIDDLE_DOT:
577
+ return (const char *)"$\\cdot$";
578
+ case CEDILLA:
579
+ return (const char *)"\\,";
580
+ case SUPERSCRIPT_ONE:
581
+ return (const char *)"$^{1}$";
582
+ case MASCULINE_ORDINAL_INDICATOR:
583
+ return (const char *)"$^{\\underbar{o}}$";
584
+ case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
585
+ return (const char *)"\\frqq{}";
586
+ case VULGAR_FRACTION_ONE_QUARTER: /* these fractions are not good*/
587
+ return (const char *)"\\( 1\\over 4 \\)";
588
+ case VULGAR_FRACTION_ONE_HALF:
589
+ return (const char *)"\\( 1\\over 2 \\)";
590
+ case VULGAR_FRACTION_THREE_QUARTERS:
591
+ return (const char *)"\\( 3\\over 4 \\)";
592
+ case INVERTED_QUESTION_MARK:
593
+ return (const char *)"?'";
594
+ case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
595
+ return (const char *)"\\`A";
596
+ case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
597
+ return (const char *)"\\'A";
598
+ case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
599
+ return (const char *)"\\^A";
600
+ case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
601
+ return (const char *)"\\~A";
602
+ case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
603
+ return (const char *)"\\\"A";
604
+ case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
605
+ return (const char *)"\\AA";
606
+ case LATIN_CAPITAL_LETTER_AE:
607
+ return (const char *)"\\AE";
608
+ case LATIN_CAPITAL_LETTER_C_WITH_CARON:
609
+ return (const char *)"\\v{C}";
610
+ case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
611
+ return (const char *)"\\C";
612
+ case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
613
+ return (const char *)"\\`E";
614
+ case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
615
+ return (const char *)"\\'E";
616
+ case LATIN_CAPITAL_LETTER_E_WITH_CARON:
617
+ return (const char *)"\\v{E}";
618
+ case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
619
+ return (const char *)"\\^E";
620
+ case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
621
+ return (const char *)"\\\"E";
622
+ case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
623
+ return (const char *)"\\`I";
624
+ case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
625
+ return (const char *)"\\'I";
626
+ case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
627
+ return (const char *)"\\^I";
628
+ case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
629
+ return (const char *)"\\\"I";
630
+ case LATIN_CAPITAL_LETTER_ETH:
631
+ return (const char *)UNDEFINED;
632
+ case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
633
+ return (const char *)"\\~N";
634
+ case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
635
+ return (const char *)"\\`O";
636
+ case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
637
+ return (const char *)"\\'O";
638
+ case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
639
+ return (const char *)"\\^O";
640
+ case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
641
+ return (const char *)"\\~O";
642
+ case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
643
+ return (const char *)"\\\"O";
644
+ case MULTIPLICATION_SIGN:
645
+ return (const char *)"$\\times$";
646
+ case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
647
+ return (const char *)"\\O";
648
+ case LATIN_CAPITAL_LETTER_S_WITH_CARON:
649
+ return (const char *)"\\v{S}";
650
+ case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
651
+ return (const char *)"\\`U";
652
+ case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
653
+ return (const char *)"\\'U";
654
+ case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
655
+ return (const char *)"\\^U";
656
+ case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
657
+ return (const char *)"\\\"U";
658
+ case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
659
+ return (const char *)"\\'Y";
660
+ case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
661
+ return (const char *)"\\v{Z}";
662
+ case LATIN_CAPITAL_LETTER_THORN:
663
+ return (const char *)UNDEFINED;
664
+ case LATIN_SMALL_LETTER_SHARP_S:
665
+ return (const char *)"\\ss";
666
+ case LATIN_SMALL_LETTER_A_WITH_GRAVE:
667
+ return (const char *)"\\`a";
668
+ case LATIN_SMALL_LETTER_A_WITH_ACUTE:
669
+ return (const char *)"\\'a";
670
+ case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
671
+ return (const char *)"\\^a";
672
+ case LATIN_SMALL_LETTER_A_WITH_TILDE:
673
+ return (const char *)"\\~a";
674
+ case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
675
+ return (const char *)"\\\"a";
676
+ case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
677
+ return (const char *)"\\aa";
678
+ case LATIN_SMALL_LETTER_AE:
679
+ return (const char *)"\\ae";
680
+ case LATIN_SMALL_LETTER_C_WITH_CARON:
681
+ return (const char *)"\\v{c}";
682
+ case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
683
+ return (const char *)"\\c";
684
+ case LATIN_SMALL_LETTER_E_WITH_GRAVE:
685
+ return (const char *)"\\`e";
686
+ case LATIN_SMALL_LETTER_E_WITH_ACUTE:
687
+ return (const char *)"\\'e";
688
+ case LATIN_SMALL_LETTER_E_WITH_CARON:
689
+ return (const char *)"\\v{e}";
690
+ case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
691
+ return (const char *)"\\^e";
692
+ case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
693
+ return (const char *)"\\\"e";
694
+ case LATIN_SMALL_LETTER_I_WITH_GRAVE:
695
+ return (const char *)"\\`i";
696
+ case LATIN_SMALL_LETTER_I_WITH_ACUTE:
697
+ return (const char *)"\\'i";
698
+ case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
699
+ return (const char *)"\\^i";
700
+ case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
701
+ return (const char *)"\\\"i";
702
+ case LATIN_SMALL_LETTER_ETH:
703
+ return (const char *)UNDEFINED;
704
+ case LATIN_SMALL_LETTER_N_WITH_TILDE:
705
+ return (const char *)"\\~n";
706
+ case LATIN_SMALL_LETTER_O_WITH_GRAVE:
707
+ return (const char *)"\\`o";
708
+ case LATIN_SMALL_LETTER_O_WITH_ACUTE:
709
+ return (const char *)"\\'o";
710
+ case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
711
+ return (const char *)"\\^o";
712
+ case LATIN_SMALL_LETTER_O_WITH_TILDE:
713
+ return (const char *)"\\~o";
714
+ case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
715
+ return (const char *)"\\\"o";
716
+ case DIVISION_SIGN:
717
+ return (const char *)"$\\div$";
718
+ case LATIN_SMALL_LETTER_O_WITH_STROKE:
719
+ return (const char *)"\\o";
720
+ case LATIN_SMALL_LETTER_S_WITH_CARON:
721
+ return (const char *)"\\v{s}";
722
+ case LATIN_SMALL_LETTER_U_WITH_GRAVE:
723
+ return (const char *)"\\`u";
724
+ case LATIN_SMALL_LETTER_U_WITH_ACUTE:
725
+ return (const char *)"\\'u";
726
+ case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
727
+ return (const char *)"\\^u";
728
+ case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
729
+ return (const char *)"\\\"u";
730
+ case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
731
+ return (const char *)"\\'y";
732
+ case LATIN_SMALL_LETTER_THORN:
733
+ return (const char *)UNDEFINED;
734
+ case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
735
+ return (const char *)"\\\"y";
736
+ case LATIN_SMALL_LETTER_Z_WITH_CARON:
737
+ return (const char *)"\\v{z}";
738
+
739
+ /* greek */
740
+ /* some (punctuation, accents, accented capital) greek letters missing*/
741
+ case GREEK_CAPITAL_LETTER_ALPHA:
742
+ return (const char *)"A";
743
+ case GREEK_CAPITAL_LETTER_BETA:
744
+ return (const char *)"B";
745
+ case GREEK_CAPITAL_LETTER_GAMMA:
746
+ return (const char *)"\\( \\Gamma \\)";
747
+ case GREEK_CAPITAL_LETTER_DELTA:
748
+ return (const char *)"\\( \\Delta \\)";
749
+ case GREEK_CAPITAL_LETTER_EPSILON:
750
+ return (const char *)"E";
751
+ case GREEK_CAPITAL_LETTER_ZETA:
752
+ return (const char *)"Z";
753
+ case GREEK_CAPITAL_LETTER_ETA:
754
+ return (const char *)"H";
755
+ case GREEK_CAPITAL_LETTER_THETA:
756
+ return (const char *)"\\( \\Theta \\)";
757
+ case GREEK_CAPITAL_LETTER_IOTA:
758
+ return (const char *)"I";
759
+ case GREEK_CAPITAL_LETTER_KAPPA:
760
+ return (const char *)"K";
761
+ case GREEK_CAPITAL_LETTER_LAMDA:
762
+ return (const char *)"\\( \\Lambda \\)";
763
+ case GREEK_CAPITAL_LETTER_MU:
764
+ return (const char *)"M";
765
+ case GREEK_CAPITAL_LETTER_NU:
766
+ return (const char *)"N";
767
+ case GREEK_CAPITAL_LETTER_XI:
768
+ return (const char *)"\\( \\Xi \\)";
769
+ case GREEK_CAPITAL_LETTER_OMICRON:
770
+ return (const char *)"O";
771
+ case GREEK_CAPITAL_LETTER_PI:
772
+ return (const char *)"\\( \\Pi \\)";
773
+ case GREEK_CAPITAL_LETTER_RHO:
774
+ return (const char *)"P";
775
+ case GREEK_CAPITAL_LETTER_SIGMA:
776
+ return (const char *)"\\( \\Sigma \\)";
777
+ case GREEK_CAPITAL_LETTER_TAU:
778
+ return (const char *)"T";
779
+ case GREEK_CAPITAL_LETTER_UPSILON:
780
+ return (const char *)"\\( \\Upsilon \\)";
781
+ case GREEK_CAPITAL_LETTER_PHI:
782
+ return (const char *)"\\( \\Phi \\)";
783
+ case GREEK_CAPITAL_LETTER_CHI:
784
+ return (const char *)"\\( \\Chi \\)";
785
+ case GREEK_CAPITAL_LETTER_PSI:
786
+ return (const char *)"\\( \\Psi \\)";
787
+ case GREEK_CAPITAL_LETTER_OMEGA:
788
+ return (const char *)"\\( \\Omega \\)";
789
+ case GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA:
790
+ return (const char *)UNDEFINED;
791
+ case GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA:
792
+ return (const char *)UNDEFINED;
793
+ case GREEK_SMALL_LETTER_ALPHA_WITH_TONOS:
794
+ return (const char *)UNDEFINED;
795
+ case GREEK_SMALL_LETTER_EPSILON_WITH_TONOS:
796
+ return (const char *)UNDEFINED;
797
+ case GREEK_SMALL_LETTER_ETA_WITH_TONOS:
798
+ return (const char *)UNDEFINED;
799
+ case GREEK_SMALL_LETTER_IOTA_WITH_TONOS:
800
+ return (const char *)UNDEFINED;
801
+ case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS:
802
+ return (const char *)UNDEFINED;
803
+ case GREEK_SMALL_LETTER_ALPHA:
804
+ return (const char *)"\\( \\alpha \\)";
805
+ case GREEK_SMALL_LETTER_BETA:
806
+ return (const char *)"\\( \\beta \\)";
807
+ case GREEK_SMALL_LETTER_GAMMA:
808
+ return (const char *)"\\( \\gamma \\)";
809
+ case GREEK_SMALL_LETTER_DELTA:
810
+ return (const char *)"\\( \\delta \\)";
811
+ case GREEK_SMALL_LETTER_EPSILON:
812
+ return (const char *)"\\( \\epsilon \\)";
813
+ case GREEK_SMALL_LETTER_ZETA:
814
+ return (const char *)"\\( \\zeta \\)";
815
+ case GREEK_SMALL_LETTER_ETA:
816
+ return (const char *)"\\( \\eta \\)";
817
+ case GREEK_SMALL_LETTER_THETA:
818
+ return (const char *)"\\( \\theta \\)";
819
+ case GREEK_SMALL_LETTER_IOTA:
820
+ return (const char *)"\\( \\iota \\)";
821
+ case GREEK_SMALL_LETTER_KAPPA:
822
+ return (const char *)"\\( \\kappa \\)";
823
+ case GREEK_SMALL_LETTER_LAMDA:
824
+ return (const char *)"\\( \\lambda \\)";
825
+ case GREEK_SMALL_LETTER_MU:
826
+ return (const char *)"\\( \\mu \\)";
827
+ case GREEK_SMALL_LETTER_NU:
828
+ return (const char *)"\\( \\nu \\)";
829
+ case GREEK_SMALL_LETTER_XI:
830
+ return (const char *)"\\( \\xi \\)";
831
+ case GREEK_SMALL_LETTER_OMICRON:
832
+ return (const char *)"\\( \\omicron \\)";
833
+ case GREEK_SMALL_LETTER_PI:
834
+ return (const char *)"\\( \\pi \\)";
835
+ case GREEK_SMALL_LETTER_RHO:
836
+ return (const char *)"\\( \\rho \\)";
837
+ case GREEK_SMALL_LETTER_FINAL_SIGMA:
838
+ return (const char *)"\\( \\varsigma \\)";
839
+ case GREEK_SMALL_LETTER_SIGMA:
840
+ return (const char *)"\\( \\sigma \\)";
841
+ case GREEK_SMALL_LETTER_TAU:
842
+ return (const char *)"\\( \\tau \\)";
843
+ case GREEK_SMALL_LETTER_UPSILON:
844
+ return (const char *)"\\( \\upsilon \\)";
845
+ case GREEK_SMALL_LETTER_PHI:
846
+ return (const char *)"\\( \\varphi \\)";
847
+ case GREEK_SMALL_LETTER_CHI:
848
+ return (const char *)"\\( \\chi \\)";
849
+ case GREEK_SMALL_LETTER_PSI:
850
+ return (const char *)"\\( \\psi \\)";
851
+ case GREEK_SMALL_LETTER_OMEGA:
852
+ return (const char *)"\\( \\omega \\)";
853
+ case GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA:
854
+ return (const char *)UNDEFINED;
855
+ case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA:
856
+ return (const char *)UNDEFINED;
857
+ case GREEK_SMALL_LETTER_OMICRON_WITH_TONOS:
858
+ return (const char *)UNDEFINED;
859
+ case GREEK_SMALL_LETTER_UPSILON_WITH_TONOS:
860
+ return (const char *)UNDEFINED;
861
+ case GREEK_SMALL_LETTER_OMEGA_WITH_TONOS:
862
+ return (const char *)UNDEFINED;
863
+ case GREEK_BETA_SYMBOL:
864
+ return (const char *)UNDEFINED;
865
+ case GREEK_THETA_SYMBOL:
866
+ return (const char *)"\\( \\vartheta \\)";
867
+ case GREEK_UPSILON_WITH_HOOK_SYMBOL:
868
+ return (const char *)UNDEFINED;
869
+ case GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL:
870
+ return (const char *)UNDEFINED;
871
+ case GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL:
872
+ return (const char *)UNDEFINED;
873
+ case GREEK_PHI_SYMBOL:
874
+ return (const char *)"\\( \\phi \\)";
875
+ case GREEK_PI_SYMBOL:
876
+ return (const char *)"\\( \\varpi \\)";
877
+ /* and some greek letters missing*/
878
+
879
+ /* punctuation (partial) */
880
+ case HYPHEN:
881
+ return (const char *)"-";
882
+ case NON_BREAKING_HYPHEN:
883
+ return (const char *)UNDEFINED;
884
+ case FIGURE_DASH:
885
+ case EN_DASH:
886
+ return (const char *)"--";
887
+ case EM_DASH:
888
+ return (const char *)"---";
889
+ case HORIZONTAL_BAR:
890
+ return (const char *)UNDEFINED;
891
+ case LEFT_SINGLE_QUOTATION_MARK:
892
+ return (const char *)"`";
893
+ case RIGHT_SINGLE_QUOTATION_MARK:
894
+ return (const char *)"'";
895
+ case SINGLE_LOW_9_QUOTATION_MARK:
896
+ return (const char *)"\\glq{}";
897
+ case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
898
+ return (const char *)UNDEFINED;
899
+ case LEFT_DOUBLE_QUOTATION_MARK:
900
+ return (const char *)"``";
901
+ case RIGHT_DOUBLE_QUOTATION_MARK:
902
+ return (const char *)"''";
903
+ case DOUBLE_LOW_9_QUOTATION_MARK:
904
+ return (const char *)"\\glqq{}";
905
+ case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
906
+ return (const char *)UNDEFINED;
907
+ case DAGGER:
908
+ return (const char *)"\\dag";
909
+ case DOUBLE_DAGGER:
910
+ return (const char *)"\\ddag";
911
+ case BULLET:
912
+ return (const char *)"$\\bullet$";
913
+ case TRIANGULAR_BULLET:
914
+ return (const char *)"$\\blacktriangleright";
915
+ case HYPHENATION_POINT:
916
+ return (const char *)"\\-";
917
+ case HORIZONTAL_ELLIPSIS:
918
+ return (const char *)"\\ldots";
919
+ case PER_MILLE_SIGN:
920
+ return (const char *)UNDEFINED;
921
+ case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
922
+ return (const char *)"\\flq{}";
923
+ case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
924
+ return (const char *)"\\frq{}";
925
+ /* ligatures */
926
+ case LATIN_SMALL_LIGATURE_FF:
927
+ return (const char *)"ff";
928
+ case LATIN_SMALL_LIGATURE_FI:
929
+ return (const char *)"fi";
930
+ case LATIN_SMALL_LIGATURE_FL:
931
+ return (const char *)"fl";
932
+ case LATIN_SMALL_LIGATURE_FFI:
933
+ return (const char *)"ffi";
934
+ case LATIN_SMALL_LIGATURE_FFL:
935
+ return (const char *)"ffl";
936
+ case LATIN_SMALL_LIGATURE_LONG_S_T:
937
+ case LATIN_SMALL_LIGATURE_ST:
938
+ return (const char *)"st";
939
+ /* reserved */
940
+ case 0:
941
+ return (const char *)"";
942
+ case UNKNOWN:
943
+ return (const char *)"\\_";
944
+ case PICTURE:
945
+ return (const char *)"(PICTURE)";
946
+ default:
947
+ /* snprintf seems to be no standard, so I use insecure sprintf */
948
+ sprintf(buf,"\\symbol{%u}",(unsigned)c);
949
+ return buf; /* UNDEFINED; */
950
+ }
951
+ case HTML:
952
+ if ( c >= SPACE && c <= TILDE ) { /* ASCII */
953
+ switch (c) {
954
+ case '&':
955
+ return (const char *)"&amp;";
956
+ /* semicolon must not be coded */
957
+ case '\'':
958
+ return (const char *)"&apos;";
959
+ case '"':
960
+ return (const char *)"&quot;";
961
+ case '<':
962
+ return (const char *)"&lt;";
963
+ case '>':
964
+ return (const char *)"&gt;";
965
+ }
966
+ buf[0] = (char)c;
967
+ return buf;
968
+ }
969
+ switch (c) {
970
+ case PICTURE:
971
+ return (const char *)"<!--PICTURE-->";
972
+ case UNKNOWN:
973
+ return (const char *)"_"; /* better use colored symbol? */
974
+ case LINE_FEED:
975
+ return (const char *)"<br />"; /* \n handled somwhere else? */
976
+ case FORM_FEED:
977
+ case CARRIAGE_RETURN:
978
+ return (const char *)"<br />";
979
+ case NO_BREAK_SPACE:
980
+ return (const char *)"<nobr />";
981
+ case INVERTED_EXCLAMATION_MARK:
982
+ return (const char *)"&iexcl;";
983
+ case CENT_SIGN:
984
+ return (const char *)"&cent;";
985
+ case POUND_SIGN:
986
+ return (const char *)"&pound;";
987
+ case CURRENCY_SIGN:
988
+ return (const char *)"&curren;";
989
+ case YEN_SIGN:
990
+ return (const char *)"&yen;";
991
+ case BROKEN_BAR:
992
+ return (const char *)"&brvbar;";
993
+ case SECTION_SIGN:
994
+ return (const char *)"&sect;";
995
+ case DIAERESIS:
996
+ return (const char *)"&uml;";
997
+ case COPYRIGHT_SIGN:
998
+ return (const char *)"&copy;";
999
+ case FEMININE_ORDINAL_INDICATOR:
1000
+ return (const char *)"&ordfem;";
1001
+ case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1002
+ return (const char *)"&laquo;";
1003
+ case NOT_SIGN:
1004
+ return (const char *)"&not;";
1005
+ case SOFT_HYPHEN:
1006
+ return (const char *)"&shy;";
1007
+ case REGISTERED_SIGN:
1008
+ return (const char *)"&reg;";
1009
+ case MACRON:
1010
+ return (const char *)"&macr;";
1011
+ case DEGREE_SIGN:
1012
+ return (const char *)"&deg;";
1013
+ case PLUS_MINUS_SIGN:
1014
+ return (const char *)"&plusmn;";
1015
+ case SUPERSCRIPT_TWO:
1016
+ return (const char *)"&sup2;";
1017
+ case SUPERSCRIPT_THREE:
1018
+ return (const char *)"&sup3;";
1019
+ case ACUTE_ACCENT:
1020
+ return (const char *)"&acute;";
1021
+ case MICRO_SIGN:
1022
+ return (const char *)"&micro;";
1023
+ case PILCROW_SIGN:
1024
+ return (const char *)"&para;";
1025
+ case MIDDLE_DOT:
1026
+ return (const char *)"&middot;";
1027
+ case CEDILLA:
1028
+ return (const char *)"&cedil;";
1029
+ case SUPERSCRIPT_ONE:
1030
+ return (const char *)"&sup1;";
1031
+ case MASCULINE_ORDINAL_INDICATOR:
1032
+ return (const char *)"&ordm;";
1033
+ case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1034
+ return (const char *)"&raquo;";
1035
+ case VULGAR_FRACTION_ONE_QUARTER:
1036
+ return (const char *)"&frac14;";
1037
+ case VULGAR_FRACTION_ONE_HALF:
1038
+ return (const char *)"&frac12;";
1039
+ case VULGAR_FRACTION_THREE_QUARTERS:
1040
+ return (const char *)"&frac34;";
1041
+ case INVERTED_QUESTION_MARK:
1042
+ return (const char *)"&iquest;";
1043
+ case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
1044
+ return (const char *)"&Agrave;";
1045
+ case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
1046
+ return (const char *)"&Aacute;";
1047
+ case LATIN_CAPITAL_LETTER_A_WITH_BREVE:
1048
+ return (const char *)"&Abreve;";
1049
+ case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
1050
+ return (const char *)"&Acirc;";
1051
+ case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
1052
+ return (const char *)"&Atilde;";
1053
+ case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
1054
+ return (const char *)"&Auml;";
1055
+ case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
1056
+ return (const char *)"&Aring;";
1057
+ case LATIN_CAPITAL_LETTER_AE:
1058
+ return (const char *)"&AElig;";
1059
+ case LATIN_CAPITAL_LETTER_C_WITH_CARON:
1060
+ return (const char *)"&Ccaron;";
1061
+ case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
1062
+ return (const char *)"&Ccedil;";
1063
+ case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
1064
+ return (const char *)"&Egrave;";
1065
+ case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
1066
+ return (const char *)"&Eacute;";
1067
+ case LATIN_CAPITAL_LETTER_E_WITH_CARON:
1068
+ return (const char *)"&Ecaron;";
1069
+ case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
1070
+ return (const char *)"&Ecirc;";
1071
+ case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
1072
+ return (const char *)"&Euml;";
1073
+ case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
1074
+ return (const char *)"&Igrave;";
1075
+ case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
1076
+ return (const char *)"&Iacute;";
1077
+ case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
1078
+ return (const char *)"&Icirc;";
1079
+ case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
1080
+ return (const char *)"&Iuml;";
1081
+ case LATIN_CAPITAL_LETTER_ETH:
1082
+ return (const char *)"&ETH;";
1083
+ case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
1084
+ return (const char *)"&Ntilde;";
1085
+ case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
1086
+ return (const char *)"&Ograve;";
1087
+ case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
1088
+ return (const char *)"&Oacute;";
1089
+ case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
1090
+ return (const char *)"&Ocirc;";
1091
+ case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
1092
+ return (const char *)"&Otilde;";
1093
+ case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
1094
+ return (const char *)"&Ouml;";
1095
+ case MULTIPLICATION_SIGN:
1096
+ return (const char *)"&times";
1097
+ case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
1098
+ return (const char *)"&Oslash;";
1099
+ case LATIN_CAPITAL_LETTER_S_WITH_CARON:
1100
+ return (const char *)"&Scaron;";
1101
+ case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
1102
+ return (const char *)"&Ugrave;";
1103
+ case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
1104
+ return (const char *)"&Uacute;";
1105
+ case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
1106
+ return (const char *)"&Ucirc;";
1107
+ case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
1108
+ return (const char *)"&Uuml;";
1109
+ case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
1110
+ return (const char *)"&Yacute;";
1111
+ case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
1112
+ return (const char *)"&Zcaron;";
1113
+ case LATIN_CAPITAL_LETTER_THORN:
1114
+ return (const char *)"&THORN;";
1115
+ case LATIN_SMALL_LETTER_SHARP_S:
1116
+ return (const char *)"&szlig;";
1117
+ case LATIN_SMALL_LETTER_A_WITH_GRAVE:
1118
+ return (const char *)"&agrave;";
1119
+ case LATIN_SMALL_LETTER_A_WITH_ACUTE:
1120
+ return (const char *)"&aacute;";
1121
+ case LATIN_SMALL_LETTER_A_WITH_BREVE:
1122
+ return (const char *)"&abreve;";
1123
+ case LATIN_SMALL_LETTER_A_WITH_CARON:
1124
+ return (const char *)"&acaron;";
1125
+ case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
1126
+ return (const char *)"&acirc;";
1127
+ case LATIN_SMALL_LETTER_A_WITH_TILDE:
1128
+ return (const char *)"&atilde;";
1129
+ case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
1130
+ return (const char *)"&auml;";
1131
+ case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
1132
+ return (const char *)"&aring;";
1133
+ case LATIN_SMALL_LETTER_AE:
1134
+ return (const char *)"&aelig;";
1135
+ case LATIN_SMALL_LETTER_C_WITH_CARON:
1136
+ return (const char *)"&ccaron;";
1137
+ case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
1138
+ return (const char *)"&ccedil;";
1139
+ case LATIN_SMALL_LETTER_E_WITH_GRAVE:
1140
+ return (const char *)"&egrave;";
1141
+ case LATIN_SMALL_LETTER_E_WITH_ACUTE:
1142
+ return (const char *)"&eacute;";
1143
+ case LATIN_SMALL_LETTER_E_WITH_CARON:
1144
+ return (const char *)"&ecaron;";
1145
+ case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
1146
+ return (const char *)"&ecirc;";
1147
+ case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
1148
+ return (const char *)"&euml;";
1149
+ case LATIN_SMALL_LETTER_I_WITH_GRAVE:
1150
+ return (const char *)"&igrave;";
1151
+ case LATIN_SMALL_LETTER_I_WITH_ACUTE:
1152
+ return (const char *)"&iacute;";
1153
+ case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
1154
+ return (const char *)"&icirc;";
1155
+ case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
1156
+ return (const char *)"&iuml;";
1157
+ case LATIN_SMALL_LETTER_ETH:
1158
+ return (const char *)"&eth;";
1159
+ case LATIN_SMALL_LETTER_N_WITH_TILDE:
1160
+ return (const char *)"&ntilde;";
1161
+ case LATIN_SMALL_LETTER_O_WITH_GRAVE:
1162
+ return (const char *)"&ograve;";
1163
+ case LATIN_SMALL_LETTER_O_WITH_ACUTE:
1164
+ return (const char *)"&oacute;";
1165
+ case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
1166
+ return (const char *)"&ocirc;";
1167
+ case LATIN_SMALL_LETTER_O_WITH_TILDE:
1168
+ return (const char *)"&otilde;";
1169
+ case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
1170
+ return (const char *)"&ouml;";
1171
+ case DIVISION_SIGN:
1172
+ return (const char *)"&divide;";
1173
+ case LATIN_SMALL_LETTER_O_WITH_STROKE:
1174
+ return (const char *)"&oslash;";
1175
+ case LATIN_SMALL_LETTER_S_WITH_CARON:
1176
+ return (const char *)"&scaron;";
1177
+ case LATIN_SMALL_LETTER_U_WITH_GRAVE:
1178
+ return (const char *)"&ugrave;";
1179
+ case LATIN_SMALL_LETTER_U_WITH_ACUTE:
1180
+ return (const char *)"&uacute;";
1181
+ case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
1182
+ return (const char *)"&ucirc;";
1183
+ case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
1184
+ return (const char *)"&uuml;";
1185
+ case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
1186
+ return (const char *)"&yacute;";
1187
+ case LATIN_SMALL_LETTER_THORN:
1188
+ return (const char *)"&thorn;";
1189
+ case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
1190
+ return (const char *)"&yuml;";
1191
+ case LATIN_SMALL_LETTER_Z_WITH_CARON:
1192
+ return (const char *)"&zcaron;";
1193
+ case EURO_CURRENCY_SIGN:
1194
+ return (const char *)"&euro;";
1195
+ case 0:
1196
+ return (const char *)"";
1197
+ default:
1198
+ sprintf(buf,"&#%u;",(unsigned)c);
1199
+ return buf; /* undefined */
1200
+ }
1201
+ /* break; unreachable code */
1202
+ case XML: /* only 5 &xxx;-ENTITIES are defined by default */
1203
+ if ( c >= SPACE && c <= TILDE ) { /* ASCII */
1204
+ switch (c) {
1205
+ case '&':
1206
+ return (const char *)"&amp;";
1207
+ case '\'':
1208
+ return (const char *)"&apos;";
1209
+ case '"':
1210
+ return (const char *)"&quot;";
1211
+ case '<':
1212
+ return (const char *)"&lt;";
1213
+ case '>':
1214
+ return (const char *)"&gt;";
1215
+ }
1216
+ buf[0] = (char)c;
1217
+ return buf;
1218
+ }
1219
+ switch (c) { /* subject to change! */
1220
+ case PICTURE:
1221
+ return (const char *)"(PICTURE)";
1222
+ case UNKNOWN:
1223
+ return (const char *)"_"; /* better use colored symbol? */
1224
+ case LINE_FEED: /* \n handled somewhere else? */
1225
+ case FORM_FEED:
1226
+ case CARRIAGE_RETURN:
1227
+ return (const char *)"<br />";
1228
+ case NO_BREAK_SPACE:
1229
+ return (const char *)"<nobr />";
1230
+ case 0:
1231
+ return (const char *)"";
1232
+ default:
1233
+ sprintf(buf,"&#x%03x;",(unsigned)c);
1234
+ return buf; /* undefined */
1235
+ }
1236
+ /* break; unreachable code */
1237
+ case SGML:
1238
+ switch (c) {
1239
+ default:
1240
+ sprintf(buf,"&#%u;",(unsigned)c);
1241
+ return buf; /* UNDEFINED */
1242
+ }
1243
+ /* break; unreachable code */
1244
+ case ASCII: /* mainly used for debugging */
1245
+ if ( c=='\n' || (c>= 0x20 && c <= 0x7F) ) {
1246
+ buf[0] = (char)c;
1247
+ return buf;
1248
+ }
1249
+ switch (c) {
1250
+ /* extra */
1251
+ case UNKNOWN:
1252
+ return (const char *)"(?)";
1253
+ case PICTURE:
1254
+ return (const char *)"(?)";
1255
+
1256
+ default:
1257
+ /* snprintf does not seem to be standard, so insecure sprintf is used */
1258
+ if ((unsigned)c>255) sprintf(buf,"(0x%04x)",(unsigned)c);
1259
+ else sprintf(buf,"(0x%02x)",(unsigned)c);
1260
+ return buf; /* UNDEFINED; */
1261
+ }
1262
+ /* break; unreachable code */
1263
+ default: /* use UTF8 as default, test with xterm -u8 */
1264
+ /* extra */
1265
+ if ( c == UNKNOWN ) return (const char *)"_";
1266
+ if ( c == PICTURE ) return (const char *)"_"; /* Due to Mobile OCR */
1267
+ if ( c <= (wchar_t)0x0000007F ) { /* UTF8 == 7bit ASCII */
1268
+ buf[0] = (char)c;
1269
+ return buf;
1270
+ }
1271
+ if ( c <= (wchar_t)0x000007FF ) { /* UTF8 == 11bit */
1272
+ buf[0] = (char)(0xc0|((c>> 6) & 0x1f)); /* 110xxxxx */
1273
+ buf[1] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1274
+ buf[2] = (char)0; /* terminate string */
1275
+ return buf;
1276
+ }
1277
+ /* wchar_t is 16bit for Borland-C !? Jan07 */
1278
+ if ( c <= (wchar_t)0x0000FFFF ) { /* UTF8 == 16bit */
1279
+ buf[0] = (char)(0xe0|((c>>12) & 0x0f)); /* 1110xxxx */
1280
+ buf[1] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1281
+ buf[2] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1282
+ buf[3] = (char)0; /* terminate string */
1283
+ return buf;
1284
+ }
1285
+ if ( c <= (wchar_t)0x001FFFFF ) { /* UTF8 == 21bit */
1286
+ buf[0] = (char)(0xf0|((c>>18) & 0x07)); /* 11110xxx */
1287
+ buf[1] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1288
+ buf[2] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1289
+ buf[3] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1290
+ buf[4] = (char)0; /* terminate string */
1291
+ return buf;
1292
+ }
1293
+ if ( c <= (wchar_t)0x03FFFFFF ) { /* UTF8 == 26bit */
1294
+ buf[0] = (char)(0xf8|((c>>24) & 0x03)); /* 111110xx */
1295
+ buf[1] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1296
+ buf[2] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1297
+ buf[3] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1298
+ buf[4] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1299
+ buf[5] = (char)0; /* terminate string */
1300
+ return buf;
1301
+ }
1302
+ if ( c <= (wchar_t)0x7FFFFFFF ) { /* UTF8 == 31bit */
1303
+ buf[0] = (char)(0xfc|((c>>30) & 0x01)); /* 1111110x */
1304
+ buf[1] = (char)(0x80|((c>>24) & 0x3f)); /* 10xxxxxx */
1305
+ buf[2] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1306
+ buf[3] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1307
+ buf[4] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1308
+ buf[5] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1309
+ buf[6] = (char)0; /* terminate string */
1310
+ return buf;
1311
+ }
1312
+ return (const char *)UNDEFINED;
1313
+ }
1314
+ }