gocr-ruby 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +21 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +49 -0
  8. data/ext/gocr/Makefile +141 -0
  9. data/ext/gocr/Makefile.in +140 -0
  10. data/ext/gocr/amiga.h +31 -0
  11. data/ext/gocr/barcode.c +2108 -0
  12. data/ext/gocr/barcode.h +11 -0
  13. data/ext/gocr/box.c +496 -0
  14. data/ext/gocr/config.h +37 -0
  15. data/ext/gocr/config.h.in +36 -0
  16. data/ext/gocr/database.c +468 -0
  17. data/ext/gocr/detect.c +1003 -0
  18. data/ext/gocr/extconf.rb +6 -0
  19. data/ext/gocr/gocr.c +436 -0
  20. data/ext/gocr/gocr.h +290 -0
  21. data/ext/gocr/jconv.c +168 -0
  22. data/ext/gocr/job.c +92 -0
  23. data/ext/gocr/lines.c +364 -0
  24. data/ext/gocr/list.c +334 -0
  25. data/ext/gocr/list.h +91 -0
  26. data/ext/gocr/ocr0.c +7312 -0
  27. data/ext/gocr/ocr0.h +63 -0
  28. data/ext/gocr/ocr0n.c +1527 -0
  29. data/ext/gocr/ocr1.c +85 -0
  30. data/ext/gocr/ocr1.h +3 -0
  31. data/ext/gocr/otsu.c +310 -0
  32. data/ext/gocr/otsu.h +23 -0
  33. data/ext/gocr/output.c +291 -0
  34. data/ext/gocr/output.h +37 -0
  35. data/ext/gocr/pcx.c +153 -0
  36. data/ext/gocr/pcx.h +9 -0
  37. data/ext/gocr/pgm2asc.c +3259 -0
  38. data/ext/gocr/pgm2asc.h +105 -0
  39. data/ext/gocr/pixel.c +538 -0
  40. data/ext/gocr/pnm.c +538 -0
  41. data/ext/gocr/pnm.h +35 -0
  42. data/ext/gocr/progress.c +87 -0
  43. data/ext/gocr/progress.h +42 -0
  44. data/ext/gocr/remove.c +715 -0
  45. data/ext/gocr/tga.c +87 -0
  46. data/ext/gocr/tga.h +6 -0
  47. data/ext/gocr/unicode.c +1318 -0
  48. data/ext/gocr/unicode.h +62 -0
  49. data/ext/gocr/unicode_defs.h +1245 -0
  50. data/ext/gocr/version.h +2 -0
  51. data/gocr-ruby.gemspec +28 -0
  52. data/image.png +0 -0
  53. data/lib/gocr.rb +6 -0
  54. data/lib/gocr/image.rb +8 -0
  55. data/lib/gocr/version.rb +3 -0
  56. metadata +156 -0
@@ -0,0 +1,87 @@
1
+ /*
2
+ This is an Optical-Character-Recognition program
3
+ Copyright (C) 1999 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+ */
21
+
22
+ #include <stdio.h>
23
+ #include <stdlib.h>
24
+ #include <assert.h>
25
+
26
+ #include "tga.h"
27
+
28
+ typedef unsigned char byte;
29
+
30
+ // --- needed for reading TGA-files
31
+ #if 0
32
/* Read one byte from f1 and return it as a char.
 * No comment filtering is done here (the pnm reader has its own variant).
 * Hitting end-of-file or a read error trips an assert. */
char read_b(FILE *f1){
  int c;                 /* int, so the fgetc result is not truncated before the checks */
  c=fgetc(f1);
  assert(!feof(f1));
  assert(!ferror(f1));
  return (char)c;
}
37
+ #endif
38
+
39
+ //byte tga[18]={ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,24,32};
40
+ /* header_hex= 00 00 02 00 00 00 00 00 00 00 00 00 xl xh yl yh
41
+ * 18 20 -- -- -- -- -- -- -- -- -- -- -- -- -- -- */
42
+
43
+ void readtga(char *name,pix *p,int mode){ // see pcx.format.txt
44
+ // mode: 0=gray,1=RGB
45
+ int nx,ny,i,x,y;
46
+ FILE *f1;
47
+ unsigned char *pic,h[18];
48
+
49
+ f1=fopen(name,"rb"); if(!f1) fprintf(stderr," error opening file\n");
50
+ assert(f1); // open-error
51
+ assert(fread(h,1,18,f1)==18); /* 18 Byte lesen -> h[] */
52
+ assert(h[ 0]== 0); // TGA0
53
+ assert(h[ 1]== 0); // TGA1
54
+ assert(h[ 2]== 2); // TGA2 no run length encoding
55
+ for(i=3;i<12;i++)
56
+ assert(h[ i]== 0); // ???
57
+ assert(h[16]==0x18); // TGA16
58
+ assert(h[17]==0x20); // TGA17
59
+ nx = h[12] + (h[13]<<8); /* x-dimension low high */
60
+ ny = h[14] + (h[15]<<8); /* y-dimension low high */
61
+ fprintf(stderr,"# TGA version=%d x=%d y=%d", h[2],nx,ny );
62
+ fflush(stdout);
63
+ pic=(unsigned char *)malloc( 3*nx*ny );
64
+ assert(pic!=NULL); // no memory
65
+ assert(ny==(int)fread(pic,3*nx,ny,f1)); // read all lines BGR
66
+ if(mode==0)
67
+ {
68
+ for(y=0;y<ny;y++) /* BGR => gray */
69
+ for(x=0;x<nx;x++)
70
+ { i=x+y*nx; pic[i]=(pic[i*3+0]+pic[i*3+1]+pic[i*3+2])/3; }
71
+ }
72
+ else
73
+ if(mode==1)
74
+ {
75
+ byte b;
76
+ for(y=0;y<ny;y++) /* BGR => RGB */
77
+ for(x=0;x<nx;x++)
78
+ { i=x+y*nx; b=pic[i*3+0]; pic[i*3+0]=pic[i*3+2]; pic[i*3+2]=b; }
79
+ }
80
+ else assert(0); // wrong mode
81
+ fclose(f1);
82
+ p->p=pic; p->x=nx; p->y=ny; p->bpp=1+2*mode;
83
+ fprintf(stderr," mode=%d\n",mode);
84
+ }
85
+
86
+ // ------------------------------------------------------------------------
87
+
@@ -0,0 +1,6 @@
1
+
2
+ #include "pnm.h"
3
+
4
+ void readtga(char *name,pix *p,int mode); // mode: 0=gray 1=RGB
5
+
6
+ // ------------------------------------------------------------------------
@@ -0,0 +1,1318 @@
1
+ /*
2
+ This is an Optical-Character-Recognition program
3
+ Copyright (C) 2000-2010 Joerg Schulenburg
4
+
5
+ This program is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU General Public License
7
+ as published by the Free Software Foundation; either version 2
8
+ of the License, or (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ see README for EMAIL-address
20
+ */
21
+
22
+ #include "unicode_defs.h" /* unicode table */
23
+ #include "unicode.h" /* convert functions and formats */
24
+ #include <stdio.h>
25
+
26
+ /* FIXME jb global */
27
+ int warn=0; /* if 1 a message is generated if composition is not defined */
28
+
29
+ /* Arguments: the character (main), and the modifier (accent, etc). See the
30
+ function if you want to know the modifiers.
31
+ Description: This function intends to be a small helper, to avoid having
32
+ to write switches in functions. It's therefore mainly to accents, and
33
+ specially for the most usual ones. It supports the basic greek
34
+ characters too, which is actually not very helpful.
35
+ Returns: the unicode character corresponding to the composed character.
36
+
37
+ ToDo:
38
+ - It seems to me, that tables should be more effectiv.
39
+ So we should use tables in future? (js)
40
+ */
41
+ wchar_t compose(wchar_t main, wchar_t modifier) {
42
+ /* supported by now: part of ISO8859-1, basic greek characters */
43
+ if( main == UNKNOWN || main == PICTURE ) return main;
44
+ #ifdef DEBUG
45
+ if(modifier!=UNICODE_NULL && modifier!=SPACE)
46
+ printf(" compose(%c,%d)",(char)main,(int)modifier);
47
+ #endif
48
+ if(main>127 && modifier!=0 && modifier!=SPACE && warn)
49
+ fprintf(stderr,"# Warning compose %04x + %04x>127\n",
50
+ (int)modifier,(int)main);
51
+ switch (modifier) {
52
+ case UNICODE_NULL:
53
+ case SPACE:
54
+ return (wchar_t)main;
55
+
56
+ case APOSTROPHE: /* do NOT USE this. It's here for compatibility only.
57
+ Use ACUTE_ACCENT instead. */
58
+ fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
59
+
60
+ case ACUTE_ACCENT: /* acute/cedilla */
61
+ switch (main) {
62
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_ACUTE;
63
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_ACUTE;
64
+ case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_ACUTE;
65
+ case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_ACUTE;
66
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_ACUTE;
67
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_ACUTE;
68
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_ACUTE;
69
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_ACUTE;
70
+ case 'g': return LATIN_SMALL_LETTER_G_WITH_ACUTE;
71
+ case 'G': return LATIN_CAPITAL_LETTER_G_WITH_ACUTE;
72
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_ACUTE;
73
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_ACUTE;
74
+ case 'l': return LATIN_SMALL_LETTER_L_WITH_ACUTE;
75
+ case 'L': return LATIN_CAPITAL_LETTER_L_WITH_ACUTE;
76
+ case 'n': return LATIN_SMALL_LETTER_N_WITH_ACUTE;
77
+ case 'N': return LATIN_CAPITAL_LETTER_N_WITH_ACUTE;
78
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_ACUTE;
79
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
80
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
81
+ case 'r': return LATIN_SMALL_LETTER_R_WITH_ACUTE;
82
+ case 'R': return LATIN_CAPITAL_LETTER_R_WITH_ACUTE;
83
+ case 's': return LATIN_SMALL_LETTER_S_WITH_ACUTE;
84
+ case 'S': return LATIN_CAPITAL_LETTER_S_WITH_ACUTE;
85
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_ACUTE;
86
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_ACUTE;
87
+ case 'y': return LATIN_SMALL_LETTER_Y_WITH_ACUTE;
88
+ case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_ACUTE;
89
+ case 'z': return LATIN_SMALL_LETTER_Z_WITH_ACUTE;
90
+ case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_ACUTE;
91
+ default:
92
+ if(warn)fprintf( stderr, " COMPOSE: ACUTE_ACCENT+%04x not defined\n",(int)main);
93
+ }
94
+ break;
95
+
96
+ case BREVE: /* caron (latin2) "u"-above-... (small bow) */
97
+ switch (main) {
98
+ /* FIXME write separate heuristics for breve */
99
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_BREVE;
100
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_BREVE;
101
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_BREVE;
102
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_BREVE;
103
+ case 'g': return LATIN_SMALL_LETTER_G_WITH_BREVE;
104
+ case 'G': return LATIN_CAPITAL_LETTER_G_WITH_BREVE;
105
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_BREVE;
106
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_BREVE;
107
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_BREVE;
108
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_BREVE;
109
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_BREVE;
110
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_BREVE;
111
+ default:
112
+ if(warn)fprintf( stderr, " COMPOSE: BREVE+%04x not defined\n",(int)main);
113
+ }
114
+ break;
115
+
116
+ case CARON: /* caron (latin2) "v"-above-... */
117
+ switch (main) {
118
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_CARON;
119
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CARON;
120
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_CARON;
121
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CARON;
122
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_CARON;
123
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CARON;
124
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_CARON;
125
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CARON;
126
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_CARON;
127
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
128
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
129
+ case 's': return LATIN_SMALL_LETTER_S_WITH_CARON;
130
+ case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CARON;
131
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_CARON;
132
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CARON;
133
+ case 'z': return LATIN_SMALL_LETTER_Z_WITH_CARON;
134
+ case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_CARON;
135
+ default:
136
+ if(warn)fprintf( stderr, " COMPOSE: CARON+%04x not defined\n",(int)main);
137
+ }
138
+ break;
139
+
140
+ case CEDILLA:
141
+ switch (main) {
142
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_CEDILLA;
143
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
144
+ default:
145
+ if(warn)fprintf( stderr, " COMPOSE: CEDILLA+%04x not defined\n",(int)main);
146
+ }
147
+ break;
148
+
149
+ case TILDE:
150
+ switch (main) {
151
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_TILDE;
152
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_TILDE;
153
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_TILDE;
154
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_TILDE;
155
+ case 'n': return LATIN_SMALL_LETTER_N_WITH_TILDE;
156
+ case 'N': return LATIN_CAPITAL_LETTER_N_WITH_TILDE;
157
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_TILDE;
158
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
159
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
160
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_TILDE;
161
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_TILDE;
162
+ default:
163
+ if(warn)fprintf( stderr, " COMPOSE: TILDE+%04x not defined\n",(int)main);
164
+ }
165
+ break;
166
+
167
+ case GRAVE_ACCENT:
168
+ switch (main) {
169
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_GRAVE;
170
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_GRAVE;
171
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_GRAVE;
172
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_GRAVE;
173
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_GRAVE;
174
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_GRAVE;
175
+ case 'n': return LATIN_SMALL_LETTER_N_WITH_GRAVE;
176
+ case 'N': return LATIN_CAPITAL_LETTER_N_WITH_GRAVE;
177
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_GRAVE;
178
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
179
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
180
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_GRAVE;
181
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_GRAVE;
182
+ default:
183
+ if(warn)fprintf( stderr, " COMPOSE: GRAVE_ACCENT+%04x not defined\n",(int)main);
184
+ }
185
+ break;
186
+
187
+ case QUOTATION_MARK: /* do NOT USE this. It's here for compatibility only.
188
+ Use DIAERESIS instead. */
189
+ fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
190
+
191
+ case DIAERESIS:
192
+ switch (main) {
193
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_DIAERESIS;
194
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS;
195
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_DIAERESIS;
196
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS;
197
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_DIAERESIS;
198
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS;
199
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_DIAERESIS;
200
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
201
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
202
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_DIAERESIS;
203
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS;
204
+ case 'y': return LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
205
+ case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
206
+ default:
207
+ if(warn)fprintf( stderr, " COMPOSE: DIAERESIS+%04x (%c) not defined\n",(int)main,(char)main);
208
+ }
209
+ break;
210
+
211
+ case CIRCUMFLEX_ACCENT: /* ^ */
212
+ switch (main) {
213
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX;
214
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX;
215
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX;
216
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX;
217
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX;
218
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX;
219
+ case 'g': return LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX;
220
+ case 'G': return LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX;
221
+ case 'h': return LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX;
222
+ case 'H': return LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX;
223
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX;
224
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX;
225
+ case 'j': return LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX;
226
+ case 'J': return LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX;
227
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX;
228
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
229
+ case '0': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
230
+ case 's': return LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX;
231
+ case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX;
232
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX;
233
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX;
234
+ case 'w': return LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX;
235
+ case 'W': return LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX;
236
+ case 'y': return LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX;
237
+ case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX;
238
+ default:
239
+ if(warn)fprintf( stderr, " COMPOSE: CIRCUMFLEX_ACCENT+%04x not defined\n",(int)main);
240
+ }
241
+ break;
242
+
243
+ case MACRON: /* a minus sign above the char (latin2) */
244
+ switch (main) {
245
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_MACRON;
246
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_MACRON;
247
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_MACRON;
248
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_MACRON;
249
+ case 'i': return LATIN_SMALL_LETTER_I_WITH_MACRON;
250
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_MACRON;
251
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_MACRON;
252
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_MACRON;
253
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_MACRON;
254
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_MACRON;
255
+ case 'y': return LATIN_SMALL_LETTER_Y_WITH_MACRON;
256
+ case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_MACRON;
257
+ case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_MACRON;
258
+ case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_MACRON;
259
+ case '=': return IDENTICAL_TO;
260
+ case '-': return '=';
261
+ case ' ': return MODIFIER_LETTER_MACRON;
262
+ default:
263
+ if(warn)fprintf( stderr, " COMPOSE: MACRON+%04x not defined\n",(int)main);
264
+ }
265
+ break;
266
+
267
+ case DOT_ABOVE: /* latin2 */
268
+ switch (main) {
269
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE;
270
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE;
271
+ case 'c': return LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE;
272
+ case 'C': return LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE;
273
+ case 'e': return LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE;
274
+ case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE;
275
+ case 'g': return LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE;
276
+ case 'G': return LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE;
277
+ case 'l': return 'i'; /* correct wrong recognition */
278
+ case 'i': return 'i';
279
+ case LATIN_SMALL_LETTER_DOTLESS_I: return 'i';
280
+ case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
281
+ case 'j': return 'j';
282
+ case 'o': return LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE;
283
+ case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE;
284
+ case 'z': return LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE;
285
+ case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE;
286
+ case ',': return ';';
287
+ case '.': return ':';
288
+ default:
289
+ if(warn)fprintf( stderr, " COMPOSE: DOT_ABOVE+%04x not defined\n",(int)main);
290
+ }
291
+ break;
292
+
293
+ case RING_ABOVE:
294
+ switch (main) {
295
+ case 'a': return LATIN_SMALL_LETTER_A_WITH_RING_ABOVE;
296
+ case 'A': return LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE;
297
+ case 'u': return LATIN_SMALL_LETTER_U_WITH_RING_ABOVE;
298
+ case 'U': return LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE;
299
+ default:
300
+ if(warn)fprintf( stderr, " COMPOSE: RING_ABOVE+%04x not defined\n",(int)main);
301
+ }
302
+ break;
303
+
304
+ case 'e': /* e ligatures: ae, oe. */
305
+ case 'E':
306
+ switch (main) {
307
+ case 'a': return LATIN_SMALL_LETTER_AE;
308
+ case 'A': return LATIN_CAPITAL_LETTER_AE;
309
+ case 'o': return LATIN_SMALL_LIGATURE_OE;
310
+ case 'O': return LATIN_CAPITAL_LIGATURE_OE;
311
+ case '0': return LATIN_CAPITAL_LIGATURE_OE;
312
+ default:
313
+ if(warn)fprintf( stderr, " COMPOSE: %04x+e/E not defined\n",(int)main);
314
+ }
315
+ break;
316
+
317
+ case 'g': /* greek */
318
+ switch (main) {
319
+ /* missing 0x37A-0x390 */
320
+ /* weird cases: Q -> theta (it resembles a little, doesn't it?)
321
+ V -> psi (what can I do?) */
322
+ case 'A': return GREEK_CAPITAL_LETTER_ALPHA;
323
+ case 'B': return GREEK_CAPITAL_LETTER_BETA;
324
+ case 'G': return GREEK_CAPITAL_LETTER_GAMMA;
325
+ case 'D': return GREEK_CAPITAL_LETTER_DELTA;
326
+ case 'E': return GREEK_CAPITAL_LETTER_EPSILON;
327
+ case 'Z': return GREEK_CAPITAL_LETTER_ZETA;
328
+ case 'H': return GREEK_CAPITAL_LETTER_ETA;
329
+ case 'Q': return GREEK_CAPITAL_LETTER_THETA;
330
+ case 'I': return GREEK_CAPITAL_LETTER_IOTA;
331
+ case 'K': return GREEK_CAPITAL_LETTER_KAPPA;
332
+ case 'L': return GREEK_CAPITAL_LETTER_LAMDA;
333
+ case 'M': return GREEK_CAPITAL_LETTER_MU;
334
+ case 'N': return GREEK_CAPITAL_LETTER_NU;
335
+ case 'X': return GREEK_CAPITAL_LETTER_XI;
336
+ case 'O': return GREEK_CAPITAL_LETTER_OMICRON;
337
+ case 'P': return GREEK_CAPITAL_LETTER_PI;
338
+ case 'R': return GREEK_CAPITAL_LETTER_RHO;
339
+ case 'S': return GREEK_CAPITAL_LETTER_SIGMA;
340
+ case 'T': return GREEK_CAPITAL_LETTER_TAU;
341
+ case 'Y': return GREEK_CAPITAL_LETTER_UPSILON;
342
+ case 'F': return GREEK_CAPITAL_LETTER_PHI;
343
+ case 'C': return GREEK_CAPITAL_LETTER_CHI;
344
+ case 'V': return GREEK_CAPITAL_LETTER_PSI;
345
+ case 'W': return GREEK_CAPITAL_LETTER_OMEGA;
346
+ /*
347
+ case '': return GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA;
348
+ case '': return GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA;
349
+ case '': return GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
350
+ case '': return GREEK_SMALL_LETTER_EPSILON_WITH_TONOS;
351
+ case '': return GREEK_SMALL_LETTER_ETA_WITH_TONOS;
352
+ case '': return GREEK_SMALL_LETTER_IOTA_WITH_TONOS;
353
+ case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS;
354
+ */
355
+ case 'a': return GREEK_SMALL_LETTER_ALPHA;
356
+ case 'b': return GREEK_SMALL_LETTER_BETA;
357
+ case 'g': return GREEK_SMALL_LETTER_GAMMA;
358
+ case 'd': return GREEK_SMALL_LETTER_DELTA;
359
+ case 'e': return GREEK_SMALL_LETTER_EPSILON;
360
+ case 'z': return GREEK_SMALL_LETTER_ZETA;
361
+ case 'h': return GREEK_SMALL_LETTER_ETA;
362
+ case 'q': return GREEK_SMALL_LETTER_THETA;
363
+ case 'i': return GREEK_SMALL_LETTER_IOTA;
364
+ case 'k': return GREEK_SMALL_LETTER_KAPPA;
365
+ case 'l': return GREEK_SMALL_LETTER_LAMDA;
366
+ case 'm': return GREEK_SMALL_LETTER_MU;
367
+ case 'n': return GREEK_SMALL_LETTER_NU;
368
+ case 'x': return GREEK_SMALL_LETTER_XI;
369
+ case 'o': return GREEK_SMALL_LETTER_OMICRON;
370
+ case 'p': return GREEK_SMALL_LETTER_PI;
371
+ case 'r': return GREEK_SMALL_LETTER_RHO;
372
+ case '&': return GREEK_SMALL_LETTER_FINAL_SIGMA;
373
+ case 's': return GREEK_SMALL_LETTER_SIGMA;
374
+ case 't': return GREEK_SMALL_LETTER_TAU;
375
+ case 'y': return GREEK_SMALL_LETTER_UPSILON;
376
+ case 'f': return GREEK_SMALL_LETTER_PHI;
377
+ case 'c': return GREEK_SMALL_LETTER_CHI;
378
+ case 'v': return GREEK_SMALL_LETTER_PSI;
379
+ case 'w': return GREEK_SMALL_LETTER_OMEGA;
380
+ /*
381
+ case '': return GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA;
382
+ case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA;
383
+ case '': return GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
384
+ case '': return GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
385
+ case '': return GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
386
+ case '': return GREEK_BETA_SYMBOL;
387
+ case '': return GREEK_THETA_SYMBOL;
388
+ case '': return GREEK_UPSILON_WITH_HOOK_SYMBOL;
389
+ case '': return GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL;
390
+ case '': return GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL;
391
+ case '': return GREEK_PHI_SYMBOL;
392
+ case '': return GREEK_PI_SYMBOL;
393
+ */
394
+ default:
395
+ if(warn)fprintf( stderr, " COMPOSE: GREEK %04x not defined\n",(int)main);
396
+ }
397
+ break;
398
+
399
+ default:
400
+ fprintf( stderr, " COMPOSE: modifier %04x not defined\n",(int)modifier);
401
+ }
402
+ return (wchar_t)main;
403
+ }
404
+
405
+ #define UNDEFINED "~"
406
+
407
+ /* Arguments: character in Unicode format, type of format to convert to.
408
+ Returns: a string containing the Unicode character converted to the chosen
409
+ format. This string is statically allocated and should not be freed.
410
+ ToDo: better using tables?
411
+ */
412
+ const char *decode(wchar_t c, FORMAT type) {
413
+ /* static char d; --- js: big bug (missing \0) if &d returned */
414
+ /*FIXME jb static*/ static char bbuf[8*32]; /* space for 8 buffers, rotating */
415
+ /*FIXME jb static*/ static char *buf=bbuf; /* used for UTF8 sequences and undefined codes */
416
+ buf+=32; if(buf>=bbuf+8*32) buf=bbuf;
417
+ buf[0]=buf[1]=buf[2]=0;
418
+ switch (type) {
419
+ case ISO8859_1:
420
+ if ( c <= 0xFF ) { /* UNICODE == ISO8859-1 */
421
+ buf[0] = (char)c;
422
+ return buf;
423
+ }
424
+ switch (c) { /* not found in list, but perhaps we can describe it */
425
+ /* todo: add greek. GREEK_SMALL_LETTER_ALPHA = alpha */
426
+
427
+ /* general punctuation */
428
+ case HYPHEN:
429
+ return (const char *)"-";
430
+ case FIGURE_DASH:
431
+ case EN_DASH:
432
+ return (const char *)"--";
433
+ case EM_DASH:
434
+ return (const char *)"---";
435
+ case LEFT_SINGLE_QUOTATION_MARK:
436
+ return (const char *)"`";
437
+ case RIGHT_SINGLE_QUOTATION_MARK:
438
+ return (const char *)"'";
439
+ case SINGLE_LOW_9_QUOTATION_MARK:
440
+ return (const char *)",";
441
+ case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
442
+ return (const char *)UNDEFINED;
443
+ case LEFT_DOUBLE_QUOTATION_MARK:
444
+ return (const char *)"``";
445
+ case RIGHT_DOUBLE_QUOTATION_MARK:
446
+ return (const char *)"''";
447
+ case DOUBLE_LOW_9_QUOTATION_MARK:
448
+ return (const char *)",,";
449
+ case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
450
+ return (const char *)UNDEFINED;
451
+ case DAGGER:
452
+ return (const char *)"+";
453
+ case DOUBLE_DAGGER:
454
+ return (const char *)"*";
455
+ case BULLET:
456
+ return (const char *)"*";
457
+ case TRIANGULAR_BULLET:
458
+ return (const char *)"*";
459
+ case HYPHENATION_POINT:
460
+ return (const char *)"-";
461
+ case HORIZONTAL_ELLIPSIS:
462
+ return (const char *)"...";
463
+ case PER_MILLE_SIGN:
464
+ return (const char *)"%%"; /* awk! */
465
+ case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
466
+ return (const char *)"<";
467
+ case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
468
+ return (const char *)">";
469
+ case EURO_CURRENCY_SIGN:
470
+ return (const char *)"EUR"; /* change it! */
471
+
472
+ /* ligatures */
473
+ case LATIN_SMALL_LIGATURE_FF:
474
+ return (const char *)"ff";
475
+ case LATIN_SMALL_LIGATURE_FI:
476
+ return (const char *)"fi";
477
+ case LATIN_SMALL_LIGATURE_FL:
478
+ return (const char *)"fl";
479
+ case LATIN_SMALL_LIGATURE_FFI:
480
+ return (const char *)"ffi";
481
+ case LATIN_SMALL_LIGATURE_FFL:
482
+ return (const char *)"ffl";
483
+ case LATIN_SMALL_LIGATURE_LONG_S_T:
484
+ case LATIN_SMALL_LIGATURE_ST:
485
+ return (const char *)"st";
486
+
487
+ /* extra */
488
+ case UNKNOWN:
489
+ return (const char *)"_";
490
+ case PICTURE:
491
+ return (const char *)"_"; /* Due to Mobile OCR */
492
+
493
+ default:
494
+ /* snprintf seems to be no standard, so I use insecure sprintf */
495
+ sprintf(buf,"\\code(%04x)",(unsigned)c);
496
+ return buf; /* UNDEFINED; */
497
+ }
498
+ break;
499
+ case TeX:
500
+ if ( c >= SPACE && c <= TILDE ) { /* ASCII */
501
+ switch (c) {
502
+ case '$':
503
+ return (const char *)"\\$";
504
+ case '&':
505
+ return (const char *)"\\&";
506
+ case '%':
507
+ return (const char *)"\\%";
508
+ case '#':
509
+ return (const char *)"\\#";
510
+ case '_':
511
+ return (const char *)"\\_";
512
+ case '{':
513
+ return (const char *)"\\{";
514
+ case '}':
515
+ return (const char *)"\\}";
516
+ case '\\':
517
+ return (const char *)"\\textbackslash";
518
+ case '~':
519
+ return (const char *)"\\~{}";
520
+ case '^':
521
+ return (const char *)"\\^{}";
522
+ default:
523
+ buf[0] = (char)c;
524
+ return (const char *)buf;
525
+ }
526
+ }
527
+ switch (c) {
528
+ /* ISO8859_1 */
529
+ case NO_BREAK_SPACE:
530
+ return (const char *)"~";
531
+ case INVERTED_EXCLAMATION_MARK:
532
+ return (const char *)"!'";
533
+ case CENT_SIGN:
534
+ return (const char *)"\\textcent"; /* \usepackage{textcomp} */
535
+ case POUND_SIGN:
536
+ return (const char *)"\\pounds";
537
+ case EURO_CURRENCY_SIGN:
538
+ return (const char *)"\\euro"; /* \usepackage{eurosans} */
539
+ case CURRENCY_SIGN:
540
+ return (const char *)"\\textcurrency"; /* \usepackage{textcomp} */
541
+ case YEN_SIGN:
542
+ return (const char *)"\\textyen"; /* \usepackage{textcomp} */
543
+ case BROKEN_BAR:
544
+ return (const char *)"\\textbrokenbar"; /* \usepackage{textcomp} */
545
+ case SECTION_SIGN:
546
+ return (const char *)"\\S";
547
+ case DIAERESIS:
548
+ return (const char *)"\"";
549
+ case COPYRIGHT_SIGN:
550
+ return (const char *)"\\copyright";
551
+ case FEMININE_ORDINAL_INDICATOR:
552
+ return (const char *)"$^{\\underbar{a}}$";
553
+ case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
554
+ return (const char *)"\\flqq{}";
555
+ case NOT_SIGN:
556
+ return (const char *)"$\\lnot$";
557
+ case SOFT_HYPHEN:
558
+ return (const char *)"\\-";
559
+ case REGISTERED_SIGN:
560
+ return (const char *)"\\textregistered";/* \usepackage{textcomp} */
561
+ case MACRON:
562
+ return (const char *)"\\textasciimacron";/* \usepackage{textcomp} */
563
+ case DEGREE_SIGN:
564
+ return (const char *)"$^{o}$";
565
+ case PLUS_MINUS_SIGN:
566
+ return (const char *)"$\\pm$";
567
+ case SUPERSCRIPT_TWO:
568
+ return (const char *)"$^{2}$";
569
+ case SUPERSCRIPT_THREE:
570
+ return (const char *)"$^{3}$";
571
+ case ACUTE_ACCENT:
572
+ return (const char *)"\\( \\prime \\)";
573
+ case MICRO_SIGN:
574
+ return (const char *)"$\\mu$";
575
+ case PILCROW_SIGN:
576
+ return (const char *)"\\P";
577
+ case MIDDLE_DOT:
578
+ return (const char *)"$\\cdot$";
579
+ case CEDILLA:
580
+ return (const char *)"\\,";
581
+ case SUPERSCRIPT_ONE:
582
+ return (const char *)"$^{1}$";
583
+ case MASCULINE_ORDINAL_INDICATOR:
584
+ return (const char *)"$^{\\underbar{o}}$";
585
+ case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
586
+ return (const char *)"\\frqq{}";
587
+ case VULGAR_FRACTION_ONE_QUARTER: /* these fractions are not good*/
588
+ return (const char *)"\\( 1\\over 4 \\)";
589
+ case VULGAR_FRACTION_ONE_HALF:
590
+ return (const char *)"\\( 1\\over 2 \\)";
591
+ case VULGAR_FRACTION_THREE_QUARTERS:
592
+ return (const char *)"\\( 3\\over 4 \\)";
593
+ case INVERTED_QUESTION_MARK:
594
+ return (const char *)"?'";
595
+ case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
596
+ return (const char *)"\\`A";
597
+ case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
598
+ return (const char *)"\\'A";
599
+ case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
600
+ return (const char *)"\\^A";
601
+ case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
602
+ return (const char *)"\\~A";
603
+ case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
604
+ return (const char *)"\\\"A";
605
+ case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
606
+ return (const char *)"\\AA";
607
+ case LATIN_CAPITAL_LETTER_AE:
608
+ return (const char *)"\\AE";
609
+ case LATIN_CAPITAL_LETTER_C_WITH_CARON:
610
+ return (const char *)"\\v{C}";
611
+ case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
612
+ return (const char *)"\\C";
613
+ case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
614
+ return (const char *)"\\`E";
615
+ case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
616
+ return (const char *)"\\'E";
617
+ case LATIN_CAPITAL_LETTER_E_WITH_CARON:
618
+ return (const char *)"\\v{E}";
619
+ case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
620
+ return (const char *)"\\^E";
621
+ case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
622
+ return (const char *)"\\\"E";
623
+ case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
624
+ return (const char *)"\\`I";
625
+ case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
626
+ return (const char *)"\\'I";
627
+ case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
628
+ return (const char *)"\\^I";
629
+ case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
630
+ return (const char *)"\\\"I";
631
+ case LATIN_CAPITAL_LETTER_ETH:
632
+ return (const char *)UNDEFINED;
633
+ case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
634
+ return (const char *)"\\~N";
635
+ case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
636
+ return (const char *)"\\`O";
637
+ case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
638
+ return (const char *)"\\'O";
639
+ case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
640
+ return (const char *)"\\^O";
641
+ case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
642
+ return (const char *)"\\~O";
643
+ case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
644
+ return (const char *)"\\\"O";
645
+ case MULTIPLICATION_SIGN:
646
+ return (const char *)"$\\times$";
647
+ case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
648
+ return (const char *)"\\O";
649
+ case LATIN_CAPITAL_LETTER_S_WITH_CARON:
650
+ return (const char *)"\\v{S}";
651
+ case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
652
+ return (const char *)"\\`U";
653
+ case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
654
+ return (const char *)"\\'U";
655
+ case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
656
+ return (const char *)"\\^U";
657
+ case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
658
+ return (const char *)"\\\"U";
659
+ case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
660
+ return (const char *)"\\'Y";
661
+ case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
662
+ return (const char *)"\\v{Z}";
663
+ case LATIN_CAPITAL_LETTER_THORN:
664
+ return (const char *)UNDEFINED;
665
+ case LATIN_SMALL_LETTER_SHARP_S:
666
+ return (const char *)"\\ss";
667
+ case LATIN_SMALL_LETTER_A_WITH_GRAVE:
668
+ return (const char *)"\\`a";
669
+ case LATIN_SMALL_LETTER_A_WITH_ACUTE:
670
+ return (const char *)"\\'a";
671
+ case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
672
+ return (const char *)"\\^a";
673
+ case LATIN_SMALL_LETTER_A_WITH_TILDE:
674
+ return (const char *)"\\~a";
675
+ case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
676
+ return (const char *)"\\\"a";
677
+ case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
678
+ return (const char *)"\\aa";
679
+ case LATIN_SMALL_LETTER_AE:
680
+ return (const char *)"\\ae";
681
+ case LATIN_SMALL_LETTER_C_WITH_CARON:
682
+ return (const char *)"\\v{c}";
683
+ case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
684
+ return (const char *)"\\c";
685
+ case LATIN_SMALL_LETTER_E_WITH_GRAVE:
686
+ return (const char *)"\\`e";
687
+ case LATIN_SMALL_LETTER_E_WITH_ACUTE:
688
+ return (const char *)"\\'e";
689
+ case LATIN_SMALL_LETTER_E_WITH_CARON:
690
+ return (const char *)"\\v{e}";
691
+ case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
692
+ return (const char *)"\\^e";
693
+ case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
694
+ return (const char *)"\\\"e";
695
+ case LATIN_SMALL_LETTER_I_WITH_GRAVE:
696
+ return (const char *)"\\`i";
697
+ case LATIN_SMALL_LETTER_I_WITH_ACUTE:
698
+ return (const char *)"\\'i";
699
+ case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
700
+ return (const char *)"\\^i";
701
+ case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
702
+ return (const char *)"\\\"i";
703
+ case LATIN_SMALL_LETTER_ETH:
704
+ return (const char *)UNDEFINED;
705
+ case LATIN_SMALL_LETTER_N_WITH_TILDE:
706
+ return (const char *)"\\~n";
707
+ case LATIN_SMALL_LETTER_O_WITH_GRAVE:
708
+ return (const char *)"\\`o";
709
+ case LATIN_SMALL_LETTER_O_WITH_ACUTE:
710
+ return (const char *)"\\'o";
711
+ case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
712
+ return (const char *)"\\^o";
713
+ case LATIN_SMALL_LETTER_O_WITH_TILDE:
714
+ return (const char *)"\\~o";
715
+ case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
716
+ return (const char *)"\\\"o";
717
+ case DIVISION_SIGN:
718
+ return (const char *)"$\\div$";
719
+ case LATIN_SMALL_LETTER_O_WITH_STROKE:
720
+ return (const char *)"\\o";
721
+ case LATIN_SMALL_LETTER_S_WITH_CARON:
722
+ return (const char *)"\\v{s}";
723
+ case LATIN_SMALL_LETTER_U_WITH_GRAVE:
724
+ return (const char *)"\\`u";
725
+ case LATIN_SMALL_LETTER_U_WITH_ACUTE:
726
+ return (const char *)"\\'u";
727
+ case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
728
+ return (const char *)"\\^u";
729
+ case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
730
+ return (const char *)"\\\"u";
731
+ case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
732
+ return (const char *)"\\'y";
733
+ case LATIN_SMALL_LETTER_THORN:
734
+ return (const char *)UNDEFINED;
735
+ case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
736
+ return (const char *)"\\\"y";
737
+ case LATIN_SMALL_LETTER_Z_WITH_CARON:
738
+ return (const char *)"\\v{z}";
739
+
740
+ /* greek */
741
+ /* some (punctuation, accents, accented capital) greek letters missing*/
742
+ case GREEK_CAPITAL_LETTER_ALPHA:
743
+ return (const char *)"A";
744
+ case GREEK_CAPITAL_LETTER_BETA:
745
+ return (const char *)"B";
746
+ case GREEK_CAPITAL_LETTER_GAMMA:
747
+ return (const char *)"\\( \\Gamma \\)";
748
+ case GREEK_CAPITAL_LETTER_DELTA:
749
+ return (const char *)"\\( \\Delta \\)";
750
+ case GREEK_CAPITAL_LETTER_EPSILON:
751
+ return (const char *)"E";
752
+ case GREEK_CAPITAL_LETTER_ZETA:
753
+ return (const char *)"Z";
754
+ case GREEK_CAPITAL_LETTER_ETA:
755
+ return (const char *)"H";
756
+ case GREEK_CAPITAL_LETTER_THETA:
757
+ return (const char *)"\\( \\Theta \\)";
758
+ case GREEK_CAPITAL_LETTER_IOTA:
759
+ return (const char *)"I";
760
+ case GREEK_CAPITAL_LETTER_KAPPA:
761
+ return (const char *)"K";
762
+ case GREEK_CAPITAL_LETTER_LAMDA:
763
+ return (const char *)"\\( \\Lambda \\)";
764
+ case GREEK_CAPITAL_LETTER_MU:
765
+ return (const char *)"M";
766
+ case GREEK_CAPITAL_LETTER_NU:
767
+ return (const char *)"N";
768
+ case GREEK_CAPITAL_LETTER_XI:
769
+ return (const char *)"\\( \\Xi \\)";
770
+ case GREEK_CAPITAL_LETTER_OMICRON:
771
+ return (const char *)"O";
772
+ case GREEK_CAPITAL_LETTER_PI:
773
+ return (const char *)"\\( \\Pi \\)";
774
+ case GREEK_CAPITAL_LETTER_RHO:
775
+ return (const char *)"P";
776
+ case GREEK_CAPITAL_LETTER_SIGMA:
777
+ return (const char *)"\\( \\Sigma \\)";
778
+ case GREEK_CAPITAL_LETTER_TAU:
779
+ return (const char *)"T";
780
+ case GREEK_CAPITAL_LETTER_UPSILON:
781
+ return (const char *)"\\( \\Upsilon \\)";
782
+ case GREEK_CAPITAL_LETTER_PHI:
783
+ return (const char *)"\\( \\Phi \\)";
784
+ case GREEK_CAPITAL_LETTER_CHI:
785
+ return (const char *)"\\( \\Chi \\)";
786
+ case GREEK_CAPITAL_LETTER_PSI:
787
+ return (const char *)"\\( \\Psi \\)";
788
+ case GREEK_CAPITAL_LETTER_OMEGA:
789
+ return (const char *)"\\( \\Omega \\)";
790
+ case GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA:
791
+ return (const char *)UNDEFINED;
792
+ case GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA:
793
+ return (const char *)UNDEFINED;
794
+ case GREEK_SMALL_LETTER_ALPHA_WITH_TONOS:
795
+ return (const char *)UNDEFINED;
796
+ case GREEK_SMALL_LETTER_EPSILON_WITH_TONOS:
797
+ return (const char *)UNDEFINED;
798
+ case GREEK_SMALL_LETTER_ETA_WITH_TONOS:
799
+ return (const char *)UNDEFINED;
800
+ case GREEK_SMALL_LETTER_IOTA_WITH_TONOS:
801
+ return (const char *)UNDEFINED;
802
+ case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS:
803
+ return (const char *)UNDEFINED;
804
+ case GREEK_SMALL_LETTER_ALPHA:
805
+ return (const char *)"\\( \\alpha \\)";
806
+ case GREEK_SMALL_LETTER_BETA:
807
+ return (const char *)"\\( \\beta \\)";
808
+ case GREEK_SMALL_LETTER_GAMMA:
809
+ return (const char *)"\\( \\gamma \\)";
810
+ case GREEK_SMALL_LETTER_DELTA:
811
+ return (const char *)"\\( \\delta \\)";
812
+ case GREEK_SMALL_LETTER_EPSILON:
813
+ return (const char *)"\\( \\epsilon \\)";
814
+ case GREEK_SMALL_LETTER_ZETA:
815
+ return (const char *)"\\( \\zeta \\)";
816
+ case GREEK_SMALL_LETTER_ETA:
817
+ return (const char *)"\\( \\eta \\)";
818
+ case GREEK_SMALL_LETTER_THETA:
819
+ return (const char *)"\\( \\theta \\)";
820
+ case GREEK_SMALL_LETTER_IOTA:
821
+ return (const char *)"\\( \\iota \\)";
822
+ case GREEK_SMALL_LETTER_KAPPA:
823
+ return (const char *)"\\( \\kappa \\)";
824
+ case GREEK_SMALL_LETTER_LAMDA:
825
+ return (const char *)"\\( \\lambda \\)";
826
+ case GREEK_SMALL_LETTER_MU:
827
+ return (const char *)"\\( \\mu \\)";
828
+ case GREEK_SMALL_LETTER_NU:
829
+ return (const char *)"\\( \\nu \\)";
830
+ case GREEK_SMALL_LETTER_XI:
831
+ return (const char *)"\\( \\xi \\)";
832
+ case GREEK_SMALL_LETTER_OMICRON:
833
+ return (const char *)"\\( \\omicron \\)";
834
+ case GREEK_SMALL_LETTER_PI:
835
+ return (const char *)"\\( \\pi \\)";
836
+ case GREEK_SMALL_LETTER_RHO:
837
+ return (const char *)"\\( \\rho \\)";
838
+ case GREEK_SMALL_LETTER_FINAL_SIGMA:
839
+ return (const char *)"\\( \\varsigma \\)";
840
+ case GREEK_SMALL_LETTER_SIGMA:
841
+ return (const char *)"\\( \\sigma \\)";
842
+ case GREEK_SMALL_LETTER_TAU:
843
+ return (const char *)"\\( \\tau \\)";
844
+ case GREEK_SMALL_LETTER_UPSILON:
845
+ return (const char *)"\\( \\upsilon \\)";
846
+ case GREEK_SMALL_LETTER_PHI:
847
+ return (const char *)"\\( \\varphi \\)";
848
+ case GREEK_SMALL_LETTER_CHI:
849
+ return (const char *)"\\( \\chi \\)";
850
+ case GREEK_SMALL_LETTER_PSI:
851
+ return (const char *)"\\( \\psi \\)";
852
+ case GREEK_SMALL_LETTER_OMEGA:
853
+ return (const char *)"\\( \\omega \\)";
854
+ case GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA:
855
+ return (const char *)UNDEFINED;
856
+ case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA:
857
+ return (const char *)UNDEFINED;
858
+ case GREEK_SMALL_LETTER_OMICRON_WITH_TONOS:
859
+ return (const char *)UNDEFINED;
860
+ case GREEK_SMALL_LETTER_UPSILON_WITH_TONOS:
861
+ return (const char *)UNDEFINED;
862
+ case GREEK_SMALL_LETTER_OMEGA_WITH_TONOS:
863
+ return (const char *)UNDEFINED;
864
+ case GREEK_BETA_SYMBOL:
865
+ return (const char *)UNDEFINED;
866
+ case GREEK_THETA_SYMBOL:
867
+ return (const char *)"\\( \\vartheta \\)";
868
+ case GREEK_UPSILON_WITH_HOOK_SYMBOL:
869
+ return (const char *)UNDEFINED;
870
+ case GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL:
871
+ return (const char *)UNDEFINED;
872
+ case GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL:
873
+ return (const char *)UNDEFINED;
874
+ case GREEK_PHI_SYMBOL:
875
+ return (const char *)"\\( \\phi \\)";
876
+ case GREEK_PI_SYMBOL:
877
+ return (const char *)"\\( \\varpi \\)";
878
+ /* and some greek letters missing*/
879
+
880
+ /* punctuation (partial) */
881
+ case HYPHEN:
882
+ return (const char *)"-";
883
+ case NON_BREAKING_HYPHEN:
884
+ return (const char *)UNDEFINED;
885
+ case FIGURE_DASH:
886
+ case EN_DASH:
887
+ return (const char *)"--";
888
+ case EM_DASH:
889
+ return (const char *)"---";
890
+ case HORIZONTAL_BAR:
891
+ return (const char *)UNDEFINED;
892
+ case LEFT_SINGLE_QUOTATION_MARK:
893
+ return (const char *)"`";
894
+ case RIGHT_SINGLE_QUOTATION_MARK:
895
+ return (const char *)"'";
896
+ case SINGLE_LOW_9_QUOTATION_MARK:
897
+ return (const char *)"\\glq{}";
898
+ case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
899
+ return (const char *)UNDEFINED;
900
+ case LEFT_DOUBLE_QUOTATION_MARK:
901
+ return (const char *)"``";
902
+ case RIGHT_DOUBLE_QUOTATION_MARK:
903
+ return (const char *)"''";
904
+ case DOUBLE_LOW_9_QUOTATION_MARK:
905
+ return (const char *)"\\glqq{}";
906
+ case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
907
+ return (const char *)UNDEFINED;
908
+ case DAGGER:
909
+ return (const char *)"\\dag";
910
+ case DOUBLE_DAGGER:
911
+ return (const char *)"\\ddag";
912
+ case BULLET:
913
+ return (const char *)"$\\bullet$";
914
+ case TRIANGULAR_BULLET:
915
+ return (const char *)"$\\blacktriangleright";
916
+ case HYPHENATION_POINT:
917
+ return (const char *)"\\-";
918
+ case HORIZONTAL_ELLIPSIS:
919
+ return (const char *)"\\ldots";
920
+ case PER_MILLE_SIGN:
921
+ return (const char *)UNDEFINED;
922
+ case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
923
+ return (const char *)"\\flq{}";
924
+ case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
925
+ return (const char *)"\\frq{}";
926
+ /* ligatures */
927
+ case LATIN_SMALL_LIGATURE_FF:
928
+ return (const char *)"ff";
929
+ case LATIN_SMALL_LIGATURE_FI:
930
+ return (const char *)"fi";
931
+ case LATIN_SMALL_LIGATURE_FL:
932
+ return (const char *)"fl";
933
+ case LATIN_SMALL_LIGATURE_FFI:
934
+ return (const char *)"ffi";
935
+ case LATIN_SMALL_LIGATURE_FFL:
936
+ return (const char *)"ffl";
937
+ case LATIN_SMALL_LIGATURE_LONG_S_T:
938
+ case LATIN_SMALL_LIGATURE_ST:
939
+ return (const char *)"st";
940
+ /* math */
941
+ case UNICODE_INFINITY:
942
+ return (const char *)"\\infty";
943
+ /* reserved */
944
+ case 0:
945
+ return (const char *)"";
946
+ case UNKNOWN:
947
+ return (const char *)"\\_";
948
+ case PICTURE:
949
+ return (const char *)"(PICTURE)";
950
+ default:
951
+ /* snprintf seems to be no standard, so I use insecure sprintf */
952
+ sprintf(buf,"\\symbol{%u}",(unsigned)c);
953
+ return buf; /* UNDEFINED; */
954
+ }
955
+ case HTML:
956
+ if ( c >= SPACE && c <= TILDE ) { /* ASCII */
957
+ switch (c) {
958
+ case '&':
959
+ return (const char *)"&amp;";
960
+ /* semicolon must not be coded */
961
+ case '\'':
962
+ return (const char *)"&apos;";
963
+ case '"':
964
+ return (const char *)"&quot;";
965
+ case '<':
966
+ return (const char *)"&lt;";
967
+ case '>':
968
+ return (const char *)"&gt;";
969
+ }
970
+ buf[0] = (char)c;
971
+ return buf;
972
+ }
973
+ switch (c) {
974
+ case PICTURE:
975
+ return (const char *)"<!--PICTURE-->";
976
+ case UNKNOWN:
977
+ return (const char *)"_"; /* better use colored symbol? */
978
+ case LINE_FEED:
979
+ return (const char *)"<br />"; /* \n handled somwhere else? */
980
+ case FORM_FEED:
981
+ case CARRIAGE_RETURN:
982
+ return (const char *)"<br />";
983
+ case NO_BREAK_SPACE:
984
+ return (const char *)"<nobr />";
985
+ case INVERTED_EXCLAMATION_MARK:
986
+ return (const char *)"&iexcl;";
987
+ case CENT_SIGN:
988
+ return (const char *)"&cent;";
989
+ case POUND_SIGN:
990
+ return (const char *)"&pound;";
991
+ case CURRENCY_SIGN:
992
+ return (const char *)"&curren;";
993
+ case YEN_SIGN:
994
+ return (const char *)"&yen;";
995
+ case BROKEN_BAR:
996
+ return (const char *)"&brvbar;";
997
+ case SECTION_SIGN:
998
+ return (const char *)"&sect;";
999
+ case DIAERESIS:
1000
+ return (const char *)"&uml;";
1001
+ case COPYRIGHT_SIGN:
1002
+ return (const char *)"&copy;";
1003
+ case FEMININE_ORDINAL_INDICATOR:
1004
+ return (const char *)"&ordfem;";
1005
+ case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1006
+ return (const char *)"&laquo;";
1007
+ case NOT_SIGN:
1008
+ return (const char *)"&not;";
1009
+ case SOFT_HYPHEN:
1010
+ return (const char *)"&shy;";
1011
+ case REGISTERED_SIGN:
1012
+ return (const char *)"&reg;";
1013
+ case MACRON:
1014
+ return (const char *)"&macr;";
1015
+ case DEGREE_SIGN:
1016
+ return (const char *)"&deg;";
1017
+ case PLUS_MINUS_SIGN:
1018
+ return (const char *)"&plusmn;";
1019
+ case SUPERSCRIPT_TWO:
1020
+ return (const char *)"&sup2;";
1021
+ case SUPERSCRIPT_THREE:
1022
+ return (const char *)"&sup3;";
1023
+ case ACUTE_ACCENT:
1024
+ return (const char *)"&acute;";
1025
+ case MICRO_SIGN:
1026
+ return (const char *)"&micro;";
1027
+ case PILCROW_SIGN:
1028
+ return (const char *)"&para;";
1029
+ case MIDDLE_DOT:
1030
+ return (const char *)"&middot;";
1031
+ case CEDILLA:
1032
+ return (const char *)"&cedil;";
1033
+ case SUPERSCRIPT_ONE:
1034
+ return (const char *)"&sup1;";
1035
+ case MASCULINE_ORDINAL_INDICATOR:
1036
+ return (const char *)"&ordm;";
1037
+ case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1038
+ return (const char *)"&raquo;";
1039
+ case VULGAR_FRACTION_ONE_QUARTER:
1040
+ return (const char *)"&frac14;";
1041
+ case VULGAR_FRACTION_ONE_HALF:
1042
+ return (const char *)"&frac12;";
1043
+ case VULGAR_FRACTION_THREE_QUARTERS:
1044
+ return (const char *)"&frac34;";
1045
+ case INVERTED_QUESTION_MARK:
1046
+ return (const char *)"&iquest;";
1047
+ case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
1048
+ return (const char *)"&Agrave;";
1049
+ case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
1050
+ return (const char *)"&Aacute;";
1051
+ case LATIN_CAPITAL_LETTER_A_WITH_BREVE:
1052
+ return (const char *)"&Abreve;";
1053
+ case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
1054
+ return (const char *)"&Acirc;";
1055
+ case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
1056
+ return (const char *)"&Atilde;";
1057
+ case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
1058
+ return (const char *)"&Auml;";
1059
+ case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
1060
+ return (const char *)"&Aring;";
1061
+ case LATIN_CAPITAL_LETTER_AE:
1062
+ return (const char *)"&AElig;";
1063
+ case LATIN_CAPITAL_LETTER_C_WITH_CARON:
1064
+ return (const char *)"&Ccaron;";
1065
+ case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
1066
+ return (const char *)"&Ccedil;";
1067
+ case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
1068
+ return (const char *)"&Egrave;";
1069
+ case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
1070
+ return (const char *)"&Eacute;";
1071
+ case LATIN_CAPITAL_LETTER_E_WITH_CARON:
1072
+ return (const char *)"&Ecaron;";
1073
+ case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
1074
+ return (const char *)"&Ecirc;";
1075
+ case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
1076
+ return (const char *)"&Euml;";
1077
+ case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
1078
+ return (const char *)"&Igrave;";
1079
+ case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
1080
+ return (const char *)"&Iacute;";
1081
+ case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
1082
+ return (const char *)"&Icirc;";
1083
+ case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
1084
+ return (const char *)"&Iuml;";
1085
+ case LATIN_CAPITAL_LETTER_ETH:
1086
+ return (const char *)"&ETH;";
1087
+ case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
1088
+ return (const char *)"&Ntilde;";
1089
+ case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
1090
+ return (const char *)"&Ograve;";
1091
+ case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
1092
+ return (const char *)"&Oacute;";
1093
+ case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
1094
+ return (const char *)"&Ocirc;";
1095
+ case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
1096
+ return (const char *)"&Otilde;";
1097
+ case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
1098
+ return (const char *)"&Ouml;";
1099
+ case MULTIPLICATION_SIGN:
1100
+ return (const char *)"&times";
1101
+ case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
1102
+ return (const char *)"&Oslash;";
1103
+ case LATIN_CAPITAL_LETTER_S_WITH_CARON:
1104
+ return (const char *)"&Scaron;";
1105
+ case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
1106
+ return (const char *)"&Ugrave;";
1107
+ case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
1108
+ return (const char *)"&Uacute;";
1109
+ case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
1110
+ return (const char *)"&Ucirc;";
1111
+ case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
1112
+ return (const char *)"&Uuml;";
1113
+ case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
1114
+ return (const char *)"&Yacute;";
1115
+ case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
1116
+ return (const char *)"&Zcaron;";
1117
+ case LATIN_CAPITAL_LETTER_THORN:
1118
+ return (const char *)"&THORN;";
1119
+ case LATIN_SMALL_LETTER_SHARP_S:
1120
+ return (const char *)"&szlig;";
1121
+ case LATIN_SMALL_LETTER_A_WITH_GRAVE:
1122
+ return (const char *)"&agrave;";
1123
+ case LATIN_SMALL_LETTER_A_WITH_ACUTE:
1124
+ return (const char *)"&aacute;";
1125
+ case LATIN_SMALL_LETTER_A_WITH_BREVE:
1126
+ return (const char *)"&abreve;";
1127
+ case LATIN_SMALL_LETTER_A_WITH_CARON:
1128
+ return (const char *)"&acaron;";
1129
+ case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
1130
+ return (const char *)"&acirc;";
1131
+ case LATIN_SMALL_LETTER_A_WITH_TILDE:
1132
+ return (const char *)"&atilde;";
1133
+ case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
1134
+ return (const char *)"&auml;";
1135
+ case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
1136
+ return (const char *)"&aring;";
1137
+ case LATIN_SMALL_LETTER_AE:
1138
+ return (const char *)"&aelig;";
1139
+ case LATIN_SMALL_LETTER_C_WITH_CARON:
1140
+ return (const char *)"&ccaron;";
1141
+ case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
1142
+ return (const char *)"&ccedil;";
1143
+ case LATIN_SMALL_LETTER_E_WITH_GRAVE:
1144
+ return (const char *)"&egrave;";
1145
+ case LATIN_SMALL_LETTER_E_WITH_ACUTE:
1146
+ return (const char *)"&eacute;";
1147
+ case LATIN_SMALL_LETTER_E_WITH_CARON:
1148
+ return (const char *)"&ecaron;";
1149
+ case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
1150
+ return (const char *)"&ecirc;";
1151
+ case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
1152
+ return (const char *)"&euml;";
1153
+ case LATIN_SMALL_LETTER_I_WITH_GRAVE:
1154
+ return (const char *)"&igrave;";
1155
+ case LATIN_SMALL_LETTER_I_WITH_ACUTE:
1156
+ return (const char *)"&iacute;";
1157
+ case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
1158
+ return (const char *)"&icirc;";
1159
+ case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
1160
+ return (const char *)"&iuml;";
1161
+ case LATIN_SMALL_LETTER_ETH:
1162
+ return (const char *)"&eth;";
1163
+ case LATIN_SMALL_LETTER_N_WITH_TILDE:
1164
+ return (const char *)"&ntilde;";
1165
+ case LATIN_SMALL_LETTER_O_WITH_GRAVE:
1166
+ return (const char *)"&ograve;";
1167
+ case LATIN_SMALL_LETTER_O_WITH_ACUTE:
1168
+ return (const char *)"&oacute;";
1169
+ case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
1170
+ return (const char *)"&ocirc;";
1171
+ case LATIN_SMALL_LETTER_O_WITH_TILDE:
1172
+ return (const char *)"&otilde;";
1173
+ case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
1174
+ return (const char *)"&ouml;";
1175
+ case DIVISION_SIGN:
1176
+ return (const char *)"&divide;";
1177
+ case LATIN_SMALL_LETTER_O_WITH_STROKE:
1178
+ return (const char *)"&oslash;";
1179
+ case LATIN_SMALL_LETTER_S_WITH_CARON:
1180
+ return (const char *)"&scaron;";
1181
+ case LATIN_SMALL_LETTER_U_WITH_GRAVE:
1182
+ return (const char *)"&ugrave;";
1183
+ case LATIN_SMALL_LETTER_U_WITH_ACUTE:
1184
+ return (const char *)"&uacute;";
1185
+ case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
1186
+ return (const char *)"&ucirc;";
1187
+ case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
1188
+ return (const char *)"&uuml;";
1189
+ case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
1190
+ return (const char *)"&yacute;";
1191
+ case LATIN_SMALL_LETTER_THORN:
1192
+ return (const char *)"&thorn;";
1193
+ case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
1194
+ return (const char *)"&yuml;";
1195
+ case LATIN_SMALL_LETTER_Z_WITH_CARON:
1196
+ return (const char *)"&zcaron;";
1197
+ case EURO_CURRENCY_SIGN:
1198
+ return (const char *)"&euro;";
1199
+ case 0:
1200
+ return (const char *)"";
1201
+ default:
1202
+ sprintf(buf,"&#%u;",(unsigned)c);
1203
+ return buf; /* undefined */
1204
+ }
1205
+ /* break; unreachable code */
1206
+ case XML: /* only 5 &xxx;-ENTITIES ar defined by default */
1207
+ if ( c >= SPACE && c <= TILDE ) { /* ASCII */
1208
+ switch (c) {
1209
+ case '&':
1210
+ return (const char *)"&amp;";
1211
+ case '\'':
1212
+ return (const char *)"&apos;";
1213
+ case '"':
1214
+ return (const char *)"&quot;";
1215
+ case '<':
1216
+ return (const char *)"&lt;";
1217
+ case '>':
1218
+ return (const char *)"&gt;";
1219
+ }
1220
+ buf[0] = (char)c;
1221
+ return buf;
1222
+ }
1223
+ switch (c) { /* subject of change! */
1224
+ case PICTURE:
1225
+ return (const char *)"(PICTURE)";
1226
+ case UNKNOWN:
1227
+ return (const char *)"_"; /* better use colored symbol? */
1228
+ case LINE_FEED: /* \n handled somwhere else? */
1229
+ case FORM_FEED:
1230
+ case CARRIAGE_RETURN:
1231
+ return (const char *)"<br />";
1232
+ case NO_BREAK_SPACE:
1233
+ return (const char *)"<nobr />";
1234
+ case 0:
1235
+ return (const char *)"";
1236
+ default:
1237
+ sprintf(buf,"&#x%03x;",(unsigned)c);
1238
+ return buf; /* undefined */
1239
+ }
1240
+ /* break; unreachable code */
1241
+ case SGML:
1242
+ switch (c) {
1243
+ default:
1244
+ sprintf(buf,"&#%u;",(unsigned)c);
1245
+ return buf; /* UNDEFINED */
1246
+ }
1247
+ /* break; unreachable code */
1248
+ case ASCII: /* mainly used for debugging */
1249
+ if ( c=='\n' || (c>= 0x20 && c <= 0x7F) ) {
1250
+ buf[0] = (char)c;
1251
+ return buf;
1252
+ }
1253
+ switch (c) {
1254
+ /* extra */
1255
+ case UNKNOWN:
1256
+ return (const char *)"(?)";
1257
+ case PICTURE:
1258
+ return (const char *)"(?)";
1259
+
1260
+ default:
1261
+ /* snprintf seems to be no standard, so I use insecure sprintf */
1262
+ if ((unsigned)c>255) sprintf(buf,"(0x%04x)",(unsigned)c);
1263
+ else sprintf(buf,"(0x%02x)",(unsigned)c);
1264
+ return buf; /* UNDEFINED; */
1265
+ }
1266
+ /* break; unreachable code */
1267
+ default: /* use UTF8 as default, test with xterm -u8 */
1268
+ /* extra */
1269
+ if ( c == UNKNOWN ) return (const char *)"_";
1270
+ if ( c == PICTURE ) return (const char *)"_"; /* Due to Mobile OCR */
1271
+ if ( c <= (wchar_t)0x0000007F ) { /* UTF8 == 7bit ASCII */
1272
+ buf[0] = (char)c;
1273
+ return buf;
1274
+ }
1275
+ if ( c <= (wchar_t)0x000007FF ) { /* UTF8 == 11bit */
1276
+ buf[0] = (char)(0xc0|((c>> 6) & 0x1f)); /* 110xxxxx */
1277
+ buf[1] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1278
+ buf[2] = (char)0; /* terminate string */
1279
+ return buf;
1280
+ }
1281
+ /* wchar_t is 16bit for Borland-C !? Jan07 */
1282
+ if ( c <= (wchar_t)0x0000FFFF ) { /* UTF8 == 16bit */
1283
+ buf[0] = (char)(0xe0|((c>>12) & 0x0f)); /* 1110xxxx */
1284
+ buf[1] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1285
+ buf[2] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1286
+ buf[3] = (char)0; /* terminate string */
1287
+ return buf;
1288
+ }
1289
+ if ( c <= (wchar_t)0x001FFFFF ) { /* UTF8 == 21bit */
1290
+ buf[0] = (char)(0xf0|((c>>18) & 0x07)); /* 11110xxx */
1291
+ buf[1] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1292
+ buf[2] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1293
+ buf[3] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1294
+ buf[4] = (char)0; /* terminate string */
1295
+ return buf;
1296
+ }
1297
+ if ( c <= (wchar_t)0x03FFFFFF ) { /* UTF8 == 26bit */
1298
+ buf[0] = (char)(0xf8|((c>>24) & 0x03)); /* 111110xx */
1299
+ buf[1] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1300
+ buf[2] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1301
+ buf[3] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1302
+ buf[4] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1303
+ buf[5] = (char)0; /* terminate string */
1304
+ return buf;
1305
+ }
1306
+ if ( c <= (wchar_t)0x7FFFFFFF ) { /* UTF8 == 31bit */
1307
+ buf[0] = (char)(0xfc|((c>>30) & 0x01)); /* 1111110x */
1308
+ buf[1] = (char)(0x80|((c>>24) & 0x3f)); /* 10xxxxxx */
1309
+ buf[2] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1310
+ buf[3] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1311
+ buf[4] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1312
+ buf[5] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1313
+ buf[6] = (char)0; /* terminate string */
1314
+ return buf;
1315
+ }
1316
+ return (const char *)UNDEFINED;
1317
+ }
1318
+ }