@helm2/poc_jenki_rce 0.0.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of @helm2/poc_jenki_rce might be problematic. Click here for more details.

Files changed (160) hide show
  1. package/LICENSE +19 -0
  2. package/Makefile +18 -0
  3. package/README.md +52 -0
  4. package/binding.gyp +81 -0
  5. package/index.d.ts +273 -0
  6. package/index.js +45 -0
  7. package/lib/bindings.js +1 -0
  8. package/lib/document.js +118 -0
  9. package/lib/element.js +82 -0
  10. package/lib/sax_parser.js +38 -0
  11. package/package.json +70 -0
  12. package/src/html_document.cc +7 -0
  13. package/src/html_document.h +18 -0
  14. package/src/libxmljs.cc +252 -0
  15. package/src/libxmljs.h +53 -0
  16. package/src/xml_attribute.cc +173 -0
  17. package/src/xml_attribute.h +40 -0
  18. package/src/xml_comment.cc +117 -0
  19. package/src/xml_comment.h +30 -0
  20. package/src/xml_document.cc +810 -0
  21. package/src/xml_document.h +67 -0
  22. package/src/xml_element.cc +565 -0
  23. package/src/xml_element.h +61 -0
  24. package/src/xml_namespace.cc +158 -0
  25. package/src/xml_namespace.h +39 -0
  26. package/src/xml_node.cc +761 -0
  27. package/src/xml_node.h +73 -0
  28. package/src/xml_pi.cc +161 -0
  29. package/src/xml_pi.h +34 -0
  30. package/src/xml_sax_parser.cc +424 -0
  31. package/src/xml_sax_parser.h +73 -0
  32. package/src/xml_syntax_error.cc +66 -0
  33. package/src/xml_syntax_error.h +25 -0
  34. package/src/xml_text.cc +320 -0
  35. package/src/xml_text.h +48 -0
  36. package/src/xml_textwriter.cc +315 -0
  37. package/src/xml_textwriter.h +62 -0
  38. package/src/xml_xpath_context.cc +70 -0
  39. package/src/xml_xpath_context.h +23 -0
  40. package/vendor/libxml/Copyright +23 -0
  41. package/vendor/libxml/DOCBparser.c +305 -0
  42. package/vendor/libxml/HTMLparser.c +7287 -0
  43. package/vendor/libxml/HTMLtree.c +1200 -0
  44. package/vendor/libxml/Makefile +2983 -0
  45. package/vendor/libxml/SAX.c +180 -0
  46. package/vendor/libxml/SAX2.c +3036 -0
  47. package/vendor/libxml/buf.c +1351 -0
  48. package/vendor/libxml/buf.h +72 -0
  49. package/vendor/libxml/c14n.c +2234 -0
  50. package/vendor/libxml/catalog.c +3828 -0
  51. package/vendor/libxml/chvalid.c +336 -0
  52. package/vendor/libxml/config.h +294 -0
  53. package/vendor/libxml/config.h.gch +0 -0
  54. package/vendor/libxml/debugXML.c +3423 -0
  55. package/vendor/libxml/dict.c +1298 -0
  56. package/vendor/libxml/elfgcchack.h +17818 -0
  57. package/vendor/libxml/enc.h +32 -0
  58. package/vendor/libxml/encoding.c +3975 -0
  59. package/vendor/libxml/entities.c +1163 -0
  60. package/vendor/libxml/error.c +998 -0
  61. package/vendor/libxml/globals.c +1126 -0
  62. package/vendor/libxml/hash.c +1146 -0
  63. package/vendor/libxml/include/libxml/DOCBparser.h +96 -0
  64. package/vendor/libxml/include/libxml/HTMLparser.h +306 -0
  65. package/vendor/libxml/include/libxml/HTMLtree.h +147 -0
  66. package/vendor/libxml/include/libxml/Makefile +725 -0
  67. package/vendor/libxml/include/libxml/Makefile.am +54 -0
  68. package/vendor/libxml/include/libxml/Makefile.in +725 -0
  69. package/vendor/libxml/include/libxml/SAX.h +173 -0
  70. package/vendor/libxml/include/libxml/SAX2.h +178 -0
  71. package/vendor/libxml/include/libxml/c14n.h +128 -0
  72. package/vendor/libxml/include/libxml/catalog.h +182 -0
  73. package/vendor/libxml/include/libxml/chvalid.h +230 -0
  74. package/vendor/libxml/include/libxml/debugXML.h +217 -0
  75. package/vendor/libxml/include/libxml/dict.h +79 -0
  76. package/vendor/libxml/include/libxml/encoding.h +245 -0
  77. package/vendor/libxml/include/libxml/entities.h +151 -0
  78. package/vendor/libxml/include/libxml/globals.h +508 -0
  79. package/vendor/libxml/include/libxml/hash.h +236 -0
  80. package/vendor/libxml/include/libxml/list.h +137 -0
  81. package/vendor/libxml/include/libxml/nanoftp.h +163 -0
  82. package/vendor/libxml/include/libxml/nanohttp.h +81 -0
  83. package/vendor/libxml/include/libxml/parser.h +1243 -0
  84. package/vendor/libxml/include/libxml/parserInternals.h +644 -0
  85. package/vendor/libxml/include/libxml/pattern.h +100 -0
  86. package/vendor/libxml/include/libxml/relaxng.h +217 -0
  87. package/vendor/libxml/include/libxml/schemasInternals.h +958 -0
  88. package/vendor/libxml/include/libxml/schematron.h +142 -0
  89. package/vendor/libxml/include/libxml/threads.h +89 -0
  90. package/vendor/libxml/include/libxml/tree.h +1311 -0
  91. package/vendor/libxml/include/libxml/uri.h +94 -0
  92. package/vendor/libxml/include/libxml/valid.h +458 -0
  93. package/vendor/libxml/include/libxml/xinclude.h +129 -0
  94. package/vendor/libxml/include/libxml/xlink.h +189 -0
  95. package/vendor/libxml/include/libxml/xmlIO.h +368 -0
  96. package/vendor/libxml/include/libxml/xmlautomata.h +146 -0
  97. package/vendor/libxml/include/libxml/xmlerror.h +945 -0
  98. package/vendor/libxml/include/libxml/xmlexports.h +77 -0
  99. package/vendor/libxml/include/libxml/xmlmemory.h +224 -0
  100. package/vendor/libxml/include/libxml/xmlmodule.h +57 -0
  101. package/vendor/libxml/include/libxml/xmlreader.h +428 -0
  102. package/vendor/libxml/include/libxml/xmlregexp.h +222 -0
  103. package/vendor/libxml/include/libxml/xmlsave.h +88 -0
  104. package/vendor/libxml/include/libxml/xmlschemas.h +246 -0
  105. package/vendor/libxml/include/libxml/xmlschemastypes.h +151 -0
  106. package/vendor/libxml/include/libxml/xmlstring.h +140 -0
  107. package/vendor/libxml/include/libxml/xmlunicode.h +202 -0
  108. package/vendor/libxml/include/libxml/xmlversion.h +484 -0
  109. package/vendor/libxml/include/libxml/xmlwin32version.h +239 -0
  110. package/vendor/libxml/include/libxml/xmlwriter.h +488 -0
  111. package/vendor/libxml/include/libxml/xpath.h +564 -0
  112. package/vendor/libxml/include/libxml/xpathInternals.h +632 -0
  113. package/vendor/libxml/include/libxml/xpointer.h +114 -0
  114. package/vendor/libxml/include/win32config.h +122 -0
  115. package/vendor/libxml/include/wsockcompat.h +54 -0
  116. package/vendor/libxml/legacy.c +1343 -0
  117. package/vendor/libxml/libxml.h +134 -0
  118. package/vendor/libxml/list.c +779 -0
  119. package/vendor/libxml/nanoftp.c +2118 -0
  120. package/vendor/libxml/nanohttp.c +1899 -0
  121. package/vendor/libxml/parser.c +15553 -0
  122. package/vendor/libxml/parserInternals.c +2164 -0
  123. package/vendor/libxml/pattern.c +2621 -0
  124. package/vendor/libxml/relaxng.c +11101 -0
  125. package/vendor/libxml/rngparser.c +1595 -0
  126. package/vendor/libxml/runsuite.c +1157 -0
  127. package/vendor/libxml/save.h +36 -0
  128. package/vendor/libxml/schematron.c +1787 -0
  129. package/vendor/libxml/threads.c +1049 -0
  130. package/vendor/libxml/timsort.h +601 -0
  131. package/vendor/libxml/tree.c +10183 -0
  132. package/vendor/libxml/trio.c +6895 -0
  133. package/vendor/libxml/trio.h +230 -0
  134. package/vendor/libxml/triodef.h +228 -0
  135. package/vendor/libxml/trionan.c +914 -0
  136. package/vendor/libxml/trionan.h +84 -0
  137. package/vendor/libxml/triop.h +150 -0
  138. package/vendor/libxml/triostr.c +2112 -0
  139. package/vendor/libxml/triostr.h +144 -0
  140. package/vendor/libxml/uri.c +2561 -0
  141. package/vendor/libxml/valid.c +7138 -0
  142. package/vendor/libxml/xinclude.c +2657 -0
  143. package/vendor/libxml/xlink.c +183 -0
  144. package/vendor/libxml/xmlIO.c +4135 -0
  145. package/vendor/libxml/xmlcatalog.c +624 -0
  146. package/vendor/libxml/xmllint.c +3796 -0
  147. package/vendor/libxml/xmlmemory.c +1163 -0
  148. package/vendor/libxml/xmlmodule.c +468 -0
  149. package/vendor/libxml/xmlreader.c +6033 -0
  150. package/vendor/libxml/xmlregexp.c +8271 -0
  151. package/vendor/libxml/xmlsave.c +2735 -0
  152. package/vendor/libxml/xmlschemas.c +29173 -0
  153. package/vendor/libxml/xmlschemastypes.c +6276 -0
  154. package/vendor/libxml/xmlstring.c +1050 -0
  155. package/vendor/libxml/xmlunicode.c +3179 -0
  156. package/vendor/libxml/xmlwriter.c +4738 -0
  157. package/vendor/libxml/xpath.c +14734 -0
  158. package/vendor/libxml/xpointer.c +2969 -0
  159. package/vendor/libxml/xzlib.c +815 -0
  160. package/vendor/libxml/xzlib.h +19 -0
@@ -0,0 +1,3975 @@
1
+ /*
2
+ * encoding.c : implements the encoding conversion functions needed for XML
3
+ *
4
+ * Related specs:
5
+ * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
+ * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
+ * [ISO-10646] UTF-8 and UTF-16 in Annexes
8
+ * [ISO-8859-1] ISO Latin-1 characters codes.
9
+ * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10
+ * Worldwide Character Encoding -- Version 1.0", Addison-
11
+ * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12
+ * described in Unicode Technical Report #4.
13
+ * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14
+ * Information Interchange, ANSI X3.4-1986.
15
+ *
16
+ * See Copyright for the status of this software.
17
+ *
18
+ * daniel@veillard.com
19
+ *
20
+ * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
+ */
22
+
23
+ #define IN_LIBXML
24
+ #include "libxml.h"
25
+
26
+ #include <string.h>
27
+ #include <limits.h>
28
+
29
+ #ifdef HAVE_CTYPE_H
30
+ #include <ctype.h>
31
+ #endif
32
+ #ifdef HAVE_STDLIB_H
33
+ #include <stdlib.h>
34
+ #endif
35
+ #ifdef LIBXML_ICONV_ENABLED
36
+ #ifdef HAVE_ERRNO_H
37
+ #include <errno.h>
38
+ #endif
39
+ #endif
40
+ #include <libxml/encoding.h>
41
+ #include <libxml/xmlmemory.h>
42
+ #ifdef LIBXML_HTML_ENABLED
43
+ #include <libxml/HTMLparser.h>
44
+ #endif
45
+ #include <libxml/globals.h>
46
+ #include <libxml/xmlerror.h>
47
+
48
+ #include "buf.h"
49
+ #include "enc.h"
50
+
51
+ static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52
+ static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53
+
54
+ typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; /* one encoding-alias record */
55
+ typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; /* pointer form, used for the xmlCharEncodingAliases table declared below */
56
+ struct _xmlCharEncodingAlias {
57
+ const char *name; /* presumably the real encoding name the alias maps to -- confirm against the alias registration code */
58
+ const char *alias; /* presumably the alternative spelling that is looked up -- confirm against the alias registration code */
59
+ };
60
+
61
+ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62
+ static int xmlCharEncodingAliasesNb = 0;
63
+ static int xmlCharEncodingAliasesMax = 0;
64
+
65
+ #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66
+ #if 0
67
+ #define DEBUG_ENCODING /* Define this to get encoding traces */
68
+ #endif
69
+ #else
70
+ #ifdef LIBXML_ISO8859X_ENABLED
71
+ static void xmlRegisterCharEncodingHandlersISO8859x (void);
72
+ #endif
73
+ #endif
74
+
75
+ static int xmlLittleEndian = 1;
76
+
77
+ /**
78
+ * xmlEncodingErrMemory:
79
+ * @extra: extra information, forwarded unchanged to __xmlSimpleError()
80
+ *
81
+ * Handle an out of memory condition: report XML_ERR_NO_MEMORY in the XML_FROM_I18N domain
82
+ */
83
+ static void
84
+ xmlEncodingErrMemory(const char *extra)
85
+ {
86
+ __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87
+ }
88
+
89
+ /**
90
+ * xmlEncodingErr:
91
+ * @error: the error number
92
+ * @msg: the error message (passed as the message/format argument of __xmlRaiseError)
93
+ * @val: a string value; passed to __xmlRaiseError both as a string field and as the argument for @msg
94
+ * Report an encoding error in the XML_FROM_I18N domain with level XML_ERR_FATAL
95
+ */
96
+ static void LIBXML_ATTR_FORMAT(2,0)
97
+ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98
+ {
99
+ __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100
+ XML_FROM_I18N, error, XML_ERR_FATAL,
101
+ NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102
+ }
103
+
104
+ #ifdef LIBXML_ICU_ENABLED
105
+ static uconv_t*
106
+ openIcuConverter(const char* name, int toUnicode)
107
+ {
108
+ UErrorCode status = U_ZERO_ERROR;
109
+ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110
+ if (conv == NULL)
111
+ return NULL;
112
+
113
+ conv->pivot_source = conv->pivot_buf;
114
+ conv->pivot_target = conv->pivot_buf;
115
+
116
+ conv->uconv = ucnv_open(name, &status);
117
+ if (U_FAILURE(status))
118
+ goto error;
119
+
120
+ status = U_ZERO_ERROR;
121
+ if (toUnicode) {
122
+ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123
+ NULL, NULL, NULL, &status);
124
+ }
125
+ else {
126
+ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127
+ NULL, NULL, NULL, &status);
128
+ }
129
+ if (U_FAILURE(status))
130
+ goto error;
131
+
132
+ status = U_ZERO_ERROR;
133
+ conv->utf8 = ucnv_open("UTF-8", &status);
134
+ if (U_SUCCESS(status))
135
+ return conv;
136
+
137
+ error:
138
+ if (conv->uconv)
139
+ ucnv_close(conv->uconv);
140
+ xmlFree(conv);
141
+ return NULL;
142
+ }
143
+
144
+ static void
145
+ closeIcuConverter(uconv_t *conv)
146
+ {
147
+ if (conv != NULL) {
148
+ ucnv_close(conv->uconv);
149
+ ucnv_close(conv->utf8);
150
+ xmlFree(conv);
151
+ }
152
+ }
153
+ #endif /* LIBXML_ICU_ENABLED */
154
+
155
+ /************************************************************************
156
+ * *
157
+ * Conversions To/From UTF8 encoding *
158
+ * *
159
+ ************************************************************************/
160
+
161
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every input
 * byte below 0x80 is copied unchanged and needs exactly one output byte.
 * Conversion stops when the input is exhausted or @out is full.
 *
 * Returns the number of bytes written if success, or -1 if a byte
 *     outside the ASCII range (>= 0x80) is met.
 * The value of @inlen after return is the number of octets consumed
 *     (on error: the valid prefix before the offending byte).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* instart = in;
    unsigned char* outend = out + *outlen;
    const unsigned char* inend = in + (*inlen);
    int ret = 0;

    /* One output byte per input byte: a plain room check is sufficient. */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            ret = -1;
            break;
        }
        *out++ = *in++;
    }

    *outlen = out - outstart;
    *inlen = in - instart;
    if (ret == 0)
        ret = *outlen;
    return(ret);
}
205
+
206
+ #ifdef LIBXML_OUTPUT_ENABLED
207
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * If @in is NULL the call is treated as an initialization request:
 * both lengths are set to 0 and 0 is returned.
 * A multi-byte sequence that is cut off by the end of @in is left
 * unconsumed so that the caller can supply the rest later.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;   /* no chance for this in Ascii */

        /* Sequence continues past the end of the input: wait for more. */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte here means malformed input; it must
             * not be folded into a half-decoded value and emitted.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto invalid;                   /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
290
+ #endif /* LIBXML_OUTPUT_ENABLED */
291
+
292
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is; bytes from 0x80
 * up are expanded to a two-byte UTF-8 sequence. Conversion stops at the
 * first character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * Room is checked per character with pointer differences, so no
     * pointer outside [out, outend] is ever formed, even for *outlen == 0.
     */
    while (in < inend) {
        if (*in < 0x80) {
            if (out >= outend)
                break;
            *out++ = *in++;
        } else {
            if (outend - out < 2)
                break;
            *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
340
+
341
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion" for UTF-8: copies as many bytes as fit,
 * i.e. the smaller of *@inlenb and *@outlen. The bytes are not checked
 * for UTF-8 validity.
 *
 * A NULL @inb is an initialization request: both lengths are set to 0
 * and 0 is returned.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or
 *     @inlenb is NULL or the number of bytes to copy is negative.
 * On success both *@outlen and *@inlenb are set to the number of
 * bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* Copy whichever is smaller: what is available or what fits. */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
387
+
388
+
389
+ #ifdef LIBXML_OUTPUT_ENABLED
390
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is an initialization request: both lengths are set to 0 and
 * 0 is returned. A sequence cut off by the end of @in is left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad lead or continuation byte, or a character above 0xFF),
 *     or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src, *srcend, *done;
    unsigned char *dst, *dstend;
    unsigned int cp, byte;
    int extra;
    int status = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization, nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    done = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while (src < srcend) {
        byte = *src++;

        /* Classify the lead byte: payload bits and continuation count. */
        if (byte < 0x80) {
            cp = byte;
            extra = 0;
        } else if ((byte < 0xC0) || (byte >= 0xF8)) {
            /* stray trailing byte, or a lead byte that cannot be Latin 1 */
            status = -2;
            break;
        } else if (byte < 0xE0) {
            cp = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            cp = byte & 0x0F;
            extra = 2;
        } else {
            cp = byte & 0x07;
            extra = 3;
        }

        /* Incomplete sequence at the end of the input: stop here. */
        if (srcend - src < extra)
            break;

        while (extra > 0) {
            byte = *src++;
            if ((byte & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            cp = (cp << 6) | (byte & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        /* cp now holds one complete UCS-4 value */
        if (cp > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (dst >= dstend)
            break;
        *dst++ = cp;
        done = src;
    }

    *outlen = dst - out;
    *inlen = done - in;
    if (status != 0)
        return(status);
    return(*outlen);
}
479
+ #endif /* LIBXML_OUTPUT_ENABLED */
480
+
481
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte (low byte
 * first), so @inb needs no particular alignment and the result does not
 * depend on the byte order of the host.
 *
 * A trailing odd byte is ignored (*@inlenb is rounded down to even).
 * A high surrogate whose partner has not arrived yet, or a character
 * that no longer fits in @out, is left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;     /* start of the next unconsumed char */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need, bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (inend - in >= 2) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)        /* pair split across chunks */
                break;
            d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = ((c & 0x03FF) << 10) | (d & 0x03FF);
            c += 0x10000;
        }

        /* c is a single UCS-4 value: emit it only if it fits entirely */
        if      (c <    0x80) { need = 1; }
        else if (c <   0x800) { need = 2; }
        else if (c < 0x10000) { need = 3; }
        else                  { need = 4; }
        if (outend - out < need)
            break;

        if      (c <    0x80) {  *out++=  c;                         bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
573
+
574
+ #ifdef LIBXML_OUTPUT_ENABLED
575
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte (low byte first), so @outb needs no particular
 * alignment and the result does not depend on the host byte order.
 *
 * A NULL @in is an initialization request: both lengths are set to 0 and
 * 0 is returned. A sequence cut off by the end of @in, or a character
 * that no longer fits in @outb, is left unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (bad lead or continuation
 *     byte, or a value above 0x10FFFF).
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;  /* no chance for this in UTF-16 */

        /* Sequence continues past the end of the input: wait for more. */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /* malformed input must not be emitted as a half-decoded value */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        }
        else
            goto invalid;                  /* beyond the UTF-16 range */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
683
+
684
+ /**
685
+ * UTF8ToUTF16:
686
+ * @outb: a pointer to an array of bytes to store the result
687
+ * @outlen: the length of @outb
688
+ * @in: a pointer to an array of UTF-8 chars
689
+ * @inlen: the length of @in
690
+ *
691
+ * Take a block of UTF-8 chars in and try to convert it to an UTF-16
692
+ * block of chars out.
693
+ *
694
+ * Returns the number of bytes written, or -1 if lack of space, or -2
695
+ * if the transcoding failed.
696
+ */
697
+ static int
698
+ UTF8ToUTF16(unsigned char* outb, int *outlen,
699
+ const unsigned char* in, int *inlen)
700
+ {
701
+ if (in == NULL) {
702
+ /*
703
+ * initialization, add the Byte Order Mark for UTF-16LE
704
+ */
705
+ if (*outlen >= 2) {
706
+ outb[0] = 0xFF;
707
+ outb[1] = 0xFE;
708
+ *outlen = 2;
709
+ *inlen = 0;
710
+ #ifdef DEBUG_ENCODING
711
+ xmlGenericError(xmlGenericErrorContext,
712
+ "Added FFFE Byte Order Mark\n");
713
+ #endif
714
+ return(2);
715
+ }
716
+ *outlen = 0;
717
+ *inlen = 0;
718
+ return(0);
719
+ }
720
+ return (UTF8ToUTF16LE(outb, outlen, in, inlen));
721
+ }
722
+ #endif /* LIBXML_OUTPUT_ENABLED */
723
+
724
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte (high byte
 * first), so @inb needs no particular alignment and the result does not
 * depend on the byte order of the host.
 *
 * A trailing odd byte is ignored (*@inlenb is rounded down to even).
 * A high surrogate whose partner has not arrived yet, or a character
 * that no longer fits in @out, is left unconsumed; a character is never
 * written partially.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;     /* start of the next unconsumed char */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need, bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)        /* pair split across chunks */
                break;
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = ((c & 0x03FF) << 10) | (d & 0x03FF);
            c += 0x10000;
        }

        /* c is a single UCS-4 value: emit it only if it fits entirely */
        if      (c <    0x80) { need = 1; }
        else if (c <   0x800) { need = 2; }
        else if (c < 0x10000) { need = 3; }
        else                  { need = 4; }
        if (outend - out < need)
            break;

        if      (c <    0x80) {  *out++=  c;                         bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
815
+
816
+ #ifdef LIBXML_OUTPUT_ENABLED
817
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to a UTF-16BE
 * block of chars out.  No byte order mark is written.  The output is
 * stored one byte at a time, so the routine depends neither on the host
 * byte order nor on the alignment of @outb.
 *
 * On return *outlen is the number of bytes written and *inlen the number
 * of input bytes consumed, on the success path as well as on the -2 error
 * path.  A NULL @in is treated as empty input.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if a byte that cannot start a character is found.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = (int) (out - outb);
            *inlen = (int) (processed - instart);
            return(-2);
        } else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            *outlen = (int) (out - outb);
            *inlen = (int) (processed - instart);
            return(-2);
        }

        /* the character is split across buffers: wait for the rest */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            if ((in >= inend) || (((d = *in++) & 0xC0) != 0x80)) break;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single code point */
        if (c < 0x10000) {
            if ((outend - out) < 2) break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        }
        else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if ((outend - out) < 4) break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        }
        else
            break;
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);
}
922
+ #endif /* LIBXML_OUTPUT_ENABLED */
923
+
924
+ /************************************************************************
925
+ * *
926
+ * Generic encoding handling routines *
927
+ * *
928
+ ************************************************************************/
929
+
930
+ /**
931
+ * xmlDetectCharEncoding:
932
+ * @in: a pointer to the first bytes of the XML entity, must be at least
933
+ * 2 bytes long (at least 4 if encoding is UTF4 variant).
934
+ * @len: pointer to the length of the buffer
935
+ *
936
+ * Guess the encoding of the entity using the first bytes of the entity content
937
+ * according to the non-normative appendix F of the XML-1.0 recommendation.
938
+ *
939
+ * Returns one of the XML_CHAR_ENCODING_... values.
940
+ */
941
+ xmlCharEncoding
942
+ xmlDetectCharEncoding(const unsigned char* in, int len)
943
+ {
944
+ if (in == NULL)
945
+ return(XML_CHAR_ENCODING_NONE);
946
+ if (len >= 4) {
947
+ if ((in[0] == 0x00) && (in[1] == 0x00) &&
948
+ (in[2] == 0x00) && (in[3] == 0x3C))
949
+ return(XML_CHAR_ENCODING_UCS4BE);
950
+ if ((in[0] == 0x3C) && (in[1] == 0x00) &&
951
+ (in[2] == 0x00) && (in[3] == 0x00))
952
+ return(XML_CHAR_ENCODING_UCS4LE);
953
+ if ((in[0] == 0x00) && (in[1] == 0x00) &&
954
+ (in[2] == 0x3C) && (in[3] == 0x00))
955
+ return(XML_CHAR_ENCODING_UCS4_2143);
956
+ if ((in[0] == 0x00) && (in[1] == 0x3C) &&
957
+ (in[2] == 0x00) && (in[3] == 0x00))
958
+ return(XML_CHAR_ENCODING_UCS4_3412);
959
+ if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
960
+ (in[2] == 0xA7) && (in[3] == 0x94))
961
+ return(XML_CHAR_ENCODING_EBCDIC);
962
+ if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
963
+ (in[2] == 0x78) && (in[3] == 0x6D))
964
+ return(XML_CHAR_ENCODING_UTF8);
965
+ /*
966
+ * Although not part of the recommendation, we also
967
+ * attempt an "auto-recognition" of UTF-16LE and
968
+ * UTF-16BE encodings.
969
+ */
970
+ if ((in[0] == 0x3C) && (in[1] == 0x00) &&
971
+ (in[2] == 0x3F) && (in[3] == 0x00))
972
+ return(XML_CHAR_ENCODING_UTF16LE);
973
+ if ((in[0] == 0x00) && (in[1] == 0x3C) &&
974
+ (in[2] == 0x00) && (in[3] == 0x3F))
975
+ return(XML_CHAR_ENCODING_UTF16BE);
976
+ }
977
+ if (len >= 3) {
978
+ /*
979
+ * Errata on XML-1.0 June 20 2001
980
+ * We now allow an UTF8 encoded BOM
981
+ */
982
+ if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
983
+ (in[2] == 0xBF))
984
+ return(XML_CHAR_ENCODING_UTF8);
985
+ }
986
+ /* For UTF-16 we can recognize by the BOM */
987
+ if (len >= 2) {
988
+ if ((in[0] == 0xFE) && (in[1] == 0xFF))
989
+ return(XML_CHAR_ENCODING_UTF16BE);
990
+ if ((in[0] == 0xFF) && (in[1] == 0xFE))
991
+ return(XML_CHAR_ENCODING_UTF16LE);
992
+ }
993
+ return(XML_CHAR_ENCODING_NONE);
994
+ }
995
+
996
+ /**
997
+ * xmlCleanupEncodingAliases:
998
+ *
999
+ * Unregisters all aliases
1000
+ */
1001
+ void
1002
+ xmlCleanupEncodingAliases(void) {
1003
+ int i;
1004
+
1005
+ if (xmlCharEncodingAliases == NULL)
1006
+ return;
1007
+
1008
+ for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1009
+ if (xmlCharEncodingAliases[i].name != NULL)
1010
+ xmlFree((char *) xmlCharEncodingAliases[i].name);
1011
+ if (xmlCharEncodingAliases[i].alias != NULL)
1012
+ xmlFree((char *) xmlCharEncodingAliases[i].alias);
1013
+ }
1014
+ xmlCharEncodingAliasesNb = 0;
1015
+ xmlCharEncodingAliasesMax = 0;
1016
+ xmlFree(xmlCharEncodingAliases);
1017
+ xmlCharEncodingAliases = NULL;
1018
+ }
1019
+
1020
+ /**
1021
+ * xmlGetEncodingAlias:
1022
+ * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1023
+ *
1024
+ * Lookup an encoding name for the given alias.
1025
+ *
1026
+ * Returns NULL if not found, otherwise the original name
1027
+ */
1028
+ const char *
1029
+ xmlGetEncodingAlias(const char *alias) {
1030
+ int i;
1031
+ char upper[100];
1032
+
1033
+ if (alias == NULL)
1034
+ return(NULL);
1035
+
1036
+ if (xmlCharEncodingAliases == NULL)
1037
+ return(NULL);
1038
+
1039
+ for (i = 0;i < 99;i++) {
1040
+ upper[i] = toupper(alias[i]);
1041
+ if (upper[i] == 0) break;
1042
+ }
1043
+ upper[i] = 0;
1044
+
1045
+ /*
1046
+ * Walk down the list looking for a definition of the alias
1047
+ */
1048
+ for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1049
+ if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1050
+ return(xmlCharEncodingAliases[i].name);
1051
+ }
1052
+ }
1053
+ return(NULL);
1054
+ }
1055
+
1056
+ /**
1057
+ * xmlAddEncodingAlias:
1058
+ * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1059
+ * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1060
+ *
1061
+ * Registers an alias @alias for an encoding named @name. Existing alias
1062
+ * will be overwritten.
1063
+ *
1064
+ * Returns 0 in case of success, -1 in case of error
1065
+ */
1066
+ int
1067
+ xmlAddEncodingAlias(const char *name, const char *alias) {
1068
+ int i;
1069
+ char upper[100];
1070
+
1071
+ if ((name == NULL) || (alias == NULL))
1072
+ return(-1);
1073
+
1074
+ for (i = 0;i < 99;i++) {
1075
+ upper[i] = toupper(alias[i]);
1076
+ if (upper[i] == 0) break;
1077
+ }
1078
+ upper[i] = 0;
1079
+
1080
+ if (xmlCharEncodingAliases == NULL) {
1081
+ xmlCharEncodingAliasesNb = 0;
1082
+ xmlCharEncodingAliasesMax = 20;
1083
+ xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1084
+ xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1085
+ if (xmlCharEncodingAliases == NULL)
1086
+ return(-1);
1087
+ } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1088
+ xmlCharEncodingAliasesMax *= 2;
1089
+ xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1090
+ xmlRealloc(xmlCharEncodingAliases,
1091
+ xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1092
+ }
1093
+ /*
1094
+ * Walk down the list looking for a definition of the alias
1095
+ */
1096
+ for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1097
+ if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1098
+ /*
1099
+ * Replace the definition.
1100
+ */
1101
+ xmlFree((char *) xmlCharEncodingAliases[i].name);
1102
+ xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1103
+ return(0);
1104
+ }
1105
+ }
1106
+ /*
1107
+ * Add the definition
1108
+ */
1109
+ xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1110
+ xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1111
+ xmlCharEncodingAliasesNb++;
1112
+ return(0);
1113
+ }
1114
+
1115
+ /**
1116
+ * xmlDelEncodingAlias:
1117
+ * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1118
+ *
1119
+ * Unregisters an encoding alias @alias
1120
+ *
1121
+ * Returns 0 in case of success, -1 in case of error
1122
+ */
1123
+ int
1124
+ xmlDelEncodingAlias(const char *alias) {
1125
+ int i;
1126
+
1127
+ if (alias == NULL)
1128
+ return(-1);
1129
+
1130
+ if (xmlCharEncodingAliases == NULL)
1131
+ return(-1);
1132
+ /*
1133
+ * Walk down the list looking for a definition of the alias
1134
+ */
1135
+ for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1136
+ if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1137
+ xmlFree((char *) xmlCharEncodingAliases[i].name);
1138
+ xmlFree((char *) xmlCharEncodingAliases[i].alias);
1139
+ xmlCharEncodingAliasesNb--;
1140
+ memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1141
+ sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1142
+ return(0);
1143
+ }
1144
+ }
1145
+ return(-1);
1146
+ }
1147
+
1148
+ /**
1149
+ * xmlParseCharEncoding:
1150
+ * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1151
+ *
1152
+ * Compare the string to the encoding schemes already known. Note
1153
+ * that the comparison is case insensitive accordingly to the section
1154
+ * [XML] 4.3.3 Character Encoding in Entities.
1155
+ *
1156
+ * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1157
+ * if not recognized.
1158
+ */
1159
+ xmlCharEncoding
1160
+ xmlParseCharEncoding(const char* name)
1161
+ {
1162
+ const char *alias;
1163
+ char upper[500];
1164
+ int i;
1165
+
1166
+ if (name == NULL)
1167
+ return(XML_CHAR_ENCODING_NONE);
1168
+
1169
+ /*
1170
+ * Do the alias resolution
1171
+ */
1172
+ alias = xmlGetEncodingAlias(name);
1173
+ if (alias != NULL)
1174
+ name = alias;
1175
+
1176
+ for (i = 0;i < 499;i++) {
1177
+ upper[i] = toupper(name[i]);
1178
+ if (upper[i] == 0) break;
1179
+ }
1180
+ upper[i] = 0;
1181
+
1182
+ if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1183
+ if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1184
+ if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1185
+
1186
+ /*
1187
+ * NOTE: if we were able to parse this, the endianness of UTF16 is
1188
+ * already found and in use
1189
+ */
1190
+ if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1191
+ if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1192
+
1193
+ if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1194
+ if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195
+ if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1196
+
1197
+ /*
1198
+ * NOTE: if we were able to parse this, the endianness of UCS4 is
1199
+ * already found and in use
1200
+ */
1201
+ if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1202
+ if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203
+ if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1204
+
1205
+
1206
+ if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1207
+ if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1208
+ if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1209
+
1210
+ if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1211
+ if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1212
+ if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1213
+
1214
+ if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1215
+ if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1216
+ if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1217
+ if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1218
+ if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1219
+ if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1220
+ if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1221
+
1222
+ if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1223
+ if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1224
+ if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1225
+
1226
+ #ifdef DEBUG_ENCODING
1227
+ xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1228
+ #endif
1229
+ return(XML_CHAR_ENCODING_ERROR);
1230
+ }
1231
+
1232
+ /**
1233
+ * xmlGetCharEncodingName:
1234
+ * @enc: the encoding
1235
+ *
1236
+ * The "canonical" name for XML encoding.
1237
+ * C.f. http://www.w3.org/TR/REC-xml#charencoding
1238
+ * Section 4.3.3 Character Encoding in Entities
1239
+ *
1240
+ * Returns the canonical name for the given encoding
1241
+ */
1242
+
1243
+ const char*
1244
+ xmlGetCharEncodingName(xmlCharEncoding enc) {
1245
+ switch (enc) {
1246
+ case XML_CHAR_ENCODING_ERROR:
1247
+ return(NULL);
1248
+ case XML_CHAR_ENCODING_NONE:
1249
+ return(NULL);
1250
+ case XML_CHAR_ENCODING_UTF8:
1251
+ return("UTF-8");
1252
+ case XML_CHAR_ENCODING_UTF16LE:
1253
+ return("UTF-16");
1254
+ case XML_CHAR_ENCODING_UTF16BE:
1255
+ return("UTF-16");
1256
+ case XML_CHAR_ENCODING_EBCDIC:
1257
+ return("EBCDIC");
1258
+ case XML_CHAR_ENCODING_UCS4LE:
1259
+ return("ISO-10646-UCS-4");
1260
+ case XML_CHAR_ENCODING_UCS4BE:
1261
+ return("ISO-10646-UCS-4");
1262
+ case XML_CHAR_ENCODING_UCS4_2143:
1263
+ return("ISO-10646-UCS-4");
1264
+ case XML_CHAR_ENCODING_UCS4_3412:
1265
+ return("ISO-10646-UCS-4");
1266
+ case XML_CHAR_ENCODING_UCS2:
1267
+ return("ISO-10646-UCS-2");
1268
+ case XML_CHAR_ENCODING_8859_1:
1269
+ return("ISO-8859-1");
1270
+ case XML_CHAR_ENCODING_8859_2:
1271
+ return("ISO-8859-2");
1272
+ case XML_CHAR_ENCODING_8859_3:
1273
+ return("ISO-8859-3");
1274
+ case XML_CHAR_ENCODING_8859_4:
1275
+ return("ISO-8859-4");
1276
+ case XML_CHAR_ENCODING_8859_5:
1277
+ return("ISO-8859-5");
1278
+ case XML_CHAR_ENCODING_8859_6:
1279
+ return("ISO-8859-6");
1280
+ case XML_CHAR_ENCODING_8859_7:
1281
+ return("ISO-8859-7");
1282
+ case XML_CHAR_ENCODING_8859_8:
1283
+ return("ISO-8859-8");
1284
+ case XML_CHAR_ENCODING_8859_9:
1285
+ return("ISO-8859-9");
1286
+ case XML_CHAR_ENCODING_2022_JP:
1287
+ return("ISO-2022-JP");
1288
+ case XML_CHAR_ENCODING_SHIFT_JIS:
1289
+ return("Shift-JIS");
1290
+ case XML_CHAR_ENCODING_EUC_JP:
1291
+ return("EUC-JP");
1292
+ case XML_CHAR_ENCODING_ASCII:
1293
+ return(NULL);
1294
+ }
1295
+ return(NULL);
1296
+ }
1297
+
1298
+ /************************************************************************
1299
+ * *
1300
+ * Char encoding handlers *
1301
+ * *
1302
+ ************************************************************************/
1303
+
1304
+
1305
+ /* fixed capacity of the handlers table; xmlRegisterCharEncodingHandler rejects registrations beyond it */
1306
+ #define MAX_ENCODING_HANDLERS 50
1307
+ static xmlCharEncodingHandlerPtr *handlers = NULL;
1308
+ static int nbCharEncodingHandler = 0;
1309
+
1310
+ /*
1311
+ * The default is UTF-8 for XML, which is also the encoding used for the
1312
+ * parser internals, so the default encoding handler is NULL.
1313
+ */
1314
+
1315
+ static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1316
+
1317
+ /**
1318
+ * xmlNewCharEncodingHandler:
1319
+ * @name: the encoding name, in UTF-8 format (ASCII actually)
1320
+ * @input: the xmlCharEncodingInputFunc to read that encoding
1321
+ * @output: the xmlCharEncodingOutputFunc to write that encoding
1322
+ *
1323
+ * Create and registers an xmlCharEncodingHandler.
1324
+ *
1325
+ * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1326
+ */
1327
+ xmlCharEncodingHandlerPtr
1328
+ xmlNewCharEncodingHandler(const char *name,
1329
+ xmlCharEncodingInputFunc input,
1330
+ xmlCharEncodingOutputFunc output) {
1331
+ xmlCharEncodingHandlerPtr handler;
1332
+ const char *alias;
1333
+ char upper[500];
1334
+ int i;
1335
+ char *up = NULL;
1336
+
1337
+ /*
1338
+ * Do the alias resolution
1339
+ */
1340
+ alias = xmlGetEncodingAlias(name);
1341
+ if (alias != NULL)
1342
+ name = alias;
1343
+
1344
+ /*
1345
+ * Keep only the uppercase version of the encoding.
1346
+ */
1347
+ if (name == NULL) {
1348
+ xmlEncodingErr(XML_I18N_NO_NAME,
1349
+ "xmlNewCharEncodingHandler : no name !\n", NULL);
1350
+ return(NULL);
1351
+ }
1352
+ for (i = 0;i < 499;i++) {
1353
+ upper[i] = toupper(name[i]);
1354
+ if (upper[i] == 0) break;
1355
+ }
1356
+ upper[i] = 0;
1357
+ up = xmlMemStrdup(upper);
1358
+ if (up == NULL) {
1359
+ xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1360
+ return(NULL);
1361
+ }
1362
+
1363
+ /*
1364
+ * allocate and fill-up an handler block.
1365
+ */
1366
+ handler = (xmlCharEncodingHandlerPtr)
1367
+ xmlMalloc(sizeof(xmlCharEncodingHandler));
1368
+ if (handler == NULL) {
1369
+ xmlFree(up);
1370
+ xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1371
+ return(NULL);
1372
+ }
1373
+ memset(handler, 0, sizeof(xmlCharEncodingHandler));
1374
+ handler->input = input;
1375
+ handler->output = output;
1376
+ handler->name = up;
1377
+
1378
+ #ifdef LIBXML_ICONV_ENABLED
1379
+ handler->iconv_in = NULL;
1380
+ handler->iconv_out = NULL;
1381
+ #endif
1382
+ #ifdef LIBXML_ICU_ENABLED
1383
+ handler->uconv_in = NULL;
1384
+ handler->uconv_out = NULL;
1385
+ #endif
1386
+
1387
+ /*
1388
+ * registers and returns the handler.
1389
+ */
1390
+ xmlRegisterCharEncodingHandler(handler);
1391
+ #ifdef DEBUG_ENCODING
1392
+ xmlGenericError(xmlGenericErrorContext,
1393
+ "Registered encoding handler for %s\n", name);
1394
+ #endif
1395
+ return(handler);
1396
+ }
1397
+
1398
+ /**
1399
+ * xmlInitCharEncodingHandlers:
1400
+ *
1401
+ * Initialize the char encoding support, it registers the default
1402
+ * encoding supported.
1403
+ * NOTE: while public, this function usually doesn't need to be called
1404
+ * in normal processing.
1405
+ */
1406
+ void
1407
+ xmlInitCharEncodingHandlers(void) {
1408
+ unsigned short int tst = 0x1234;
1409
+ unsigned char *ptr = (unsigned char *) &tst;
1410
+
1411
+ if (handlers != NULL) return;
1412
+
1413
+ handlers = (xmlCharEncodingHandlerPtr *)
1414
+ xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1415
+
1416
+ if (*ptr == 0x12) xmlLittleEndian = 0;
1417
+ else if (*ptr == 0x34) xmlLittleEndian = 1;
1418
+ else {
1419
+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1420
+ "Odd problem at endianness detection\n", NULL);
1421
+ }
1422
+
1423
+ if (handlers == NULL) {
1424
+ xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1425
+ return;
1426
+ }
1427
+ xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1428
+ #ifdef LIBXML_OUTPUT_ENABLED
1429
+ xmlUTF16LEHandler =
1430
+ xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1431
+ xmlUTF16BEHandler =
1432
+ xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1433
+ xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1434
+ xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1435
+ xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1436
+ xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1437
+ #ifdef LIBXML_HTML_ENABLED
1438
+ xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1439
+ #endif
1440
+ #else
1441
+ xmlUTF16LEHandler =
1442
+ xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1443
+ xmlUTF16BEHandler =
1444
+ xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1445
+ xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1446
+ xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1447
+ xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1448
+ xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1449
+ #endif /* LIBXML_OUTPUT_ENABLED */
1450
+ #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1451
+ #ifdef LIBXML_ISO8859X_ENABLED
1452
+ xmlRegisterCharEncodingHandlersISO8859x ();
1453
+ #endif
1454
+ #endif
1455
+
1456
+ }
1457
+
1458
+ /**
1459
+ * xmlCleanupCharEncodingHandlers:
1460
+ *
1461
+ * Cleanup the memory allocated for the char encoding support, it
1462
+ * unregisters all the encoding handlers and the aliases.
1463
+ */
1464
+ void
1465
+ xmlCleanupCharEncodingHandlers(void) {
1466
+ xmlCleanupEncodingAliases();
1467
+
1468
+ if (handlers == NULL) return;
1469
+
1470
+ for (;nbCharEncodingHandler > 0;) {
1471
+ nbCharEncodingHandler--;
1472
+ if (handlers[nbCharEncodingHandler] != NULL) {
1473
+ if (handlers[nbCharEncodingHandler]->name != NULL)
1474
+ xmlFree(handlers[nbCharEncodingHandler]->name);
1475
+ xmlFree(handlers[nbCharEncodingHandler]);
1476
+ }
1477
+ }
1478
+ xmlFree(handlers);
1479
+ handlers = NULL;
1480
+ nbCharEncodingHandler = 0;
1481
+ xmlDefaultCharEncodingHandler = NULL;
1482
+ }
1483
+
1484
+ /**
1485
+ * xmlRegisterCharEncodingHandler:
1486
+ * @handler: the xmlCharEncodingHandlerPtr handler block
1487
+ *
1488
+ * Register the char encoding handler, surprising, isn't it ?
1489
+ */
1490
+ void
1491
+ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1492
+ if (handlers == NULL) xmlInitCharEncodingHandlers();
1493
+ if ((handler == NULL) || (handlers == NULL)) {
1494
+ xmlEncodingErr(XML_I18N_NO_HANDLER,
1495
+ "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1496
+ goto free_handler;
1497
+ }
1498
+
1499
+ if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1500
+ xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1501
+ "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1502
+ "MAX_ENCODING_HANDLERS");
1503
+ goto free_handler;
1504
+ }
1505
+ handlers[nbCharEncodingHandler++] = handler;
1506
+ return;
1507
+
1508
+ free_handler:
1509
+ if (handler != NULL) {
1510
+ if (handler->name != NULL) {
1511
+ xmlFree(handler->name);
1512
+ }
1513
+ xmlFree(handler);
1514
+ }
1515
+ }
1516
+
1517
+ /**
1518
+ * xmlGetCharEncodingHandler:
1519
+ * @enc: an xmlCharEncoding value.
1520
+ *
1521
+ * Search in the registered set the handler able to read/write that encoding.
1522
+ *
1523
+ * Returns the handler or NULL if not found
1524
+ */
1525
+ xmlCharEncodingHandlerPtr
1526
+ xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1527
+ xmlCharEncodingHandlerPtr handler;
1528
+
1529
+ if (handlers == NULL) xmlInitCharEncodingHandlers();
1530
+ switch (enc) {
1531
+ case XML_CHAR_ENCODING_ERROR:
1532
+ return(NULL);
1533
+ case XML_CHAR_ENCODING_NONE:
1534
+ return(NULL);
1535
+ case XML_CHAR_ENCODING_UTF8:
1536
+ return(NULL);
1537
+ case XML_CHAR_ENCODING_UTF16LE:
1538
+ return(xmlUTF16LEHandler);
1539
+ case XML_CHAR_ENCODING_UTF16BE:
1540
+ return(xmlUTF16BEHandler);
1541
+ case XML_CHAR_ENCODING_EBCDIC:
1542
+ handler = xmlFindCharEncodingHandler("EBCDIC");
1543
+ if (handler != NULL) return(handler);
1544
+ handler = xmlFindCharEncodingHandler("ebcdic");
1545
+ if (handler != NULL) return(handler);
1546
+ handler = xmlFindCharEncodingHandler("EBCDIC-US");
1547
+ if (handler != NULL) return(handler);
1548
+ handler = xmlFindCharEncodingHandler("IBM-037");
1549
+ if (handler != NULL) return(handler);
1550
+ break;
1551
+ case XML_CHAR_ENCODING_UCS4BE:
1552
+ handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1553
+ if (handler != NULL) return(handler);
1554
+ handler = xmlFindCharEncodingHandler("UCS-4");
1555
+ if (handler != NULL) return(handler);
1556
+ handler = xmlFindCharEncodingHandler("UCS4");
1557
+ if (handler != NULL) return(handler);
1558
+ break;
1559
+ case XML_CHAR_ENCODING_UCS4LE:
1560
+ handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1561
+ if (handler != NULL) return(handler);
1562
+ handler = xmlFindCharEncodingHandler("UCS-4");
1563
+ if (handler != NULL) return(handler);
1564
+ handler = xmlFindCharEncodingHandler("UCS4");
1565
+ if (handler != NULL) return(handler);
1566
+ break;
1567
+ case XML_CHAR_ENCODING_UCS4_2143:
1568
+ break;
1569
+ case XML_CHAR_ENCODING_UCS4_3412:
1570
+ break;
1571
+ case XML_CHAR_ENCODING_UCS2:
1572
+ handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1573
+ if (handler != NULL) return(handler);
1574
+ handler = xmlFindCharEncodingHandler("UCS-2");
1575
+ if (handler != NULL) return(handler);
1576
+ handler = xmlFindCharEncodingHandler("UCS2");
1577
+ if (handler != NULL) return(handler);
1578
+ break;
1579
+
1580
+ /*
1581
+ * We used to keep ISO Latin encodings native in the
1582
+ * generated data. This led to so many problems that
1583
+ * this has been removed. One can still change this
1584
+ * back by registering no-ops encoders for those
1585
+ */
1586
+ case XML_CHAR_ENCODING_8859_1:
1587
+ handler = xmlFindCharEncodingHandler("ISO-8859-1");
1588
+ if (handler != NULL) return(handler);
1589
+ break;
1590
+ case XML_CHAR_ENCODING_8859_2:
1591
+ handler = xmlFindCharEncodingHandler("ISO-8859-2");
1592
+ if (handler != NULL) return(handler);
1593
+ break;
1594
+ case XML_CHAR_ENCODING_8859_3:
1595
+ handler = xmlFindCharEncodingHandler("ISO-8859-3");
1596
+ if (handler != NULL) return(handler);
1597
+ break;
1598
+ case XML_CHAR_ENCODING_8859_4:
1599
+ handler = xmlFindCharEncodingHandler("ISO-8859-4");
1600
+ if (handler != NULL) return(handler);
1601
+ break;
1602
+ case XML_CHAR_ENCODING_8859_5:
1603
+ handler = xmlFindCharEncodingHandler("ISO-8859-5");
1604
+ if (handler != NULL) return(handler);
1605
+ break;
1606
+ case XML_CHAR_ENCODING_8859_6:
1607
+ handler = xmlFindCharEncodingHandler("ISO-8859-6");
1608
+ if (handler != NULL) return(handler);
1609
+ break;
1610
+ case XML_CHAR_ENCODING_8859_7:
1611
+ handler = xmlFindCharEncodingHandler("ISO-8859-7");
1612
+ if (handler != NULL) return(handler);
1613
+ break;
1614
+ case XML_CHAR_ENCODING_8859_8:
1615
+ handler = xmlFindCharEncodingHandler("ISO-8859-8");
1616
+ if (handler != NULL) return(handler);
1617
+ break;
1618
+ case XML_CHAR_ENCODING_8859_9:
1619
+ handler = xmlFindCharEncodingHandler("ISO-8859-9");
1620
+ if (handler != NULL) return(handler);
1621
+ break;
1622
+
1623
+
1624
+ case XML_CHAR_ENCODING_2022_JP:
1625
+ handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1626
+ if (handler != NULL) return(handler);
1627
+ break;
1628
+ case XML_CHAR_ENCODING_SHIFT_JIS:
1629
+ handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1630
+ if (handler != NULL) return(handler);
1631
+ handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1632
+ if (handler != NULL) return(handler);
1633
+ handler = xmlFindCharEncodingHandler("Shift_JIS");
1634
+ if (handler != NULL) return(handler);
1635
+ break;
1636
+ case XML_CHAR_ENCODING_EUC_JP:
1637
+ handler = xmlFindCharEncodingHandler("EUC-JP");
1638
+ if (handler != NULL) return(handler);
1639
+ break;
1640
+ default:
1641
+ break;
1642
+ }
1643
+
1644
+ #ifdef DEBUG_ENCODING
1645
+ xmlGenericError(xmlGenericErrorContext,
1646
+ "No handler found for encoding %d\n", enc);
1647
+ #endif
1648
+ return(NULL);
1649
+ }
1650
+
1651
+ /**
1652
+ * xmlFindCharEncodingHandler:
1653
+ * @name: a string describing the char encoding.
1654
+ *
1655
+ * Search in the registered set the handler able to read/write that encoding.
1656
+ *
1657
+ * Returns the handler or NULL if not found
1658
+ */
1659
+ xmlCharEncodingHandlerPtr
1660
+ xmlFindCharEncodingHandler(const char *name) {
1661
+ const char *nalias;
1662
+ const char *norig;
1663
+ xmlCharEncoding alias;
1664
+ #ifdef LIBXML_ICONV_ENABLED
1665
+ xmlCharEncodingHandlerPtr enc;
1666
+ iconv_t icv_in, icv_out;
1667
+ #endif /* LIBXML_ICONV_ENABLED */
1668
+ #ifdef LIBXML_ICU_ENABLED
1669
+ xmlCharEncodingHandlerPtr encu;
1670
+ uconv_t *ucv_in, *ucv_out;
1671
+ #endif /* LIBXML_ICU_ENABLED */
1672
+ char upper[100];
1673
+ int i;
1674
+
1675
+ if (handlers == NULL) xmlInitCharEncodingHandlers();
1676
+ if (name == NULL) return(xmlDefaultCharEncodingHandler);
1677
+ if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1678
+
1679
+ /*
1680
+ * Do the alias resolution
1681
+ */
1682
+ norig = name;
1683
+ nalias = xmlGetEncodingAlias(name);
1684
+ if (nalias != NULL)
1685
+ name = nalias;
1686
+
1687
+ /*
1688
+ * Check first for directly registered encoding names
1689
+ */
1690
+ for (i = 0;i < 99;i++) {
1691
+ upper[i] = toupper(name[i]);
1692
+ if (upper[i] == 0) break;
1693
+ }
1694
+ upper[i] = 0;
1695
+
1696
+ if (handlers != NULL) {
1697
+ for (i = 0;i < nbCharEncodingHandler; i++) {
1698
+ if (!strcmp(upper, handlers[i]->name)) {
1699
+ #ifdef DEBUG_ENCODING
1700
+ xmlGenericError(xmlGenericErrorContext,
1701
+ "Found registered handler for encoding %s\n", name);
1702
+ #endif
1703
+ return(handlers[i]);
1704
+ }
1705
+ }
1706
+ }
1707
+
1708
+ #ifdef LIBXML_ICONV_ENABLED
1709
+ /* check whether iconv can handle this */
1710
+ icv_in = iconv_open("UTF-8", name);
1711
+ icv_out = iconv_open(name, "UTF-8");
1712
+ if (icv_in == (iconv_t) -1) {
1713
+ icv_in = iconv_open("UTF-8", upper);
1714
+ }
1715
+ if (icv_out == (iconv_t) -1) {
1716
+ icv_out = iconv_open(upper, "UTF-8");
1717
+ }
1718
+ if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1719
+ enc = (xmlCharEncodingHandlerPtr)
1720
+ xmlMalloc(sizeof(xmlCharEncodingHandler));
1721
+ if (enc == NULL) {
1722
+ iconv_close(icv_in);
1723
+ iconv_close(icv_out);
1724
+ return(NULL);
1725
+ }
1726
+ memset(enc, 0, sizeof(xmlCharEncodingHandler));
1727
+ enc->name = xmlMemStrdup(name);
1728
+ enc->input = NULL;
1729
+ enc->output = NULL;
1730
+ enc->iconv_in = icv_in;
1731
+ enc->iconv_out = icv_out;
1732
+ #ifdef DEBUG_ENCODING
1733
+ xmlGenericError(xmlGenericErrorContext,
1734
+ "Found iconv handler for encoding %s\n", name);
1735
+ #endif
1736
+ return enc;
1737
+ } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1738
+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1739
+ "iconv : problems with filters for '%s'\n", name);
1740
+ }
1741
+ #endif /* LIBXML_ICONV_ENABLED */
1742
+ #ifdef LIBXML_ICU_ENABLED
1743
+ /* check whether icu can handle this */
1744
+ ucv_in = openIcuConverter(name, 1);
1745
+ ucv_out = openIcuConverter(name, 0);
1746
+ if (ucv_in != NULL && ucv_out != NULL) {
1747
+ encu = (xmlCharEncodingHandlerPtr)
1748
+ xmlMalloc(sizeof(xmlCharEncodingHandler));
1749
+ if (encu == NULL) {
1750
+ closeIcuConverter(ucv_in);
1751
+ closeIcuConverter(ucv_out);
1752
+ return(NULL);
1753
+ }
1754
+ memset(encu, 0, sizeof(xmlCharEncodingHandler));
1755
+ encu->name = xmlMemStrdup(name);
1756
+ encu->input = NULL;
1757
+ encu->output = NULL;
1758
+ encu->uconv_in = ucv_in;
1759
+ encu->uconv_out = ucv_out;
1760
+ #ifdef DEBUG_ENCODING
1761
+ xmlGenericError(xmlGenericErrorContext,
1762
+ "Found ICU converter handler for encoding %s\n", name);
1763
+ #endif
1764
+ return encu;
1765
+ } else if (ucv_in != NULL || ucv_out != NULL) {
1766
+ closeIcuConverter(ucv_in);
1767
+ closeIcuConverter(ucv_out);
1768
+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1769
+ "ICU converter : problems with filters for '%s'\n", name);
1770
+ }
1771
+ #endif /* LIBXML_ICU_ENABLED */
1772
+
1773
+ #ifdef DEBUG_ENCODING
1774
+ xmlGenericError(xmlGenericErrorContext,
1775
+ "No handler found for encoding %s\n", name);
1776
+ #endif
1777
+
1778
+ /*
1779
+ * Fallback using the canonical names
1780
+ */
1781
+ alias = xmlParseCharEncoding(norig);
1782
+ if (alias != XML_CHAR_ENCODING_ERROR) {
1783
+ const char* canon;
1784
+ canon = xmlGetCharEncodingName(alias);
1785
+ if ((canon != NULL) && (strcmp(name, canon))) {
1786
+ return(xmlFindCharEncodingHandler(canon));
1787
+ }
1788
+ }
1789
+
1790
+ /* If "none of the above", give up */
1791
+ return(NULL);
1792
+ }
1793
+
1794
+ /************************************************************************
1795
+ * *
1796
+ * ICONV based generic conversion functions *
1797
+ * *
1798
+ ************************************************************************/
1799
+
1800
+ #ifdef LIBXML_ICONV_ENABLED
1801
+ /**
1802
+ * xmlIconvWrapper:
1803
+ * @cd: iconv converter data structure
1804
+ * @out: a pointer to an array of bytes to store the result
1805
+ * @outlen: the length of @out
1806
+ * @in: a pointer to an array of input bytes
1807
+ * @inlen: the length of @in
1808
+ *
1809
+ * Returns 0 if success, or
1810
+ * -1 by lack of space, or
1811
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
1812
+ * the result of transformation can't fit into the encoding we want), or
1813
+ * -3 if there the last byte can't form a single output char.
1814
+ *
1815
+ * The value of @inlen after return is the number of octets consumed
1816
+ * as the return value is positive, else unpredictable.
1817
+ * The value of @outlen after return is the number of octets produced.
1818
+ */
1819
+ static int
1820
+ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1821
+ const unsigned char *in, int *inlen) {
1822
+ size_t icv_inlen, icv_outlen;
1823
+ const char *icv_in = (const char *) in;
1824
+ char *icv_out = (char *) out;
1825
+ int ret;
1826
+
1827
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1828
+ if (outlen != NULL) *outlen = 0;
1829
+ return(-1);
1830
+ }
1831
+ icv_inlen = *inlen;
1832
+ icv_outlen = *outlen;
1833
+ ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1834
+ *inlen -= icv_inlen;
1835
+ *outlen -= icv_outlen;
1836
+ if ((icv_inlen != 0) || (ret == -1)) {
1837
+ #ifdef EILSEQ
1838
+ if (errno == EILSEQ) {
1839
+ return -2;
1840
+ } else
1841
+ #endif
1842
+ #ifdef E2BIG
1843
+ if (errno == E2BIG) {
1844
+ return -1;
1845
+ } else
1846
+ #endif
1847
+ #ifdef EINVAL
1848
+ if (errno == EINVAL) {
1849
+ return -3;
1850
+ } else
1851
+ #endif
1852
+ {
1853
+ return -3;
1854
+ }
1855
+ }
1856
+ return 0;
1857
+ }
1858
+ #endif /* LIBXML_ICONV_ENABLED */
1859
+
1860
+ /************************************************************************
1861
+ * *
1862
+ * ICU based generic conversion functions *
1863
+ * *
1864
+ ************************************************************************/
1865
+
1866
+ #ifdef LIBXML_ICU_ENABLED
1867
+ /**
1868
+ * xmlUconvWrapper:
1869
+ * @cd: ICU uconverter data structure
1870
+ * @toUnicode : non-zero if toUnicode. 0 otherwise.
1871
+ * @out: a pointer to an array of bytes to store the result
1872
+ * @outlen: the length of @out
1873
+ * @in: a pointer to an array of input bytes
1874
+ * @inlen: the length of @in
1875
+ * @flush: if true, indicates end of input
1876
+ *
1877
+ * Returns 0 if success, or
1878
+ * -1 by lack of space, or
1879
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
1880
+ * the result of transformation can't fit into the encoding we want), or
1881
+ * -3 if there the last byte can't form a single output char.
1882
+ *
1883
+ * The value of @inlen after return is the number of octets consumed
1884
+ * as the return value is positive, else unpredictable.
1885
+ * The value of @outlen after return is the number of octets produced.
1886
+ */
1887
+ static int
1888
+ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1889
+ const unsigned char *in, int *inlen, int flush) {
1890
+ const char *ucv_in = (const char *) in;
1891
+ char *ucv_out = (char *) out;
1892
+ UErrorCode err = U_ZERO_ERROR;
1893
+
1894
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1895
+ if (outlen != NULL) *outlen = 0;
1896
+ return(-1);
1897
+ }
1898
+
1899
+ if (toUnicode) {
1900
+ /* encoding => UTF-16 => UTF-8 */
1901
+ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1902
+ &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1903
+ &cd->pivot_source, &cd->pivot_target,
1904
+ cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1905
+ } else {
1906
+ /* UTF-8 => UTF-16 => encoding */
1907
+ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1908
+ &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1909
+ &cd->pivot_source, &cd->pivot_target,
1910
+ cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1911
+ }
1912
+ *inlen = ucv_in - (const char*) in;
1913
+ *outlen = ucv_out - (char *) out;
1914
+ if (U_SUCCESS(err)) {
1915
+ /* reset pivot buf if this is the last call for input (flush==TRUE) */
1916
+ if (flush)
1917
+ cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1918
+ return 0;
1919
+ }
1920
+ if (err == U_BUFFER_OVERFLOW_ERROR)
1921
+ return -1;
1922
+ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1923
+ return -2;
1924
+ return -3;
1925
+ }
1926
+ #endif /* LIBXML_ICU_ENABLED */
1927
+
1928
+ /************************************************************************
1929
+ * *
1930
+ * The real API used by libxml for on-the-fly conversion *
1931
+ * *
1932
+ ************************************************************************/
1933
+
1934
+ /**
1935
+ * xmlEncInputChunk:
1936
+ * @handler: encoding handler
1937
+ * @out: a pointer to an array of bytes to store the result
1938
+ * @outlen: the length of @out
1939
+ * @in: a pointer to an array of input bytes
1940
+ * @inlen: the length of @in
1941
+ * @flush: flush (ICU-related)
1942
+ *
1943
+ * Returns 0 if success, or
1944
+ * -1 by lack of space, or
1945
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
1946
+ * the result of transformation can't fit into the encoding we want), or
1947
+ * -3 if there the last byte can't form a single output char.
1948
+ *
1949
+ * The value of @inlen after return is the number of octets consumed
1950
+ * as the return value is 0, else unpredictable.
1951
+ * The value of @outlen after return is the number of octets produced.
1952
+ */
1953
+ static int
1954
+ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1955
+ int *outlen, const unsigned char *in, int *inlen, int flush) {
1956
+ int ret;
1957
+ (void)flush;
1958
+
1959
+ if (handler->input != NULL) {
1960
+ ret = handler->input(out, outlen, in, inlen);
1961
+ if (ret > 0)
1962
+ ret = 0;
1963
+ }
1964
+ #ifdef LIBXML_ICONV_ENABLED
1965
+ else if (handler->iconv_in != NULL) {
1966
+ ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1967
+ }
1968
+ #endif /* LIBXML_ICONV_ENABLED */
1969
+ #ifdef LIBXML_ICU_ENABLED
1970
+ else if (handler->uconv_in != NULL) {
1971
+ ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1972
+ flush);
1973
+ }
1974
+ #endif /* LIBXML_ICU_ENABLED */
1975
+ else {
1976
+ *outlen = 0;
1977
+ *inlen = 0;
1978
+ ret = -2;
1979
+ }
1980
+
1981
+ return(ret);
1982
+ }
1983
+
1984
+ /**
1985
+ * xmlEncOutputChunk:
1986
+ * @handler: encoding handler
1987
+ * @out: a pointer to an array of bytes to store the result
1988
+ * @outlen: the length of @out
1989
+ * @in: a pointer to an array of input bytes
1990
+ * @inlen: the length of @in
1991
+ *
1992
+ * Returns 0 if success, or
1993
+ * -1 by lack of space, or
1994
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
1995
+ * the result of transformation can't fit into the encoding we want), or
1996
+ * -3 if there the last byte can't form a single output char.
1997
+ * -4 if no output function was found.
1998
+ *
1999
+ * The value of @inlen after return is the number of octets consumed
2000
+ * as the return value is 0, else unpredictable.
2001
+ * The value of @outlen after return is the number of octets produced.
2002
+ */
2003
+ static int
2004
+ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2005
+ int *outlen, const unsigned char *in, int *inlen) {
2006
+ int ret;
2007
+
2008
+ if (handler->output != NULL) {
2009
+ ret = handler->output(out, outlen, in, inlen);
2010
+ if (ret > 0)
2011
+ ret = 0;
2012
+ }
2013
+ #ifdef LIBXML_ICONV_ENABLED
2014
+ else if (handler->iconv_out != NULL) {
2015
+ ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2016
+ }
2017
+ #endif /* LIBXML_ICONV_ENABLED */
2018
+ #ifdef LIBXML_ICU_ENABLED
2019
+ else if (handler->uconv_out != NULL) {
2020
+ ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2021
+ 1);
2022
+ }
2023
+ #endif /* LIBXML_ICU_ENABLED */
2024
+ else {
2025
+ *outlen = 0;
2026
+ *inlen = 0;
2027
+ ret = -4;
2028
+ }
2029
+
2030
+ return(ret);
2031
+ }
2032
+
2033
+ /**
2034
+ * xmlCharEncFirstLineInt:
2035
+ * @handler: char encoding transformation data structure
2036
+ * @out: an xmlBuffer for the output.
2037
+ * @in: an xmlBuffer for the input
2038
+ * @len: number of bytes to convert for the first line, or -1
2039
+ *
2040
+ * Front-end for the encoding handler input function, but handle only
2041
+ * the very first line, i.e. limit itself to 45 chars.
2042
+ *
2043
+ * Returns the number of byte written if success, or
2044
+ * -1 general error
2045
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
2046
+ * the result of transformation can't fit into the encoding we want), or
2047
+ */
2048
+ int
2049
+ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2050
+ xmlBufferPtr in, int len) {
2051
+ int ret;
2052
+ int written;
2053
+ int toconv;
2054
+
2055
+ if (handler == NULL) return(-1);
2056
+ if (out == NULL) return(-1);
2057
+ if (in == NULL) return(-1);
2058
+
2059
+ /* calculate space available */
2060
+ written = out->size - out->use - 1; /* count '\0' */
2061
+ toconv = in->use;
2062
+ /*
2063
+ * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2064
+ * 45 chars should be sufficient to reach the end of the encoding
2065
+ * declaration without going too far inside the document content.
2066
+ * on UTF-16 this means 90bytes, on UCS4 this means 180
2067
+ * The actual value depending on guessed encoding is passed as @len
2068
+ * if provided
2069
+ */
2070
+ if (len >= 0) {
2071
+ if (toconv > len)
2072
+ toconv = len;
2073
+ } else {
2074
+ if (toconv > 180)
2075
+ toconv = 180;
2076
+ }
2077
+ if (toconv * 2 >= written) {
2078
+ xmlBufferGrow(out, toconv * 2);
2079
+ written = out->size - out->use - 1;
2080
+ }
2081
+
2082
+ ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2083
+ in->content, &toconv, 0);
2084
+ xmlBufferShrink(in, toconv);
2085
+ out->use += written;
2086
+ out->content[out->use] = 0;
2087
+ if (ret == -1) ret = -3;
2088
+
2089
+ #ifdef DEBUG_ENCODING
2090
+ switch (ret) {
2091
+ case 0:
2092
+ xmlGenericError(xmlGenericErrorContext,
2093
+ "converted %d bytes to %d bytes of input\n",
2094
+ toconv, written);
2095
+ break;
2096
+ case -1:
2097
+ xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2098
+ toconv, written, in->use);
2099
+ break;
2100
+ case -2:
2101
+ xmlGenericError(xmlGenericErrorContext,
2102
+ "input conversion failed due to input error\n");
2103
+ break;
2104
+ case -3:
2105
+ xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2106
+ toconv, written, in->use);
2107
+ break;
2108
+ default:
2109
+ xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2110
+ }
2111
+ #endif /* DEBUG_ENCODING */
2112
+ /*
2113
+ * Ignore when input buffer is not on a boundary
2114
+ */
2115
+ if (ret == -3) ret = 0;
2116
+ if (ret == -1) ret = 0;
2117
+ return(written ? written : ret);
2118
+ }
2119
+
2120
+ /**
2121
+ * xmlCharEncFirstLine:
2122
+ * @handler: char encoding transformation data structure
2123
+ * @out: an xmlBuffer for the output.
2124
+ * @in: an xmlBuffer for the input
2125
+ *
2126
+ * Front-end for the encoding handler input function, but handle only
2127
+ * the very first line, i.e. limit itself to 45 chars.
2128
+ *
2129
+ * Returns the number of byte written if success, or
2130
+ * -1 general error
2131
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
2132
+ * the result of transformation can't fit into the encoding we want), or
2133
+ */
2134
+ int
2135
+ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2136
+ xmlBufferPtr in) {
2137
+ return(xmlCharEncFirstLineInt(handler, out, in, -1));
2138
+ }
2139
+
2140
+ /**
2141
+ * xmlCharEncFirstLineInput:
2142
+ * @input: a parser input buffer
2143
+ * @len: number of bytes to convert for the first line, or -1
2144
+ *
2145
+ * Front-end for the encoding handler input function, but handle only
2146
+ * the very first line. Point is that this is based on autodetection
2147
+ * of the encoding and once that first line is converted we may find
2148
+ * out that a different decoder is needed to process the input.
2149
+ *
2150
+ * Returns the number of byte written if success, or
2151
+ * -1 general error
2152
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
2153
+ * the result of transformation can't fit into the encoding we want), or
2154
+ */
2155
+ int
2156
+ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2157
+ {
2158
+ int ret;
2159
+ size_t written;
2160
+ size_t toconv;
2161
+ int c_in;
2162
+ int c_out;
2163
+ xmlBufPtr in;
2164
+ xmlBufPtr out;
2165
+
2166
+ if ((input == NULL) || (input->encoder == NULL) ||
2167
+ (input->buffer == NULL) || (input->raw == NULL))
2168
+ return (-1);
2169
+ out = input->buffer;
2170
+ in = input->raw;
2171
+
2172
+ toconv = xmlBufUse(in);
2173
+ if (toconv == 0)
2174
+ return (0);
2175
+ written = xmlBufAvail(out) - 1; /* count '\0' */
2176
+ /*
2177
+ * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2178
+ * 45 chars should be sufficient to reach the end of the encoding
2179
+ * declaration without going too far inside the document content.
2180
+ * on UTF-16 this means 90bytes, on UCS4 this means 180
2181
+ * The actual value depending on guessed encoding is passed as @len
2182
+ * if provided
2183
+ */
2184
+ if (len >= 0) {
2185
+ if (toconv > (unsigned int) len)
2186
+ toconv = len;
2187
+ } else {
2188
+ if (toconv > 180)
2189
+ toconv = 180;
2190
+ }
2191
+ if (toconv * 2 >= written) {
2192
+ xmlBufGrow(out, toconv * 2);
2193
+ written = xmlBufAvail(out) - 1;
2194
+ }
2195
+ if (written > 360)
2196
+ written = 360;
2197
+
2198
+ c_in = toconv;
2199
+ c_out = written;
2200
+ ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2201
+ xmlBufContent(in), &c_in, 0);
2202
+ xmlBufShrink(in, c_in);
2203
+ xmlBufAddLen(out, c_out);
2204
+ if (ret == -1)
2205
+ ret = -3;
2206
+
2207
+ switch (ret) {
2208
+ case 0:
2209
+ #ifdef DEBUG_ENCODING
2210
+ xmlGenericError(xmlGenericErrorContext,
2211
+ "converted %d bytes to %d bytes of input\n",
2212
+ c_in, c_out);
2213
+ #endif
2214
+ break;
2215
+ case -1:
2216
+ #ifdef DEBUG_ENCODING
2217
+ xmlGenericError(xmlGenericErrorContext,
2218
+ "converted %d bytes to %d bytes of input, %d left\n",
2219
+ c_in, c_out, (int)xmlBufUse(in));
2220
+ #endif
2221
+ break;
2222
+ case -3:
2223
+ #ifdef DEBUG_ENCODING
2224
+ xmlGenericError(xmlGenericErrorContext,
2225
+ "converted %d bytes to %d bytes of input, %d left\n",
2226
+ c_in, c_out, (int)xmlBufUse(in));
2227
+ #endif
2228
+ break;
2229
+ case -2: {
2230
+ char buf[50];
2231
+ const xmlChar *content = xmlBufContent(in);
2232
+
2233
+ snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2234
+ content[0], content[1],
2235
+ content[2], content[3]);
2236
+ buf[49] = 0;
2237
+ xmlEncodingErr(XML_I18N_CONV_FAILED,
2238
+ "input conversion failed due to input error, bytes %s\n",
2239
+ buf);
2240
+ }
2241
+ }
2242
+ /*
2243
+ * Ignore when input buffer is not on a boundary
2244
+ */
2245
+ if (ret == -3) ret = 0;
2246
+ if (ret == -1) ret = 0;
2247
+ return(c_out ? c_out : ret);
2248
+ }
2249
+
2250
+ /**
2251
+ * xmlCharEncInput:
2252
+ * @input: a parser input buffer
2253
+ * @flush: try to flush all the raw buffer
2254
+ *
2255
+ * Generic front-end for the encoding handler on parser input
2256
+ *
2257
+ * Returns the number of byte written if success, or
2258
+ * -1 general error
2259
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
2260
+ * the result of transformation can't fit into the encoding we want), or
2261
+ */
2262
+ int
2263
+ xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2264
+ {
2265
+ int ret;
2266
+ size_t written;
2267
+ size_t toconv;
2268
+ int c_in;
2269
+ int c_out;
2270
+ xmlBufPtr in;
2271
+ xmlBufPtr out;
2272
+
2273
+ if ((input == NULL) || (input->encoder == NULL) ||
2274
+ (input->buffer == NULL) || (input->raw == NULL))
2275
+ return (-1);
2276
+ out = input->buffer;
2277
+ in = input->raw;
2278
+
2279
+ toconv = xmlBufUse(in);
2280
+ if (toconv == 0)
2281
+ return (0);
2282
+ if ((toconv > 64 * 1024) && (flush == 0))
2283
+ toconv = 64 * 1024;
2284
+ written = xmlBufAvail(out);
2285
+ if (written > 0)
2286
+ written--; /* count '\0' */
2287
+ if (toconv * 2 >= written) {
2288
+ xmlBufGrow(out, toconv * 2);
2289
+ written = xmlBufAvail(out);
2290
+ if (written > 0)
2291
+ written--; /* count '\0' */
2292
+ }
2293
+ if ((written > 128 * 1024) && (flush == 0))
2294
+ written = 128 * 1024;
2295
+
2296
+ c_in = toconv;
2297
+ c_out = written;
2298
+ ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2299
+ xmlBufContent(in), &c_in, flush);
2300
+ xmlBufShrink(in, c_in);
2301
+ xmlBufAddLen(out, c_out);
2302
+ if (ret == -1)
2303
+ ret = -3;
2304
+
2305
+ switch (ret) {
2306
+ case 0:
2307
+ #ifdef DEBUG_ENCODING
2308
+ xmlGenericError(xmlGenericErrorContext,
2309
+ "converted %d bytes to %d bytes of input\n",
2310
+ c_in, c_out);
2311
+ #endif
2312
+ break;
2313
+ case -1:
2314
+ #ifdef DEBUG_ENCODING
2315
+ xmlGenericError(xmlGenericErrorContext,
2316
+ "converted %d bytes to %d bytes of input, %d left\n",
2317
+ c_in, c_out, (int)xmlBufUse(in));
2318
+ #endif
2319
+ break;
2320
+ case -3:
2321
+ #ifdef DEBUG_ENCODING
2322
+ xmlGenericError(xmlGenericErrorContext,
2323
+ "converted %d bytes to %d bytes of input, %d left\n",
2324
+ c_in, c_out, (int)xmlBufUse(in));
2325
+ #endif
2326
+ break;
2327
+ case -2: {
2328
+ char buf[50];
2329
+ const xmlChar *content = xmlBufContent(in);
2330
+
2331
+ snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2332
+ content[0], content[1],
2333
+ content[2], content[3]);
2334
+ buf[49] = 0;
2335
+ xmlEncodingErr(XML_I18N_CONV_FAILED,
2336
+ "input conversion failed due to input error, bytes %s\n",
2337
+ buf);
2338
+ }
2339
+ }
2340
+ /*
2341
+ * Ignore when input buffer is not on a boundary
2342
+ */
2343
+ if (ret == -3)
2344
+ ret = 0;
2345
+ return (c_out? c_out : ret);
2346
+ }
2347
+
2348
+ /**
2349
+ * xmlCharEncInFunc:
2350
+ * @handler: char encoding transformation data structure
2351
+ * @out: an xmlBuffer for the output.
2352
+ * @in: an xmlBuffer for the input
2353
+ *
2354
+ * Generic front-end for the encoding handler input function
2355
+ *
2356
+ * Returns the number of byte written if success, or
2357
+ * -1 general error
2358
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
2359
+ * the result of transformation can't fit into the encoding we want), or
2360
+ */
2361
+ int
2362
+ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2363
+ xmlBufferPtr in)
2364
+ {
2365
+ int ret;
2366
+ int written;
2367
+ int toconv;
2368
+
2369
+ if (handler == NULL)
2370
+ return (-1);
2371
+ if (out == NULL)
2372
+ return (-1);
2373
+ if (in == NULL)
2374
+ return (-1);
2375
+
2376
+ toconv = in->use;
2377
+ if (toconv == 0)
2378
+ return (0);
2379
+ written = out->size - out->use -1; /* count '\0' */
2380
+ if (toconv * 2 >= written) {
2381
+ xmlBufferGrow(out, out->size + toconv * 2);
2382
+ written = out->size - out->use - 1;
2383
+ }
2384
+ ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2385
+ in->content, &toconv, 1);
2386
+ xmlBufferShrink(in, toconv);
2387
+ out->use += written;
2388
+ out->content[out->use] = 0;
2389
+ if (ret == -1)
2390
+ ret = -3;
2391
+
2392
+ switch (ret) {
2393
+ case 0:
2394
+ #ifdef DEBUG_ENCODING
2395
+ xmlGenericError(xmlGenericErrorContext,
2396
+ "converted %d bytes to %d bytes of input\n",
2397
+ toconv, written);
2398
+ #endif
2399
+ break;
2400
+ case -1:
2401
+ #ifdef DEBUG_ENCODING
2402
+ xmlGenericError(xmlGenericErrorContext,
2403
+ "converted %d bytes to %d bytes of input, %d left\n",
2404
+ toconv, written, in->use);
2405
+ #endif
2406
+ break;
2407
+ case -3:
2408
+ #ifdef DEBUG_ENCODING
2409
+ xmlGenericError(xmlGenericErrorContext,
2410
+ "converted %d bytes to %d bytes of input, %d left\n",
2411
+ toconv, written, in->use);
2412
+ #endif
2413
+ break;
2414
+ case -2: {
2415
+ char buf[50];
2416
+
2417
+ snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2418
+ in->content[0], in->content[1],
2419
+ in->content[2], in->content[3]);
2420
+ buf[49] = 0;
2421
+ xmlEncodingErr(XML_I18N_CONV_FAILED,
2422
+ "input conversion failed due to input error, bytes %s\n",
2423
+ buf);
2424
+ }
2425
+ }
2426
+ /*
2427
+ * Ignore when input buffer is not on a boundary
2428
+ */
2429
+ if (ret == -3)
2430
+ ret = 0;
2431
+ return (written? written : ret);
2432
+ }
2433
+
2434
+ #ifdef LIBXML_OUTPUT_ENABLED
2435
+ /**
2436
+ * xmlCharEncOutput:
2437
+ * @output: a parser output buffer
2438
+ * @init: is this an initialization call without data
2439
+ *
2440
+ * Generic front-end for the encoding handler on parser output
2441
+ * a first call with @init == 1 has to be made first to initiate the
2442
+ * output in case of non-stateless encoding needing to initiate their
2443
+ * state or the output (like the BOM in UTF16).
2444
+ * In case of UTF8 sequence conversion errors for the given encoder,
2445
+ * the content will be automatically remapped to a CharRef sequence.
2446
+ *
2447
+ * Returns the number of byte written if success, or
2448
+ * -1 general error
2449
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
2450
+ * the result of transformation can't fit into the encoding we want), or
2451
+ */
2452
+ int
2453
+ xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2454
+ {
2455
+ int ret;
2456
+ size_t written;
2457
+ int writtentot = 0;
2458
+ size_t toconv;
2459
+ int c_in;
2460
+ int c_out;
2461
+ xmlBufPtr in;
2462
+ xmlBufPtr out;
2463
+
2464
+ if ((output == NULL) || (output->encoder == NULL) ||
2465
+ (output->buffer == NULL) || (output->conv == NULL))
2466
+ return (-1);
2467
+ out = output->conv;
2468
+ in = output->buffer;
2469
+
2470
+ retry:
2471
+
2472
+ written = xmlBufAvail(out);
2473
+ if (written > 0)
2474
+ written--; /* count '\0' */
2475
+
2476
+ /*
2477
+ * First specific handling of the initialization call
2478
+ */
2479
+ if (init) {
2480
+ c_in = 0;
2481
+ c_out = written;
2482
+ /* TODO: Check return value. */
2483
+ xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2484
+ NULL, &c_in);
2485
+ xmlBufAddLen(out, c_out);
2486
+ #ifdef DEBUG_ENCODING
2487
+ xmlGenericError(xmlGenericErrorContext,
2488
+ "initialized encoder\n");
2489
+ #endif
2490
+ return(c_out);
2491
+ }
2492
+
2493
+ /*
2494
+ * Conversion itself.
2495
+ */
2496
+ toconv = xmlBufUse(in);
2497
+ if (toconv == 0)
2498
+ return (0);
2499
+ if (toconv > 64 * 1024)
2500
+ toconv = 64 * 1024;
2501
+ if (toconv * 4 >= written) {
2502
+ xmlBufGrow(out, toconv * 4);
2503
+ written = xmlBufAvail(out) - 1;
2504
+ }
2505
+ if (written > 256 * 1024)
2506
+ written = 256 * 1024;
2507
+
2508
+ c_in = toconv;
2509
+ c_out = written;
2510
+ ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2511
+ xmlBufContent(in), &c_in);
2512
+ xmlBufShrink(in, c_in);
2513
+ xmlBufAddLen(out, c_out);
2514
+ writtentot += c_out;
2515
+ if (ret == -1) {
2516
+ if (c_out > 0) {
2517
+ /* Can be a limitation of iconv or uconv */
2518
+ goto retry;
2519
+ }
2520
+ ret = -3;
2521
+ }
2522
+
2523
+ /*
2524
+ * Attempt to handle error cases
2525
+ */
2526
+ switch (ret) {
2527
+ case 0:
2528
+ #ifdef DEBUG_ENCODING
2529
+ xmlGenericError(xmlGenericErrorContext,
2530
+ "converted %d bytes to %d bytes of output\n",
2531
+ c_in, c_out);
2532
+ #endif
2533
+ break;
2534
+ case -1:
2535
+ #ifdef DEBUG_ENCODING
2536
+ xmlGenericError(xmlGenericErrorContext,
2537
+ "output conversion failed by lack of space\n");
2538
+ #endif
2539
+ break;
2540
+ case -3:
2541
+ #ifdef DEBUG_ENCODING
2542
+ xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2543
+ c_in, c_out, (int) xmlBufUse(in));
2544
+ #endif
2545
+ break;
2546
+ case -4:
2547
+ xmlEncodingErr(XML_I18N_NO_OUTPUT,
2548
+ "xmlCharEncOutFunc: no output function !\n", NULL);
2549
+ ret = -1;
2550
+ break;
2551
+ case -2: {
2552
+ xmlChar charref[20];
2553
+ int len = (int) xmlBufUse(in);
2554
+ xmlChar *content = xmlBufContent(in);
2555
+ int cur, charrefLen;
2556
+
2557
+ cur = xmlGetUTF8Char(content, &len);
2558
+ if (cur <= 0)
2559
+ break;
2560
+
2561
+ #ifdef DEBUG_ENCODING
2562
+ xmlGenericError(xmlGenericErrorContext,
2563
+ "handling output conversion error\n");
2564
+ xmlGenericError(xmlGenericErrorContext,
2565
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2566
+ content[0], content[1],
2567
+ content[2], content[3]);
2568
+ #endif
2569
+ /*
2570
+ * Removes the UTF8 sequence, and replace it by a charref
2571
+ * and continue the transcoding phase, hoping the error
2572
+ * did not mangle the encoder state.
2573
+ */
2574
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2575
+ "&#%d;", cur);
2576
+ xmlBufShrink(in, len);
2577
+ xmlBufGrow(out, charrefLen * 4);
2578
+ c_out = xmlBufAvail(out) - 1;
2579
+ c_in = charrefLen;
2580
+ ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2581
+ charref, &c_in);
2582
+
2583
+ if ((ret < 0) || (c_in != charrefLen)) {
2584
+ char buf[50];
2585
+
2586
+ snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2587
+ content[0], content[1],
2588
+ content[2], content[3]);
2589
+ buf[49] = 0;
2590
+ xmlEncodingErr(XML_I18N_CONV_FAILED,
2591
+ "output conversion failed due to conv error, bytes %s\n",
2592
+ buf);
2593
+ if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2594
+ content[0] = ' ';
2595
+ break;
2596
+ }
2597
+
2598
+ xmlBufAddLen(out, c_out);
2599
+ writtentot += c_out;
2600
+ goto retry;
2601
+ }
2602
+ }
2603
+ return(writtentot ? writtentot : ret);
2604
+ }
2605
+ #endif
2606
+
2607
+ /**
2608
+ * xmlCharEncOutFunc:
2609
+ * @handler: char encoding transformation data structure
2610
+ * @out: an xmlBuffer for the output.
2611
+ * @in: an xmlBuffer for the input
2612
+ *
2613
+ * Generic front-end for the encoding handler output function
2614
+ * a first call with @in == NULL has to be made firs to initiate the
2615
+ * output in case of non-stateless encoding needing to initiate their
2616
+ * state or the output (like the BOM in UTF16).
2617
+ * In case of UTF8 sequence conversion errors for the given encoder,
2618
+ * the content will be automatically remapped to a CharRef sequence.
2619
+ *
2620
+ * Returns the number of byte written if success, or
2621
+ * -1 general error
2622
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
2623
+ * the result of transformation can't fit into the encoding we want), or
2624
+ */
2625
+ int
2626
+ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2627
+ xmlBufferPtr in) {
2628
+ int ret;
2629
+ int written;
2630
+ int writtentot = 0;
2631
+ int toconv;
2632
+
2633
+ if (handler == NULL) return(-1);
2634
+ if (out == NULL) return(-1);
2635
+
2636
+ retry:
2637
+
2638
+ written = out->size - out->use;
2639
+
2640
+ if (written > 0)
2641
+ written--; /* Gennady: count '/0' */
2642
+
2643
+ /*
2644
+ * First specific handling of in = NULL, i.e. the initialization call
2645
+ */
2646
+ if (in == NULL) {
2647
+ toconv = 0;
2648
+ /* TODO: Check return value. */
2649
+ xmlEncOutputChunk(handler, &out->content[out->use], &written,
2650
+ NULL, &toconv);
2651
+ out->use += written;
2652
+ out->content[out->use] = 0;
2653
+ #ifdef DEBUG_ENCODING
2654
+ xmlGenericError(xmlGenericErrorContext,
2655
+ "initialized encoder\n");
2656
+ #endif
2657
+ return(0);
2658
+ }
2659
+
2660
+ /*
2661
+ * Conversion itself.
2662
+ */
2663
+ toconv = in->use;
2664
+ if (toconv == 0)
2665
+ return(0);
2666
+ if (toconv * 4 >= written) {
2667
+ xmlBufferGrow(out, toconv * 4);
2668
+ written = out->size - out->use - 1;
2669
+ }
2670
+ ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2671
+ in->content, &toconv);
2672
+ xmlBufferShrink(in, toconv);
2673
+ out->use += written;
2674
+ writtentot += written;
2675
+ out->content[out->use] = 0;
2676
+ if (ret == -1) {
2677
+ if (written > 0) {
2678
+ /* Can be a limitation of iconv or uconv */
2679
+ goto retry;
2680
+ }
2681
+ ret = -3;
2682
+ }
2683
+
2684
+ /*
2685
+ * Attempt to handle error cases
2686
+ */
2687
+ switch (ret) {
2688
+ case 0:
2689
+ #ifdef DEBUG_ENCODING
2690
+ xmlGenericError(xmlGenericErrorContext,
2691
+ "converted %d bytes to %d bytes of output\n",
2692
+ toconv, written);
2693
+ #endif
2694
+ break;
2695
+ case -1:
2696
+ #ifdef DEBUG_ENCODING
2697
+ xmlGenericError(xmlGenericErrorContext,
2698
+ "output conversion failed by lack of space\n");
2699
+ #endif
2700
+ break;
2701
+ case -3:
2702
+ #ifdef DEBUG_ENCODING
2703
+ xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2704
+ toconv, written, in->use);
2705
+ #endif
2706
+ break;
2707
+ case -4:
2708
+ xmlEncodingErr(XML_I18N_NO_OUTPUT,
2709
+ "xmlCharEncOutFunc: no output function !\n", NULL);
2710
+ ret = -1;
2711
+ break;
2712
+ case -2: {
2713
+ xmlChar charref[20];
2714
+ int len = in->use;
2715
+ const xmlChar *utf = (const xmlChar *) in->content;
2716
+ int cur, charrefLen;
2717
+
2718
+ cur = xmlGetUTF8Char(utf, &len);
2719
+ if (cur <= 0)
2720
+ break;
2721
+
2722
+ #ifdef DEBUG_ENCODING
2723
+ xmlGenericError(xmlGenericErrorContext,
2724
+ "handling output conversion error\n");
2725
+ xmlGenericError(xmlGenericErrorContext,
2726
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2727
+ in->content[0], in->content[1],
2728
+ in->content[2], in->content[3]);
2729
+ #endif
2730
+ /*
2731
+ * Removes the UTF8 sequence, and replace it by a charref
2732
+ * and continue the transcoding phase, hoping the error
2733
+ * did not mangle the encoder state.
2734
+ */
2735
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2736
+ "&#%d;", cur);
2737
+ xmlBufferShrink(in, len);
2738
+ xmlBufferGrow(out, charrefLen * 4);
2739
+ written = out->size - out->use - 1;
2740
+ toconv = charrefLen;
2741
+ ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2742
+ charref, &toconv);
2743
+
2744
+ if ((ret < 0) || (toconv != charrefLen)) {
2745
+ char buf[50];
2746
+
2747
+ snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2748
+ in->content[0], in->content[1],
2749
+ in->content[2], in->content[3]);
2750
+ buf[49] = 0;
2751
+ xmlEncodingErr(XML_I18N_CONV_FAILED,
2752
+ "output conversion failed due to conv error, bytes %s\n",
2753
+ buf);
2754
+ if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2755
+ in->content[0] = ' ';
2756
+ break;
2757
+ }
2758
+
2759
+ out->use += written;
2760
+ writtentot += written;
2761
+ out->content[out->use] = 0;
2762
+ goto retry;
2763
+ }
2764
+ }
2765
+ return(writtentot ? writtentot : ret);
2766
+ }
2767
+
2768
+ /**
2769
+ * xmlCharEncCloseFunc:
2770
+ * @handler: char encoding transformation data structure
2771
+ *
2772
+ * Generic front-end for encoding handler close function
2773
+ *
2774
+ * Returns 0 if success, or -1 in case of error
2775
+ */
2776
+ int
2777
+ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2778
+ int ret = 0;
2779
+ int tofree = 0;
2780
+ int i, handler_in_list = 0;
2781
+
2782
+ if (handler == NULL) return(-1);
2783
+ if (handler->name == NULL) return(-1);
2784
+ if (handlers != NULL) {
2785
+ for (i = 0;i < nbCharEncodingHandler; i++) {
2786
+ if (handler == handlers[i]) {
2787
+ handler_in_list = 1;
2788
+ break;
2789
+ }
2790
+ }
2791
+ }
2792
+ #ifdef LIBXML_ICONV_ENABLED
2793
+ /*
2794
+ * Iconv handlers can be used only once, free the whole block.
2795
+ * and the associated icon resources.
2796
+ */
2797
+ if ((handler_in_list == 0) &&
2798
+ ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2799
+ tofree = 1;
2800
+ if (handler->iconv_out != NULL) {
2801
+ if (iconv_close(handler->iconv_out))
2802
+ ret = -1;
2803
+ handler->iconv_out = NULL;
2804
+ }
2805
+ if (handler->iconv_in != NULL) {
2806
+ if (iconv_close(handler->iconv_in))
2807
+ ret = -1;
2808
+ handler->iconv_in = NULL;
2809
+ }
2810
+ }
2811
+ #endif /* LIBXML_ICONV_ENABLED */
2812
+ #ifdef LIBXML_ICU_ENABLED
2813
+ if ((handler_in_list == 0) &&
2814
+ ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2815
+ tofree = 1;
2816
+ if (handler->uconv_out != NULL) {
2817
+ closeIcuConverter(handler->uconv_out);
2818
+ handler->uconv_out = NULL;
2819
+ }
2820
+ if (handler->uconv_in != NULL) {
2821
+ closeIcuConverter(handler->uconv_in);
2822
+ handler->uconv_in = NULL;
2823
+ }
2824
+ }
2825
+ #endif
2826
+ if (tofree) {
2827
+ /* free up only dynamic handlers iconv/uconv */
2828
+ if (handler->name != NULL)
2829
+ xmlFree(handler->name);
2830
+ handler->name = NULL;
2831
+ xmlFree(handler);
2832
+ }
2833
+ #ifdef DEBUG_ENCODING
2834
+ if (ret)
2835
+ xmlGenericError(xmlGenericErrorContext,
2836
+ "failed to close the encoding handler\n");
2837
+ else
2838
+ xmlGenericError(xmlGenericErrorContext,
2839
+ "closed the encoding handler\n");
2840
+ #endif
2841
+
2842
+ return(ret);
2843
+ }
2844
+
2845
+ /**
2846
+ * xmlByteConsumed:
2847
+ * @ctxt: an XML parser context
2848
+ *
2849
+ * This function provides the current index of the parser relative
2850
+ * to the start of the current entity. This function is computed in
2851
+ * bytes from the beginning starting at zero and finishing at the
2852
+ * size in byte of the file if parsing a file. The function is
2853
+ * of constant cost if the input is UTF-8 but can be costly if run
2854
+ * on non-UTF-8 input.
2855
+ *
2856
+ * Returns the index in bytes from the beginning of the entity or -1
2857
+ * in case the index could not be computed.
2858
+ */
2859
+ long
2860
+ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2861
+ xmlParserInputPtr in;
2862
+
2863
+ if (ctxt == NULL) return(-1);
2864
+ in = ctxt->input;
2865
+ if (in == NULL) return(-1);
2866
+ if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2867
+ unsigned int unused = 0;
2868
+ xmlCharEncodingHandler * handler = in->buf->encoder;
2869
+ /*
2870
+ * Encoding conversion, compute the number of unused original
2871
+ * bytes from the input not consumed and subtract that from
2872
+ * the raw consumed value, this is not a cheap operation
2873
+ */
2874
+ if (in->end - in->cur > 0) {
2875
+ unsigned char convbuf[32000];
2876
+ const unsigned char *cur = (const unsigned char *)in->cur;
2877
+ int toconv = in->end - in->cur, written = 32000;
2878
+
2879
+ int ret;
2880
+
2881
+ do {
2882
+ toconv = in->end - cur;
2883
+ written = 32000;
2884
+ ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2885
+ cur, &toconv);
2886
+ if (ret < 0) {
2887
+ if (written > 0)
2888
+ ret = -2;
2889
+ else
2890
+ return(-1);
2891
+ }
2892
+ unused += written;
2893
+ cur += toconv;
2894
+ } while (ret == -2);
2895
+ }
2896
+ if (in->buf->rawconsumed < unused)
2897
+ return(-1);
2898
+ return(in->buf->rawconsumed - unused);
2899
+ }
2900
+ return(in->consumed + (in->cur - in->base));
2901
+ }
2902
+
2903
+ #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2904
+ #ifdef LIBXML_ISO8859X_ENABLED
2905
+
2906
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every converted character produces exactly one
 * output byte, and no more than the initial value of @outlen bytes are
 * ever written to @out.
 *
 * Table layout as used below: bytes 0..31 are indexed by the low five bits
 * of a two-byte lead, bytes 32..47 by the low four bits of a three-byte
 * lead; each yields a block number selecting a 64-byte block that starts
 * at offset 48 + block * 64. A final value of 0 means the character is not
 * part of the target character set.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *         fails (invalid UTF-8 or character not in the target set), -3 if
 *         the input ends in the middle of a multi-byte sequence, -1 if
 *         @out is full while input remains or if an argument is invalid.
 * Except when an argument is invalid, the value of @inlen after return is
 * the number of octets consumed (complete characters only) and the value
 * of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /*
         * Each iteration stores at most one byte: refuse to start a
         * new character when the output buffer is already full.
         */
        if (out >= outend) {
            ret = -1;
            goto done;
        }

        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                           xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        /* only fully converted characters count as consumed */
        processed = in;
    }

done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3024
+
3025
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  128-entry table giving the code point of each input
 *                 byte in the range 0x80..0xFF (indexed by byte - 0x80);
 *                 an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * cannot be guaranteed to hold the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument is
 *         invalid or an undefined input byte is met
 * Except when an argument is invalid:
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * A high-half byte expands to at most three bytes, so only start an
     * iteration while more than two bytes of output remain. Written as a
     * distance so no pointer before the start of @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >> 6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >> 6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than the output can still take */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds test first: in may already sit at the end of the input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * The main loop stops with up to two free output bytes; use them for
     * plain ASCII input, which needs a single byte per character.
     */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3090
+
3091
+
3092
+ /************************************************************************
3093
+ * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3094
+ ************************************************************************/
3095
+
3096
/*
 * ISO-8859-2 high half: 128 Unicode code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8() indexes its table with (byte - 0x80) and treats an entry
 * of 0x0000 as an undefined code point; this array is presumably the table
 * handed to it for ISO-8859-2 -- confirm at the call site.
 */
+ static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3097
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3098
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3099
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3100
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3101
+ 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3102
+ 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3103
+ 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3104
+ 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3105
+ 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3106
+ 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3107
+ 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3108
+ 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3109
+ 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3110
+ 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3111
+ 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3112
+ 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3113
+ };
3114
+
3115
/*
 * Unicode to ISO-8859-2 lookup data: a 48-byte index followed by 6 blocks
 * of 64 bytes. Presumably passed as the xlattable argument of
 * UTF8ToISO8859x() (confirm at the call site), which reads index bytes
 * 0..31 for two-byte UTF-8 leads and 32..47 for three-byte leads, follows
 * the block numbers found there, and treats a final 0 as "not in character
 * set". The string literal fills the array exactly, so no terminating NUL
 * is stored.
 */
+ static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3116
+ "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3117
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3122
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3124
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3125
+ "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3126
+ "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3127
+ "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3128
+ "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3129
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3131
+ "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3132
+ "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3133
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135
+ "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3136
+ "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3137
+ "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3138
+ "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3139
+ "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3140
+ "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3141
+ "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3142
+ "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3143
+ };
3144
+
3145
/*
 * ISO-8859-3 high half: 128 Unicode code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8() indexes its table with (byte - 0x80) and treats an entry
 * of 0x0000 as an undefined code point; this array is presumably the table
 * handed to it for ISO-8859-3 -- confirm at the call site.
 */
+ static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3146
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3147
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3148
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3149
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3150
+ 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3151
+ 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3152
+ 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3153
+ 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3154
+ 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3155
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3156
+ 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3157
+ 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3158
+ 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3159
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3160
+ 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3161
+ 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3162
+ };
3163
+
3164
/*
 * Unicode to ISO-8859-3 lookup data: a 48-byte index followed by 7 blocks
 * of 64 bytes. Presumably passed as the xlattable argument of
 * UTF8ToISO8859x() (confirm at the call site), which reads index bytes
 * 0..31 for two-byte UTF-8 leads and 32..47 for three-byte leads, follows
 * the block numbers found there, and treats a final 0 as "not in character
 * set". The string literal fills the array exactly, so no terminating NUL
 * is stored.
 */
+ static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3165
+ "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3166
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3173
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3174
+ "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3175
+ "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3176
+ "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3177
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3178
+ "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3179
+ "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3180
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181
+ "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3182
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184
+ "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3190
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3191
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3192
+ "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3193
+ "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3194
+ "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3195
+ "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3196
+ };
3197
+
3198
/*
 * ISO-8859-4 high half: 128 Unicode code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8() indexes its table with (byte - 0x80) and treats an entry
 * of 0x0000 as an undefined code point; this array is presumably the table
 * handed to it for ISO-8859-4 -- confirm at the call site.
 */
+ static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3199
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3200
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3201
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3202
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3203
+ 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3204
+ 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3205
+ 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3206
+ 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3207
+ 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3208
+ 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3209
+ 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3210
+ 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3211
+ 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3212
+ 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3213
+ 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3214
+ 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3215
+ };
3216
+
3217
/*
 * Unicode to ISO-8859-4 lookup data: a 48-byte index followed by 6 blocks
 * of 64 bytes. Presumably passed as the xlattable argument of
 * UTF8ToISO8859x() (confirm at the call site), which reads index bytes
 * 0..31 for two-byte UTF-8 leads and 32..47 for three-byte leads, follows
 * the block numbers found there, and treats a final 0 as "not in character
 * set". The string literal fills the array exactly, so no terminating NUL
 * is stored.
 */
+ static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3218
+ "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3219
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3220
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3223
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3226
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3227
+ "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3228
+ "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3229
+ "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3230
+ "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3231
+ "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3232
+ "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3233
+ "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3234
+ "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3235
+ "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3236
+ "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3237
+ "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3238
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3239
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241
+ "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3242
+ "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3243
+ "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3244
+ "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3245
+ };
3246
+
3247
/*
 * ISO-8859-5 high half: 128 Unicode code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8() indexes its table with (byte - 0x80) and treats an entry
 * of 0x0000 as an undefined code point; this array is presumably the table
 * handed to it for ISO-8859-5 -- confirm at the call site.
 */
+ static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3248
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3249
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3250
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3251
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3252
+ 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3253
+ 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3254
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3255
+ 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3256
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3257
+ 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3258
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3259
+ 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3260
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3261
+ 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3262
+ 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3263
+ 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3264
+ };
3265
+
3266
/*
 * Unicode to ISO-8859-5 lookup data: a 48-byte index followed by 6 blocks
 * of 64 bytes. Presumably passed as the xlattable argument of
 * UTF8ToISO8859x() (confirm at the call site), which reads index bytes
 * 0..31 for two-byte UTF-8 leads and 32..47 for three-byte leads, follows
 * the block numbers found there, and treats a final 0 as "not in character
 * set". The string literal fills the array exactly, so no terminating NUL
 * is stored.
 */
+ static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3267
+ "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
+ "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
+ "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3275
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3276
+ "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3277
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278
+ "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3279
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3280
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3281
+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3282
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3283
+ "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3284
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286
+ "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291
+ "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294
+ };
3295
+
3296
/*
 * ISO-8859-6 high half: 128 Unicode code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8() indexes its table with (byte - 0x80) and treats an entry
 * of 0x0000 as an undefined code point; this array is presumably the table
 * handed to it for ISO-8859-6 -- confirm at the call site.
 */
+ static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3297
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3298
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3299
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3300
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3301
+ 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3302
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3303
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3304
+ 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3305
+ 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3306
+ 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3307
+ 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3308
+ 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3309
+ 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3310
+ 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3311
+ 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3312
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3313
+ };
3314
+
3315
/*
 * Unicode to ISO-8859-6 lookup data: a 48-byte index followed by 5 blocks
 * of 64 bytes. Presumably passed as the xlattable argument of
 * UTF8ToISO8859x() (confirm at the call site), which reads index bytes
 * 0..31 for two-byte UTF-8 leads and 32..47 for three-byte leads, follows
 * the block numbers found there, and treats a final 0 as "not in character
 * set". The string literal fills the array exactly, so no terminating NUL
 * is stored.
 */
+ static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3316
+ "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3318
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3324
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3325
+ "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3326
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327
+ "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3332
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3333
+ "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3334
+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3335
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3336
+ "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339
+ };
3340
+
3341
/*
 * ISO-8859-7 high half: 128 Unicode code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8() indexes its table with (byte - 0x80) and treats an entry
 * of 0x0000 as an undefined code point; this array is presumably the table
 * handed to it for ISO-8859-7 -- confirm at the call site.
 */
+ static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3342
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3343
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3344
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3345
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3346
+ 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3347
+ 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3348
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3349
+ 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3350
+ 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3351
+ 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3352
+ 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3353
+ 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3354
+ 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3355
+ 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3356
+ 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3357
+ 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3358
+ };
3359
+
3360
/*
 * Unicode to ISO-8859-7 lookup data: a 48-byte index followed by 7 blocks
 * of 64 bytes. Presumably passed as the xlattable argument of
 * UTF8ToISO8859x() (confirm at the call site), which reads index bytes
 * 0..31 for two-byte UTF-8 leads and 32..47 for three-byte leads, follows
 * the block numbers found there, and treats a final 0 as "not in character
 * set". The string literal fills the array exactly, so no terminating NUL
 * is stored.
 */
+ static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3361
+ "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3362
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
+ "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3369
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3370
+ "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3371
+ "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3372
+ "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377
+ "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3378
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380
+ "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384
+ "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3385
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3386
+ "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3387
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3388
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3389
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392
+ };
3393
+
3394
/*
 * ISO-8859-8 high half: 128 Unicode code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8() indexes its table with (byte - 0x80) and treats an entry
 * of 0x0000 as an undefined code point; this array is presumably the table
 * handed to it for ISO-8859-8 -- confirm at the call site.
 */
+ static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3395
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3396
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3397
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3398
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3399
+ 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3400
+ 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3401
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3402
+ 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3403
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3404
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3405
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3406
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3407
+ 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3408
+ 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3409
+ 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3410
+ 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3411
+ };
3412
+
3413
+ static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { /* Reverse-lookup bytes passed to UTF8ToISO8859x by UTF8ToISO8859_8. Declared size is 48 + 7 * 64; presumably a 48-byte index followed by seven 64-byte blocks of ISO-8859-8 byte values with 0x00 meaning "no mapping" -- confirm against UTF8ToISO8859x. */
3414
+ "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415
+ "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3416
+ "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3422
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3423
+ "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3424
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3425
+ "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430
+ "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3431
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432
+ "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3433
+ "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3438
+ "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3439
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3443
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3444
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445
+ };
3446
+
3447
+ static unsigned short const xmlunicodetable_ISO8859_9 [128] = { /* 128 code points passed to ISO8859xToUTF8 by ISO8859_9ToUTF8; the first 32 entries are 0x0080..0x009f unchanged. Presumably indexed by (byte - 0x80) -- confirm against ISO8859xToUTF8. */
3448
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3449
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3450
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3451
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3452
+ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3453
+ 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3454
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3455
+ 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3456
+ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3457
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3458
+ 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3459
+ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3460
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3461
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3462
+ 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3463
+ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3464
+ };
3465
+
3466
+ static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { /* Reverse-lookup bytes passed to UTF8ToISO8859x by UTF8ToISO8859_9. Declared size is 48 + 5 * 64; presumably a 48-byte index followed by five 64-byte blocks of ISO-8859-9 byte values with 0x00 meaning "no mapping" -- confirm against UTF8ToISO8859x. */
3467
+ "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3475
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3476
+ "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3477
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3478
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3479
+ "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3480
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3481
+ "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3482
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3483
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3484
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485
+ "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3488
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490
+ };
3491
+
3492
+ static unsigned short const xmlunicodetable_ISO8859_10 [128] = { /* 128 code points passed to ISO8859xToUTF8 by ISO8859_10ToUTF8; the first 32 entries are 0x0080..0x009f unchanged. Presumably indexed by (byte - 0x80) -- confirm against ISO8859xToUTF8. */
3493
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3494
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3495
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3496
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3497
+ 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3498
+ 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3499
+ 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3500
+ 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3501
+ 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3502
+ 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3503
+ 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3504
+ 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3505
+ 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3506
+ 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3507
+ 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3508
+ 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3509
+ };
3510
+
3511
+ static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { /* Reverse-lookup bytes passed to UTF8ToISO8859x by UTF8ToISO8859_10. Declared size is 48 + 7 * 64; presumably a 48-byte index followed by seven 64-byte blocks of ISO-8859-10 byte values with 0x00 meaning "no mapping" -- confirm against UTF8ToISO8859x. */
3512
+ "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
+ "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3520
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3521
+ "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3522
+ "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3523
+ "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3524
+ "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3525
+ "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3526
+ "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3527
+ "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3528
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529
+ "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3530
+ "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3531
+ "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536
+ "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539
+ "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3540
+ "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3541
+ "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3542
+ "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3543
+ };
3544
+
3545
+ static unsigned short const xmlunicodetable_ISO8859_11 [128] = { /* 128 code points passed to ISO8859xToUTF8 by ISO8859_11ToUTF8; the first 32 entries are 0x0080..0x009f unchanged. Presumably indexed by (byte - 0x80), with 0x0000 marking bytes ISO-8859-11 leaves unassigned -- confirm against ISO8859xToUTF8. */
3546
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3547
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3548
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3549
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3550
+ 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3551
+ 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3552
+ 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3553
+ 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3554
+ 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3555
+ 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3556
+ 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3557
+ 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3558
+ 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3559
+ 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3560
+ 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3561
+ 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3562
+ };
3563
+
3564
+ static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { /* Reverse-lookup bytes passed to UTF8ToISO8859x by UTF8ToISO8859_11. Declared size is 48 + 6 * 64; presumably a 48-byte index followed by six 64-byte blocks of ISO-8859-11 byte values with 0x00 meaning "no mapping" -- confirm against UTF8ToISO8859x. */
3565
+ "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
+ "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3573
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3574
+ "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3580
+ "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3581
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3582
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3583
+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3584
+ "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3585
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3586
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3587
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3589
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3590
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592
+ };
3593
+
3594
+ static unsigned short const xmlunicodetable_ISO8859_13 [128] = { /* 128 code points passed to ISO8859xToUTF8 by ISO8859_13ToUTF8; the first 32 entries are 0x0080..0x009f unchanged. Presumably indexed by (byte - 0x80) -- confirm against ISO8859xToUTF8. */
3595
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3596
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3597
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3598
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3599
+ 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3600
+ 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3601
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3602
+ 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3603
+ 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3604
+ 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3605
+ 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3606
+ 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3607
+ 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3608
+ 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3609
+ 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3610
+ 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3611
+ };
3612
+
3613
+ static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { /* Reverse-lookup bytes passed to UTF8ToISO8859x by UTF8ToISO8859_13. Declared size is 48 + 7 * 64; presumably a 48-byte index followed by seven 64-byte blocks of ISO-8859-13 byte values with 0x00 meaning "no mapping" -- confirm against UTF8ToISO8859x. */
3614
+ "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
+ "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3622
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3623
+ "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3624
+ "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3625
+ "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3631
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633
+ "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3634
+ "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3635
+ "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3636
+ "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3637
+ "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3638
+ "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3639
+ "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3640
+ "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3641
+ "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3642
+ "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3643
+ "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3644
+ "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3645
+ };
3646
+
3647
+ static unsigned short const xmlunicodetable_ISO8859_14 [128] = { /* 128 code points passed to ISO8859xToUTF8 by ISO8859_14ToUTF8; the first 32 entries are 0x0080..0x009f unchanged. Presumably indexed by (byte - 0x80) -- confirm against ISO8859xToUTF8. */
3648
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3649
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3650
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3651
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3652
+ 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3653
+ 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3654
+ 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3655
+ 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3656
+ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3657
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3658
+ 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3659
+ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3660
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3661
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3662
+ 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3663
+ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3664
+ };
3665
+
3666
+ static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { /* Reverse-lookup bytes passed to UTF8ToISO8859x by UTF8ToISO8859_14. Declared size is 48 + 10 * 64; presumably a 48-byte index followed by ten 64-byte blocks of ISO-8859-14 byte values with 0x00 meaning "no mapping" -- confirm against UTF8ToISO8859x. */
3667
+ "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3668
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669
+ "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3675
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3676
+ "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3677
+ "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3682
+ "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3683
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3684
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3687
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688
+ "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690
+ "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697
+ "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3699
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701
+ "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3702
+ "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703
+ "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3704
+ "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3705
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3706
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3707
+ "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3708
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3709
+ "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3710
+ };
3711
+
3712
+ static unsigned short const xmlunicodetable_ISO8859_15 [128] = { /* 128 code points passed to ISO8859xToUTF8 by ISO8859_15ToUTF8; the first 32 entries are 0x0080..0x009f unchanged. Presumably indexed by (byte - 0x80) -- confirm against ISO8859xToUTF8. */
3713
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3714
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3715
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3716
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3717
+ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3718
+ 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3719
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3720
+ 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3721
+ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3722
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3723
+ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3724
+ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3725
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3726
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3727
+ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3728
+ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3729
+ };
3730
+
3731
+ static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { /* Reverse-lookup bytes passed to UTF8ToISO8859x by UTF8ToISO8859_15. Declared size is 48 + 6 * 64; presumably a 48-byte index followed by six 64-byte blocks of ISO-8859-15 byte values with 0x00 meaning "no mapping" -- confirm against UTF8ToISO8859x. */
3732
+ "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3733
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
+ "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3740
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3741
+ "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3742
+ "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3743
+ "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3750
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752
+ "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753
+ "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3754
+ "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3755
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3756
+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3757
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3758
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3759
+ };
3760
+
3761
+ static unsigned short const xmlunicodetable_ISO8859_16 [128] = { /* 128 code points passed to ISO8859xToUTF8 by ISO8859_16ToUTF8; the first 32 entries are 0x0080..0x009f unchanged. Presumably indexed by (byte - 0x80) -- confirm against ISO8859xToUTF8. */
3762
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3763
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3764
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3765
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3766
+ 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3767
+ 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3768
+ 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3769
+ 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3770
+ 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3771
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3772
+ 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3773
+ 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3774
+ 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3775
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3776
+ 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3777
+ 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3778
+ };
3779
+
3780
+ static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { /* Reverse-lookup bytes passed to UTF8ToISO8859x by UTF8ToISO8859_16. Declared size is 48 + 9 * 64; presumably a 48-byte index followed by nine 64-byte blocks of ISO-8859-16 byte values with 0x00 meaning "no mapping" -- confirm against UTF8ToISO8859x. */
3781
+ "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3782
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783
+ "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3784
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3785
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3786
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3787
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3789
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3790
+ "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3791
+ "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3792
+ "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3793
+ "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3794
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3795
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796
+ "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3797
+ "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3798
+ "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3799
+ "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3800
+ "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3801
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3802
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3803
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3804
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3807
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3810
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3811
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3812
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813
+ "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3814
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3815
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3816
+ "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3817
+ "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3818
+ "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3819
+ "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3820
+ };
3821
+
3822
+
3823
+ /*
3824
+ * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3825
+ */
3826
+
3827
+ static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-2 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_2 and returns that call's result. */
3828
+ const unsigned char* in, int *inlen) {
3829
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3830
+ }
3831
+ static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, /* ISO-8859-2 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_2 and returns that call's result. */
3832
+ const unsigned char* in, int *inlen) {
3833
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3834
+ }
3835
+
3836
+ static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-3 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_3 and returns that call's result. */
3837
+ const unsigned char* in, int *inlen) {
3838
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3839
+ }
3840
+ static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, /* ISO-8859-3 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_3 and returns that call's result. */
3841
+ const unsigned char* in, int *inlen) {
3842
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3843
+ }
3844
+
3845
+ static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-4 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_4 and returns that call's result. */
3846
+ const unsigned char* in, int *inlen) {
3847
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3848
+ }
3849
+ static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, /* ISO-8859-4 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_4 and returns that call's result. */
3850
+ const unsigned char* in, int *inlen) {
3851
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3852
+ }
3853
+
3854
+ static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-5 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_5 and returns that call's result. */
3855
+ const unsigned char* in, int *inlen) {
3856
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3857
+ }
3858
+ static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, /* ISO-8859-5 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_5 and returns that call's result. */
3859
+ const unsigned char* in, int *inlen) {
3860
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3861
+ }
3862
+
3863
+ static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-6 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_6 and returns that call's result. */
3864
+ const unsigned char* in, int *inlen) {
3865
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3866
+ }
3867
+ static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, /* ISO-8859-6 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_6 and returns that call's result. */
3868
+ const unsigned char* in, int *inlen) {
3869
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3870
+ }
3871
+
3872
+ static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-7 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_7 and returns that call's result. */
3873
+ const unsigned char* in, int *inlen) {
3874
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3875
+ }
3876
+ static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, /* ISO-8859-7 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_7 and returns that call's result. */
3877
+ const unsigned char* in, int *inlen) {
3878
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3879
+ }
3880
+
3881
+ static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-8 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_8 and returns that call's result. */
3882
+ const unsigned char* in, int *inlen) {
3883
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3884
+ }
3885
+ static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, /* ISO-8859-8 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_8 and returns that call's result. */
3886
+ const unsigned char* in, int *inlen) {
3887
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3888
+ }
3889
+
3890
+ static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-9 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_9 and returns that call's result. */
3891
+ const unsigned char* in, int *inlen) {
3892
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3893
+ }
3894
+ static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, /* ISO-8859-9 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_9 and returns that call's result. */
3895
+ const unsigned char* in, int *inlen) {
3896
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3897
+ }
3898
+
3899
+ static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-10 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_10 and returns that call's result. */
3900
+ const unsigned char* in, int *inlen) {
3901
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3902
+ }
3903
+ static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, /* ISO-8859-10 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_10 and returns that call's result. */
3904
+ const unsigned char* in, int *inlen) {
3905
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3906
+ }
3907
+
3908
+ static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, /* ISO-8859-11 wrapper: forwards out/outlen/in/inlen unchanged to ISO8859xToUTF8 together with xmlunicodetable_ISO8859_11 and returns that call's result. */
3909
+ const unsigned char* in, int *inlen) {
3910
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); /* buffer/length contract is whatever ISO8859xToUTF8 defines (not visible in this block) */
3911
+ }
3912
+ static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, /* ISO-8859-11 wrapper: forwards out/outlen/in/inlen unchanged to UTF8ToISO8859x together with xmltranscodetable_ISO8859_11 and returns that call's result. */
3913
+ const unsigned char* in, int *inlen) {
3914
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); /* buffer/length contract is whatever UTF8ToISO8859x defines (not visible in this block) */
3915
+ }
3916
+
3917
+ static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3918
+ const unsigned char* in, int *inlen) {
3919
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3920
+ }
3921
+ static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3922
+ const unsigned char* in, int *inlen) {
3923
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3924
+ }
3925
+
3926
+ static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3927
+ const unsigned char* in, int *inlen) {
3928
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3929
+ }
3930
+ static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3931
+ const unsigned char* in, int *inlen) {
3932
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3933
+ }
3934
+
3935
+ static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3936
+ const unsigned char* in, int *inlen) {
3937
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3938
+ }
3939
+ static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3940
+ const unsigned char* in, int *inlen) {
3941
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3942
+ }
3943
+
3944
+ static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3945
+ const unsigned char* in, int *inlen) {
3946
+ return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3947
+ }
3948
+ static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3949
+ const unsigned char* in, int *inlen) {
3950
+ return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3951
+ }
3952
+
3953
+ static void
3954
+ xmlRegisterCharEncodingHandlersISO8859x (void) {
3955
+ xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3956
+ xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3957
+ xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3958
+ xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3959
+ xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3960
+ xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3961
+ xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3962
+ xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3963
+ xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3964
+ xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3965
+ xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3966
+ xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3967
+ xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3968
+ xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3969
+ }
3970
+
3971
+ #endif
3972
+ #endif
3973
+
3974
+ #define bottom_encoding
3975
+ #include "elfgcchack.h"