nokolexbor 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/extconf.rb +9 -5
- data/ext/nokolexbor/nl_attribute.c +46 -0
- data/ext/nokolexbor/nl_cdata.c +8 -0
- data/ext/nokolexbor/nl_comment.c +6 -0
- data/ext/nokolexbor/nl_document.c +53 -7
- data/ext/nokolexbor/nl_document_fragment.c +9 -0
- data/ext/nokolexbor/nl_error.c +21 -19
- data/ext/nokolexbor/nl_node.c +255 -50
- data/ext/nokolexbor/nl_node_set.c +56 -1
- data/ext/nokolexbor/nl_processing_instruction.c +6 -0
- data/ext/nokolexbor/nl_text.c +6 -0
- data/ext/nokolexbor/nokolexbor.h +1 -0
- data/lib/nokolexbor/document.rb +52 -5
- data/lib/nokolexbor/document_fragment.rb +11 -0
- data/lib/nokolexbor/node.rb +367 -18
- data/lib/nokolexbor/node_set.rb +56 -0
- data/lib/nokolexbor/version.rb +1 -1
- metadata +2 -24
- data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
- data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
- data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
- data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
- data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
- data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
- data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
- data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
- data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
- data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
- data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
- data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
- data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
- data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
- data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
- data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
- data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
- data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
- data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
- data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
@@ -1,3193 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* Copyright (C) 2019 Alexander Borisov
|
3
|
-
*
|
4
|
-
* Author: Alexander Borisov <borisov@lexbor.com>
|
5
|
-
*/
|
6
|
-
|
7
|
-
#include "lexbor/encoding/decode.h"
|
8
|
-
#include "lexbor/encoding/single.h"
|
9
|
-
#include "lexbor/encoding/multi.h"
|
10
|
-
#include "lexbor/encoding/range.h"
|
11
|
-
|
12
|
-
|
13
|
-
/*
 * Consume one UTF-8 continuation byte from `p`.
 *
 * Relies on names from the expanding function: `ch`, `p`, `need`, `data`
 * and `ctx`.  When the byte lies inside [_lower, _upper] it is consumed:
 * `p` advances, `need` is decremented and the low six bits are shifted into
 * ctx->codepoint.  Otherwise the pending UTF-8 state (lower bound and need
 * counter) is cleared, the replacement sequence is emitted through
 * LXB_ENCODING_DECODE_ERROR_BEGIN/END (which records `*data = p` and
 * have_error before returning when the output buffer is too small), and
 * the `_cont` statement is executed.
 *
 * The body is a bare brace block, not do { } while (0), so `_cont` is
 * pasted directly into the caller's control flow.
 * NOTE(review): callers presumably pass `break` or `continue` here; the
 * call sites are not visible in this part of the file.
 */
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY(_lower, _upper, _cont) \
    { \
        ch = *p; \
 \
        if (ch >= _lower && ch <= _upper) { \
            p++; \
            need--; \
            ctx->codepoint = (ctx->codepoint << 6) | (ch & 0x3F); \
        } \
        else { \
            ctx->u.utf_8.lower = 0x00; \
            ctx->u.utf_8.need = 0; \
 \
            LXB_ENCODING_DECODE_ERROR_BEGIN { \
                *data = p; \
                ctx->have_error = true; \
            } \
            LXB_ENCODING_DECODE_ERROR_END(); \
 \
            _cont; \
        } \
    }

/*
 * Narrow the accepted range for the next continuation byte depending on
 * the byte held in `ch`:
 *   ch == first -> [f_lower, 0xBF]
 *   ch == two   -> [0x80, s_upper]
 * Any other value of `ch` leaves ctx->u.utf_8.lower/upper untouched.
 */
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET(first, two, f_lower, s_upper) \
    do { \
        if (ch == first) { \
            ctx->u.utf_8.upper = 0xBF; \
            ctx->u.utf_8.lower = f_lower; \
        } \
        else if (ch == two) { \
            ctx->u.utf_8.upper = s_upper; \
            ctx->u.utf_8.lower = 0x80; \
        } \
    } \
    while (0)
|
48
|
-
|
49
|
-
/*
 * Output/err helper macros shared by the decoders in this file.
 *
 * Several of them are not hygienic on purpose: they refer to `ctx`, `data`,
 * `p` or `byte` from the function they are expanded in, and some of them
 * `return` from that function.
 */

/*
 * Store `cp` in the output buffer without a capacity check.  The caller
 * must have verified beforehand that there is room.
 */
#define LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, cp) \
    do { \
        (ctx)->buffer_out[(ctx)->buffer_used++] = (cp); \
    } \
    while (0)

/*
 * Store `cp` in the output buffer, or return LXB_STATUS_SMALL_BUFFER from
 * the enclosing function when the buffer is already full.
 */
#define LXB_ENCODING_DECODE_APPEND(ctx, cp) \
    do { \
        if ((ctx)->buffer_used >= (ctx)->buffer_length) { \
            return LXB_STATUS_SMALL_BUFFER; \
        } \
 \
        (ctx)->buffer_out[(ctx)->buffer_used++] = (cp); \
    } \
    while (0)

/*
 * Same as LXB_ENCODING_DECODE_APPEND, but publishes the local read cursor
 * `p` through `*data` before returning LXB_STATUS_SMALL_BUFFER.  `cp` is
 * only evaluated when there is room, so a `*p++` argument does not advance
 * the cursor on the failure path.
 */
#define LXB_ENCODING_DECODE_APPEND_P(ctx, cp) \
    do { \
        if ((ctx)->buffer_used >= (ctx)->buffer_length) { \
            *data = p; \
            return LXB_STATUS_SMALL_BUFFER; \
        } \
 \
        (ctx)->buffer_out[(ctx)->buffer_used++] = (cp); \
    } \
    while (0)

/*
 * Return LXB_STATUS_SMALL_BUFFER from the enclosing function unless at
 * least one output slot is free.
 */
#define LXB_ENCODING_DECODE_CHECK_OUT(ctx) \
    do { \
        if ((ctx)->buffer_used >= (ctx)->buffer_length) { \
            return LXB_STATUS_SMALL_BUFFER; \
        } \
    } \
    while (0)

/*
 * LXB_ENCODING_DECODE_ERROR_BEGIN { ... } LXB_ENCODING_DECODE_ERROR_END();
 *
 * Emits the replacement sequence ctx->replace_to (ctx->replace_len code
 * points) for an undecodable input sequence:
 *   - ctx->replace_to == NULL: return LXB_STATUS_ERROR immediately;
 *   - not enough room for the replacement: run the user block written
 *     between BEGIN and END (used to save state for the retry), then
 *     return LXB_STATUS_SMALL_BUFFER;
 *   - otherwise: copy the replacement into the output buffer.  The user
 *     block is NOT executed in this case.
 *
 * The two macros are halves of one statement and must always be used as a
 * pair; BEGIN deliberately ends in an unterminated `do`.
 */
#define LXB_ENCODING_DECODE_ERROR_BEGIN \
    do { \
        if (ctx->replace_to == NULL) { \
            return LXB_STATUS_ERROR; \
        } \
 \
        if ((ctx->buffer_used + ctx->replace_len) > ctx->buffer_length) { \
            do

#define LXB_ENCODING_DECODE_ERROR_END() \
            while (0); \
 \
            return LXB_STATUS_SMALL_BUFFER; \
        } \
 \
        memcpy(&ctx->buffer_out[ctx->buffer_used], ctx->replace_to, \
               sizeof(lxb_codepoint_t) * ctx->replace_len); \
 \
        ctx->buffer_used += ctx->replace_len; \
    } \
    while (0)

/* Emit the replacement sequence with no state to save on a short buffer. */
#define LXB_ENCODING_DECODE_ERROR(ctx) \
    do { \
        LXB_ENCODING_DECODE_ERROR_BEGIN { \
        } LXB_ENCODING_DECODE_ERROR_END(); \
    } \
    while (0)

/*
 * Report a failed multi-byte sequence whose last byte is in `byte`.
 * An ASCII trail byte (< 0x80) is pushed back onto the input first so it
 * is decoded again on its own.  When the replacement does not fit, the
 * error is remembered (have_error, and `ident` set to 0x01) so the next
 * call can emit it.
 */
#define LXB_ENCODING_DECODE_FAILED(ident) \
    do { \
        if ((byte) < (0x80)) { \
            (*data)--; \
        } \
 \
        LXB_ENCODING_DECODE_ERROR_BEGIN { \
            ctx->have_error = true; \
            (ident) = 0x01; \
        } \
        LXB_ENCODING_DECODE_ERROR_END(); \
    } \
    while (0)

/*
 * Body of every single-byte decoder: walks [*data, end), copies bytes
 * below 0x80 unchanged and maps bytes 0x80..0xFF through
 * decode_map[byte - 0x80].codepoint.  Entries equal to
 * LXB_ENCODING_ERROR_CODEPOINT are replaced via
 * LXB_ENCODING_DECODE_ERROR_BEGIN/END.
 *
 * A byte is consumed only after its output has been stored.  Whenever the
 * macro returns early (small buffer) `*data` therefore points at the first
 * byte that has not been decoded yet, and after a full pass `*data == end`
 * even when the final byte was undecodable.
 */
#define LXB_ENCODING_DECODE_SINGLE(decode_map) \
    do { \
        const lxb_char_t *p = *data; \
 \
        while (p < end) { \
            if (*p < 0x80) { \
                LXB_ENCODING_DECODE_APPEND_P(ctx, *p++); \
            } \
            else { \
                ctx->codepoint = decode_map[*p - 0x80].codepoint; \
 \
                if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) { \
                    LXB_ENCODING_DECODE_ERROR_BEGIN { \
                        *data = p; \
                    } \
                    LXB_ENCODING_DECODE_ERROR_END(); \
                } \
                else { \
                    LXB_ENCODING_DECODE_APPEND_P(ctx, ctx->codepoint); \
                } \
 \
                p++; \
            } \
 \
            *data = p; \
        } \
    } \
    while (0)
|
152
|
-
|
153
|
-
/*
 * Consume one UTF-8 continuation byte directly from `*data`.
 *
 * Uses `ch`, `needed`, `data` and `ctx` from the expanding function and
 * jumps to that function's `failed` label when the byte is outside
 * [lower, upper]; in that case nothing is consumed.  On success the input
 * pointer advances, `needed` is decremented and the low six bits are
 * shifted into ctx->codepoint.
 */
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SINGLE(lower, upper) \
    do { \
        ch = **data; \
 \
        if (ch < lower || ch > upper) { \
            goto failed; \
        } \
 \
        ctx->codepoint = (ctx->codepoint << 6) | (ch & 0x3F); \
        needed--; \
        (*data)++; \
    } \
    while (0)

/*
 * Select the accepted range for the next continuation byte from the byte
 * held in `ch`:
 *   ch == first -> [f_lower, 0xBF]
 *   ch == two   -> [0x80, s_upper]
 * Any other value leaves ctx->u.utf_8.lower/upper untouched.
 */
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET_SINGLE(first, two, f_lower, \
                                                      s_upper) \
    do { \
        if (ch == first) { \
            ctx->u.utf_8.upper = 0xBF; \
            ctx->u.utf_8.lower = f_lower; \
        } \
        else if (ch == two) { \
            ctx->u.utf_8.upper = s_upper; \
            ctx->u.utf_8.lower = 0x80; \
        } \
    } \
    while (0)
|
180
|
-
|
181
|
-
|
182
|
-
lxb_status_t
|
183
|
-
lxb_encoding_decode_default(lxb_encoding_decode_t *ctx,
|
184
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
185
|
-
{
|
186
|
-
return lxb_encoding_decode_utf_8(ctx, data, end);
|
187
|
-
}
|
188
|
-
|
189
|
-
lxb_status_t
|
190
|
-
lxb_encoding_decode_auto(lxb_encoding_decode_t *ctx,
|
191
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
192
|
-
{
|
193
|
-
*data = end;
|
194
|
-
return LXB_STATUS_ERROR;
|
195
|
-
}
|
196
|
-
|
197
|
-
lxb_status_t
|
198
|
-
lxb_encoding_decode_undefined(lxb_encoding_decode_t *ctx,
|
199
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
200
|
-
{
|
201
|
-
*data = end;
|
202
|
-
return LXB_STATUS_ERROR;
|
203
|
-
}
|
204
|
-
|
205
|
-
lxb_status_t
|
206
|
-
lxb_encoding_decode_big5(lxb_encoding_decode_t *ctx,
|
207
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
208
|
-
{
|
209
|
-
uint32_t index;
|
210
|
-
lxb_char_t lead, byte;
|
211
|
-
|
212
|
-
ctx->status = LXB_STATUS_OK;
|
213
|
-
|
214
|
-
if (ctx->u.lead != 0x00) {
|
215
|
-
if (ctx->have_error) {
|
216
|
-
ctx->u.lead = 0x00;
|
217
|
-
ctx->have_error = false;
|
218
|
-
|
219
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
220
|
-
ctx->u.lead = 0x01;
|
221
|
-
ctx->have_error = true;
|
222
|
-
} LXB_ENCODING_DECODE_ERROR_END();
|
223
|
-
}
|
224
|
-
else if (ctx->second_codepoint != 0x0000) {
|
225
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
226
|
-
return LXB_STATUS_SMALL_BUFFER;
|
227
|
-
}
|
228
|
-
|
229
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->u.lead);
|
230
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->second_codepoint);
|
231
|
-
|
232
|
-
ctx->u.lead = 0x00;
|
233
|
-
ctx->second_codepoint = 0x0000;
|
234
|
-
}
|
235
|
-
else {
|
236
|
-
if (*data >= end) {
|
237
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
238
|
-
|
239
|
-
return LXB_STATUS_CONTINUE;
|
240
|
-
}
|
241
|
-
|
242
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
243
|
-
|
244
|
-
lead = (lxb_char_t) ctx->u.lead;
|
245
|
-
ctx->u.lead = 0x00;
|
246
|
-
|
247
|
-
goto lead_state;
|
248
|
-
}
|
249
|
-
}
|
250
|
-
|
251
|
-
while (*data < end) {
|
252
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
253
|
-
|
254
|
-
lead = *(*data)++;
|
255
|
-
|
256
|
-
if (lead < 0x80) {
|
257
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, lead);
|
258
|
-
continue;
|
259
|
-
}
|
260
|
-
|
261
|
-
if ((unsigned) (lead - 0x81) > (0xFE - 0x81)) {
|
262
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
263
|
-
(*data)--;
|
264
|
-
}
|
265
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
266
|
-
|
267
|
-
continue;
|
268
|
-
}
|
269
|
-
|
270
|
-
if (*data >= end) {
|
271
|
-
ctx->u.lead = lead;
|
272
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
273
|
-
|
274
|
-
return LXB_STATUS_CONTINUE;
|
275
|
-
}
|
276
|
-
|
277
|
-
lead_state:
|
278
|
-
|
279
|
-
index = 0;
|
280
|
-
byte = *(*data)++;
|
281
|
-
|
282
|
-
if ((unsigned) (byte - 0x40) <= (0x7E - 0x40)
|
283
|
-
|| (unsigned) (byte - 0xA1) <= (0xFE - 0xA1))
|
284
|
-
{
|
285
|
-
if (byte < 0x7F) {
|
286
|
-
/* Max index == (0xFE - 0x81) * 157 + (0x7E - 0x62) == 19653 */
|
287
|
-
index = (lead - 0x81) * 157 + (byte - 0x40);
|
288
|
-
}
|
289
|
-
else {
|
290
|
-
/* Max index == (0xFE - 0x81) * 157 + (0xFE - 0x62) == 19781 */
|
291
|
-
index = (lead - 0x81) * 157 + (byte - 0x62);
|
292
|
-
}
|
293
|
-
}
|
294
|
-
|
295
|
-
/*
|
296
|
-
* 1133 U+00CA U+0304 Ê̄ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON)
|
297
|
-
* 1135 U+00CA U+030C Ê̌ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON)
|
298
|
-
* 1164 U+00EA U+0304 ê̄ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON)
|
299
|
-
* 1166 U+00EA U+030C ê̌ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON)
|
300
|
-
*/
|
301
|
-
switch (index) {
|
302
|
-
case 1133:
|
303
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
304
|
-
ctx->u.lead = 0x00CA;
|
305
|
-
ctx->second_codepoint = 0x0304;
|
306
|
-
|
307
|
-
return LXB_STATUS_SMALL_BUFFER;
|
308
|
-
}
|
309
|
-
|
310
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x00CA);
|
311
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x0304);
|
312
|
-
|
313
|
-
continue;
|
314
|
-
|
315
|
-
case 1135:
|
316
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
317
|
-
ctx->u.lead = 0x00CA;
|
318
|
-
ctx->second_codepoint = 0x030C;
|
319
|
-
|
320
|
-
return LXB_STATUS_SMALL_BUFFER;
|
321
|
-
}
|
322
|
-
|
323
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x00CA);
|
324
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x030C);
|
325
|
-
|
326
|
-
continue;
|
327
|
-
|
328
|
-
case 1164:
|
329
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
330
|
-
ctx->u.lead = 0x00EA;
|
331
|
-
ctx->second_codepoint = 0x0304;
|
332
|
-
|
333
|
-
return LXB_STATUS_SMALL_BUFFER;
|
334
|
-
}
|
335
|
-
|
336
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x00EA);
|
337
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x0304);
|
338
|
-
|
339
|
-
continue;
|
340
|
-
|
341
|
-
case 1166:
|
342
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
343
|
-
ctx->u.lead = 0x00EA;
|
344
|
-
ctx->second_codepoint = 0x030C;
|
345
|
-
|
346
|
-
return LXB_STATUS_SMALL_BUFFER;
|
347
|
-
}
|
348
|
-
|
349
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x00EA);
|
350
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x030C);
|
351
|
-
|
352
|
-
continue;
|
353
|
-
|
354
|
-
case 0:
|
355
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.lead);
|
356
|
-
continue;
|
357
|
-
}
|
358
|
-
|
359
|
-
ctx->codepoint = lxb_encoding_multi_index_big5[index].codepoint;
|
360
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
361
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.lead);
|
362
|
-
continue;
|
363
|
-
}
|
364
|
-
|
365
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
366
|
-
}
|
367
|
-
|
368
|
-
return LXB_STATUS_OK;
|
369
|
-
}
|
370
|
-
|
371
|
-
lxb_status_t
|
372
|
-
lxb_encoding_decode_euc_jp(lxb_encoding_decode_t *ctx,
|
373
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
374
|
-
{
|
375
|
-
bool is_jis0212;
|
376
|
-
lxb_char_t byte, lead;
|
377
|
-
|
378
|
-
ctx->status = LXB_STATUS_OK;
|
379
|
-
|
380
|
-
if (ctx->u.euc_jp.lead != 0x00) {
|
381
|
-
if (ctx->have_error) {
|
382
|
-
ctx->have_error = false;
|
383
|
-
ctx->u.euc_jp.lead = 0x00;
|
384
|
-
|
385
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
386
|
-
ctx->have_error = true;
|
387
|
-
ctx->u.euc_jp.lead = 0x01;
|
388
|
-
} LXB_ENCODING_DECODE_ERROR_END();
|
389
|
-
}
|
390
|
-
else {
|
391
|
-
if (*data >= end) {
|
392
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
393
|
-
|
394
|
-
return LXB_STATUS_CONTINUE;
|
395
|
-
}
|
396
|
-
|
397
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
398
|
-
|
399
|
-
lead = ctx->u.euc_jp.lead;
|
400
|
-
byte = *(*data)++;
|
401
|
-
|
402
|
-
ctx->u.euc_jp.lead = 0x00;
|
403
|
-
|
404
|
-
if (ctx->u.euc_jp.is_jis0212) {
|
405
|
-
is_jis0212 = true;
|
406
|
-
ctx->u.euc_jp.is_jis0212 = false;
|
407
|
-
|
408
|
-
goto lead_jis_state;
|
409
|
-
}
|
410
|
-
|
411
|
-
goto lead_state;
|
412
|
-
}
|
413
|
-
}
|
414
|
-
|
415
|
-
while (*data < end) {
|
416
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
417
|
-
|
418
|
-
lead = *(*data)++;
|
419
|
-
|
420
|
-
if (lead < 0x80) {
|
421
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, lead);
|
422
|
-
continue;
|
423
|
-
}
|
424
|
-
|
425
|
-
if ((unsigned) (lead - 0xA1) > (0xFE - 0xA1)
|
426
|
-
&& (lead != 0x8E && lead != 0x8F))
|
427
|
-
{
|
428
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
429
|
-
(*data)--;
|
430
|
-
}
|
431
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
432
|
-
|
433
|
-
continue;
|
434
|
-
}
|
435
|
-
|
436
|
-
if (*data >= end) {
|
437
|
-
ctx->u.euc_jp.lead = lead;
|
438
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
439
|
-
|
440
|
-
return LXB_STATUS_CONTINUE;
|
441
|
-
}
|
442
|
-
|
443
|
-
byte = *(*data)++;
|
444
|
-
|
445
|
-
lead_state:
|
446
|
-
|
447
|
-
if (lead == 0x8E && (unsigned) (byte - 0xA1) <= (0xDF - 0xA1)) {
|
448
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0xFF61 - 0xA1 + byte);
|
449
|
-
continue;
|
450
|
-
}
|
451
|
-
|
452
|
-
is_jis0212 = false;
|
453
|
-
|
454
|
-
if (lead == 0x8F && (unsigned) (byte - 0xA1) <= (0xFE - 0xA1)) {
|
455
|
-
if (*data >= end) {
|
456
|
-
ctx->u.euc_jp.lead = byte;
|
457
|
-
ctx->u.euc_jp.is_jis0212 = true;
|
458
|
-
|
459
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
460
|
-
|
461
|
-
return LXB_STATUS_CONTINUE;
|
462
|
-
}
|
463
|
-
|
464
|
-
lead = byte;
|
465
|
-
byte = *(*data)++;
|
466
|
-
is_jis0212 = true;
|
467
|
-
}
|
468
|
-
|
469
|
-
lead_jis_state:
|
470
|
-
|
471
|
-
if ((unsigned) (lead - 0xA1) > (0xFE - 0xA1)
|
472
|
-
|| (unsigned) (byte - 0xA1) > (0xFE - 0xA1))
|
473
|
-
{
|
474
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.euc_jp.lead);
|
475
|
-
continue;
|
476
|
-
}
|
477
|
-
|
478
|
-
/* Max index == (0xFE - 0xA1) * 94 + 0xFE - 0xA1 == 8835 */
|
479
|
-
ctx->codepoint = (lead - 0xA1) * 94 + byte - 0xA1;
|
480
|
-
|
481
|
-
if (is_jis0212) {
|
482
|
-
if ((sizeof(lxb_encoding_multi_index_jis0212)
|
483
|
-
/ sizeof(lxb_encoding_multi_index_t)) <= ctx->codepoint)
|
484
|
-
{
|
485
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.euc_jp.lead);
|
486
|
-
continue;
|
487
|
-
}
|
488
|
-
|
489
|
-
ctx->codepoint = lxb_encoding_multi_index_jis0212[ctx->codepoint].codepoint;
|
490
|
-
}
|
491
|
-
else {
|
492
|
-
if ((sizeof(lxb_encoding_multi_index_jis0208)
|
493
|
-
/ sizeof(lxb_encoding_multi_index_t)) <= ctx->codepoint)
|
494
|
-
{
|
495
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.euc_jp.lead);
|
496
|
-
continue;
|
497
|
-
}
|
498
|
-
|
499
|
-
ctx->codepoint = lxb_encoding_multi_index_jis0208[ctx->codepoint].codepoint;
|
500
|
-
}
|
501
|
-
|
502
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
503
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.euc_jp.lead);
|
504
|
-
continue;
|
505
|
-
}
|
506
|
-
|
507
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
508
|
-
}
|
509
|
-
|
510
|
-
return LXB_STATUS_OK;
|
511
|
-
}
|
512
|
-
|
513
|
-
lxb_status_t
|
514
|
-
lxb_encoding_decode_euc_kr(lxb_encoding_decode_t *ctx,
|
515
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
516
|
-
{
|
517
|
-
lxb_char_t lead, byte;
|
518
|
-
|
519
|
-
ctx->status = LXB_STATUS_OK;
|
520
|
-
|
521
|
-
if (ctx->u.lead != 0x00) {
|
522
|
-
if (ctx->have_error) {
|
523
|
-
ctx->have_error = false;
|
524
|
-
ctx->u.lead = 0x00;
|
525
|
-
|
526
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
527
|
-
ctx->have_error = true;
|
528
|
-
ctx->u.lead = 0x01;
|
529
|
-
} LXB_ENCODING_DECODE_ERROR_END();
|
530
|
-
}
|
531
|
-
else {
|
532
|
-
if (*data >= end) {
|
533
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
534
|
-
|
535
|
-
return LXB_STATUS_CONTINUE;
|
536
|
-
}
|
537
|
-
|
538
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
539
|
-
|
540
|
-
lead = (lxb_char_t) ctx->u.lead;
|
541
|
-
ctx->u.lead = 0x00;
|
542
|
-
|
543
|
-
goto lead_state;
|
544
|
-
}
|
545
|
-
}
|
546
|
-
|
547
|
-
while (*data < end) {
|
548
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
549
|
-
|
550
|
-
lead = *(*data)++;
|
551
|
-
|
552
|
-
if (lead < 0x80) {
|
553
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, lead);
|
554
|
-
continue;
|
555
|
-
}
|
556
|
-
|
557
|
-
if ((unsigned) (lead - 0x81) > (0xFE - 0x81)) {
|
558
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
559
|
-
(*data)--;
|
560
|
-
}
|
561
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
562
|
-
|
563
|
-
continue;
|
564
|
-
}
|
565
|
-
|
566
|
-
if (*data == end) {
|
567
|
-
ctx->u.lead = lead;
|
568
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
569
|
-
|
570
|
-
return LXB_STATUS_CONTINUE;
|
571
|
-
}
|
572
|
-
|
573
|
-
lead_state:
|
574
|
-
|
575
|
-
byte = *(*data)++;
|
576
|
-
|
577
|
-
if ((unsigned) (byte - 0x41) > (0xFE - 0x41)) {
|
578
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.lead);
|
579
|
-
continue;
|
580
|
-
}
|
581
|
-
|
582
|
-
/* Max index == (0xFE - 0x81) * 190 + (0xFE - 0x41) == 23939 */
|
583
|
-
ctx->codepoint = (lead - 0x81) * 190 + (byte - 0x41);
|
584
|
-
|
585
|
-
if (ctx->codepoint >= sizeof(lxb_encoding_multi_index_euc_kr)
|
586
|
-
/ sizeof(lxb_encoding_multi_index_t))
|
587
|
-
{
|
588
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.lead);
|
589
|
-
continue;
|
590
|
-
}
|
591
|
-
|
592
|
-
ctx->codepoint = lxb_encoding_multi_index_euc_kr[ctx->codepoint].codepoint;
|
593
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
594
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.lead);
|
595
|
-
continue;
|
596
|
-
}
|
597
|
-
|
598
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
599
|
-
}
|
600
|
-
|
601
|
-
return LXB_STATUS_OK;
|
602
|
-
}
|
603
|
-
|
604
|
-
lxb_status_t
|
605
|
-
lxb_encoding_decode_gbk(lxb_encoding_decode_t *ctx,
|
606
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
607
|
-
{
|
608
|
-
return lxb_encoding_decode_gb18030(ctx, data, end);
|
609
|
-
}
|
610
|
-
|
611
|
-
lxb_status_t
|
612
|
-
lxb_encoding_decode_ibm866(lxb_encoding_decode_t *ctx,
|
613
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
614
|
-
{
|
615
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_ibm866);
|
616
|
-
|
617
|
-
return LXB_STATUS_OK;
|
618
|
-
}
|
619
|
-
|
620
|
-
lxb_status_t
|
621
|
-
lxb_encoding_decode_iso_2022_jp(lxb_encoding_decode_t *ctx,
|
622
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
623
|
-
{
|
624
|
-
#define LXB_ENCODING_DECODE_ISO_2022_JP_OK() \
|
625
|
-
do { \
|
626
|
-
if (*data >= end) { \
|
627
|
-
return LXB_STATUS_OK; \
|
628
|
-
} \
|
629
|
-
} \
|
630
|
-
while (0)
|
631
|
-
|
632
|
-
#define LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE() \
|
633
|
-
do { \
|
634
|
-
if (*data >= end) { \
|
635
|
-
ctx->status = LXB_STATUS_CONTINUE; \
|
636
|
-
return LXB_STATUS_CONTINUE; \
|
637
|
-
} \
|
638
|
-
} \
|
639
|
-
while (0)
|
640
|
-
|
641
|
-
|
642
|
-
lxb_char_t byte;
|
643
|
-
lxb_encoding_ctx_2022_jp_t *iso = &ctx->u.iso_2022_jp;
|
644
|
-
|
645
|
-
ctx->status = LXB_STATUS_OK;
|
646
|
-
|
647
|
-
if (ctx->have_error) {
|
648
|
-
ctx->have_error = false;
|
649
|
-
|
650
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
651
|
-
ctx->have_error = true;
|
652
|
-
}
|
653
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
654
|
-
}
|
655
|
-
|
656
|
-
if (iso->prepand != 0x00) {
|
657
|
-
if (*data >= end) {
|
658
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
659
|
-
|
660
|
-
return LXB_STATUS_CONTINUE;
|
661
|
-
}
|
662
|
-
|
663
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
664
|
-
|
665
|
-
byte = iso->prepand;
|
666
|
-
iso->prepand = 0x00;
|
667
|
-
|
668
|
-
goto prepand;
|
669
|
-
}
|
670
|
-
|
671
|
-
if (*data >= end) {
|
672
|
-
return LXB_STATUS_OK;
|
673
|
-
}
|
674
|
-
|
675
|
-
do {
|
676
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
677
|
-
|
678
|
-
byte = *(*data)++;
|
679
|
-
|
680
|
-
prepand:
|
681
|
-
|
682
|
-
switch (iso->state) {
|
683
|
-
case LXB_ENCODING_DECODE_2022_JP_ASCII:
|
684
|
-
if (byte == 0x1B) {
|
685
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
686
|
-
|
687
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE();
|
688
|
-
break;
|
689
|
-
}
|
690
|
-
|
691
|
-
/* 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B */
|
692
|
-
if ((unsigned) (byte - 0x00) <= (0x7F - 0x00)
|
693
|
-
&& byte != 0x0E && byte != 0x0F)
|
694
|
-
{
|
695
|
-
iso->out_flag = false;
|
696
|
-
|
697
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, byte);
|
698
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
699
|
-
break;
|
700
|
-
}
|
701
|
-
|
702
|
-
iso->out_flag = false;
|
703
|
-
|
704
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
705
|
-
ctx->have_error = true;
|
706
|
-
}
|
707
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
708
|
-
|
709
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
710
|
-
break;
|
711
|
-
|
712
|
-
case LXB_ENCODING_DECODE_2022_JP_ROMAN:
|
713
|
-
switch (byte) {
|
714
|
-
case 0x1B:
|
715
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
716
|
-
|
717
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE();
|
718
|
-
continue;
|
719
|
-
|
720
|
-
case 0x5C:
|
721
|
-
iso->out_flag = false;
|
722
|
-
|
723
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x00A5);
|
724
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
725
|
-
|
726
|
-
continue;
|
727
|
-
|
728
|
-
case 0x7E:
|
729
|
-
iso->out_flag = false;
|
730
|
-
|
731
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x203E);
|
732
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
733
|
-
|
734
|
-
continue;
|
735
|
-
|
736
|
-
case 0x0E:
|
737
|
-
case 0x0F:
|
738
|
-
break;
|
739
|
-
|
740
|
-
default:
|
741
|
-
/* 0x00 to 0x7F */
|
742
|
-
if ((unsigned) (byte - 0x00) <= (0x7F - 0x00)) {
|
743
|
-
iso->out_flag = false;
|
744
|
-
|
745
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, byte);
|
746
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
747
|
-
|
748
|
-
continue;
|
749
|
-
}
|
750
|
-
|
751
|
-
break;
|
752
|
-
}
|
753
|
-
|
754
|
-
iso->out_flag = false;
|
755
|
-
|
756
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
757
|
-
ctx->have_error = true;
|
758
|
-
}
|
759
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
760
|
-
|
761
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
762
|
-
break;
|
763
|
-
|
764
|
-
case LXB_ENCODING_DECODE_2022_JP_KATAKANA:
|
765
|
-
if (byte == 0x1B) {
|
766
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
767
|
-
|
768
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE();
|
769
|
-
break;
|
770
|
-
}
|
771
|
-
|
772
|
-
/* 0x21 to 0x5F */
|
773
|
-
if ((unsigned) (byte - 0x21) <= (0x5F - 0x21)) {
|
774
|
-
iso->out_flag = false;
|
775
|
-
|
776
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx,
|
777
|
-
0xFF61 - 0x21 + byte);
|
778
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
779
|
-
break;
|
780
|
-
}
|
781
|
-
|
782
|
-
iso->out_flag = false;
|
783
|
-
|
784
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
785
|
-
ctx->have_error = true;
|
786
|
-
}
|
787
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
788
|
-
|
789
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
790
|
-
break;
|
791
|
-
|
792
|
-
case LXB_ENCODING_DECODE_2022_JP_LEAD:
|
793
|
-
if (byte == 0x1B) {
|
794
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
795
|
-
|
796
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE();
|
797
|
-
break;
|
798
|
-
}
|
799
|
-
|
800
|
-
/* 0x21 to 0x7E */
|
801
|
-
if ((unsigned) (byte - 0x21) <= (0x7E - 0x21)) {
|
802
|
-
iso->out_flag = false;
|
803
|
-
iso->lead = byte;
|
804
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_TRAIL;
|
805
|
-
|
806
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE();
|
807
|
-
break;
|
808
|
-
}
|
809
|
-
|
810
|
-
iso->out_flag = false;
|
811
|
-
|
812
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
813
|
-
ctx->have_error = true;
|
814
|
-
}
|
815
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
816
|
-
|
817
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
818
|
-
break;
|
819
|
-
|
820
|
-
case LXB_ENCODING_DECODE_2022_JP_TRAIL:
|
821
|
-
if (byte == 0x1B) {
|
822
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
823
|
-
|
824
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
825
|
-
ctx->have_error = true;
|
826
|
-
}
|
827
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
828
|
-
|
829
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
830
|
-
break;
|
831
|
-
}
|
832
|
-
|
833
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_LEAD;
|
834
|
-
|
835
|
-
/* 0x21 to 0x7E */
|
836
|
-
if ((unsigned) (byte - 0x21) <= (0x7E - 0x21)) {
|
837
|
-
/* Max index == (0x7E - 0x21) * 94 + 0x7E - 0x21 == 8835 */
|
838
|
-
ctx->codepoint = (iso->lead - 0x21) * 94 + byte - 0x21;
|
839
|
-
|
840
|
-
ctx->codepoint = lxb_encoding_multi_index_jis0208[ctx->codepoint].codepoint;
|
841
|
-
|
842
|
-
if (ctx->codepoint != LXB_ENCODING_ERROR_CODEPOINT) {
|
843
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
844
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
845
|
-
|
846
|
-
break;
|
847
|
-
}
|
848
|
-
}
|
849
|
-
|
850
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
851
|
-
iso->prepand = 0x01;
|
852
|
-
ctx->have_error = true;
|
853
|
-
}
|
854
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
855
|
-
|
856
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
857
|
-
break;
|
858
|
-
|
859
|
-
case LXB_ENCODING_DECODE_2022_JP_ESCAPE_START:
|
860
|
-
if (byte == 0x24 || byte == 0x28) {
|
861
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE;
|
862
|
-
iso->lead = byte;
|
863
|
-
|
864
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE();
|
865
|
-
break;
|
866
|
-
}
|
867
|
-
|
868
|
-
(*data)--;
|
869
|
-
|
870
|
-
iso->out_flag = false;
|
871
|
-
iso->state = ctx->u.iso_2022_jp.out_state;
|
872
|
-
|
873
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
874
|
-
iso->prepand = 0x01;
|
875
|
-
ctx->have_error = true;
|
876
|
-
}
|
877
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
878
|
-
|
879
|
-
break;
|
880
|
-
|
881
|
-
case LXB_ENCODING_DECODE_2022_JP_ESCAPE:
|
882
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_UNSET;
|
883
|
-
|
884
|
-
if (iso->lead == 0x28) {
|
885
|
-
if (byte == 0x42) {
|
886
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ASCII;
|
887
|
-
}
|
888
|
-
else if (byte == 0x4A) {
|
889
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ROMAN;
|
890
|
-
}
|
891
|
-
else if (byte == 0x49) {
|
892
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_KATAKANA;
|
893
|
-
}
|
894
|
-
}
|
895
|
-
else if (iso->lead == 0x24) {
|
896
|
-
if (byte == 0x40 || byte == 0x42) {
|
897
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_LEAD;
|
898
|
-
}
|
899
|
-
}
|
900
|
-
|
901
|
-
if (iso->state == LXB_ENCODING_DECODE_2022_JP_UNSET) {
|
902
|
-
(*data)--;
|
903
|
-
|
904
|
-
iso->out_flag = false;
|
905
|
-
iso->state = iso->out_state;
|
906
|
-
|
907
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
908
|
-
iso->prepand = iso->lead;
|
909
|
-
iso->lead = 0x00;
|
910
|
-
|
911
|
-
ctx->have_error = true;
|
912
|
-
}
|
913
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
914
|
-
|
915
|
-
byte = iso->lead;
|
916
|
-
iso->lead = 0x00;
|
917
|
-
|
918
|
-
goto prepand;
|
919
|
-
}
|
920
|
-
|
921
|
-
iso->lead = 0x00;
|
922
|
-
iso->out_state = iso->state;
|
923
|
-
|
924
|
-
if (iso->out_flag) {
|
925
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
926
|
-
ctx->have_error = true;
|
927
|
-
}
|
928
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
929
|
-
|
930
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_OK();
|
931
|
-
break;
|
932
|
-
}
|
933
|
-
|
934
|
-
iso->out_flag = true;
|
935
|
-
|
936
|
-
LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE();
|
937
|
-
break;
|
938
|
-
}
|
939
|
-
}
|
940
|
-
while (true);
|
941
|
-
|
942
|
-
return LXB_STATUS_OK;
|
943
|
-
|
944
|
-
#undef LXB_ENCODING_DECODE_ISO_2022_JP_OK
|
945
|
-
#undef LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE
|
946
|
-
}
|
947
|
-
|
948
|
-
lxb_status_t
|
949
|
-
lxb_encoding_decode_iso_8859_10(lxb_encoding_decode_t *ctx,
|
950
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
951
|
-
{
|
952
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_10);
|
953
|
-
|
954
|
-
return LXB_STATUS_OK;
|
955
|
-
}
|
956
|
-
|
957
|
-
lxb_status_t
|
958
|
-
lxb_encoding_decode_iso_8859_13(lxb_encoding_decode_t *ctx,
|
959
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
960
|
-
{
|
961
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_13);
|
962
|
-
|
963
|
-
return LXB_STATUS_OK;
|
964
|
-
}
|
965
|
-
|
966
|
-
lxb_status_t
|
967
|
-
lxb_encoding_decode_iso_8859_14(lxb_encoding_decode_t *ctx,
|
968
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
969
|
-
{
|
970
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_14);
|
971
|
-
|
972
|
-
return LXB_STATUS_OK;
|
973
|
-
}
|
974
|
-
|
975
|
-
lxb_status_t
|
976
|
-
lxb_encoding_decode_iso_8859_15(lxb_encoding_decode_t *ctx,
|
977
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
978
|
-
{
|
979
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_15);
|
980
|
-
|
981
|
-
return LXB_STATUS_OK;
|
982
|
-
}
|
983
|
-
|
984
|
-
lxb_status_t
|
985
|
-
lxb_encoding_decode_iso_8859_16(lxb_encoding_decode_t *ctx,
|
986
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
987
|
-
{
|
988
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_16);
|
989
|
-
|
990
|
-
return LXB_STATUS_OK;
|
991
|
-
}
|
992
|
-
|
993
|
-
lxb_status_t
|
994
|
-
lxb_encoding_decode_iso_8859_2(lxb_encoding_decode_t *ctx,
|
995
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
996
|
-
{
|
997
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_2);
|
998
|
-
|
999
|
-
return LXB_STATUS_OK;
|
1000
|
-
}
|
1001
|
-
|
1002
|
-
lxb_status_t
|
1003
|
-
lxb_encoding_decode_iso_8859_3(lxb_encoding_decode_t *ctx,
|
1004
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1005
|
-
{
|
1006
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_3);
|
1007
|
-
|
1008
|
-
return LXB_STATUS_OK;
|
1009
|
-
}
|
1010
|
-
|
1011
|
-
lxb_status_t
|
1012
|
-
lxb_encoding_decode_iso_8859_4(lxb_encoding_decode_t *ctx,
|
1013
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1014
|
-
{
|
1015
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_4);
|
1016
|
-
|
1017
|
-
return LXB_STATUS_OK;
|
1018
|
-
}
|
1019
|
-
|
1020
|
-
lxb_status_t
|
1021
|
-
lxb_encoding_decode_iso_8859_5(lxb_encoding_decode_t *ctx,
|
1022
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1023
|
-
{
|
1024
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_5);
|
1025
|
-
|
1026
|
-
return LXB_STATUS_OK;
|
1027
|
-
}
|
1028
|
-
|
1029
|
-
lxb_status_t
|
1030
|
-
lxb_encoding_decode_iso_8859_6(lxb_encoding_decode_t *ctx,
|
1031
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1032
|
-
{
|
1033
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_6);
|
1034
|
-
|
1035
|
-
return LXB_STATUS_OK;
|
1036
|
-
}
|
1037
|
-
|
1038
|
-
lxb_status_t
|
1039
|
-
lxb_encoding_decode_iso_8859_7(lxb_encoding_decode_t *ctx,
|
1040
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1041
|
-
{
|
1042
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_7);
|
1043
|
-
|
1044
|
-
return LXB_STATUS_OK;
|
1045
|
-
}
|
1046
|
-
|
1047
|
-
lxb_status_t
|
1048
|
-
lxb_encoding_decode_iso_8859_8(lxb_encoding_decode_t *ctx,
|
1049
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1050
|
-
{
|
1051
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_8);
|
1052
|
-
|
1053
|
-
return LXB_STATUS_OK;
|
1054
|
-
}
|
1055
|
-
|
1056
|
-
lxb_status_t
|
1057
|
-
lxb_encoding_decode_iso_8859_8_i(lxb_encoding_decode_t *ctx,
|
1058
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1059
|
-
{
|
1060
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_iso_8859_8);
|
1061
|
-
|
1062
|
-
return LXB_STATUS_OK;
|
1063
|
-
}
|
1064
|
-
|
1065
|
-
lxb_status_t
|
1066
|
-
lxb_encoding_decode_koi8_r(lxb_encoding_decode_t *ctx,
|
1067
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1068
|
-
{
|
1069
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_koi8_r);
|
1070
|
-
|
1071
|
-
return LXB_STATUS_OK;
|
1072
|
-
}
|
1073
|
-
|
1074
|
-
lxb_status_t
|
1075
|
-
lxb_encoding_decode_koi8_u(lxb_encoding_decode_t *ctx,
|
1076
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1077
|
-
{
|
1078
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_koi8_u);
|
1079
|
-
|
1080
|
-
return LXB_STATUS_OK;
|
1081
|
-
}
|
1082
|
-
|
1083
|
-
lxb_status_t
|
1084
|
-
lxb_encoding_decode_shift_jis(lxb_encoding_decode_t *ctx,
|
1085
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1086
|
-
{
|
1087
|
-
lxb_char_t byte, lead;
|
1088
|
-
|
1089
|
-
ctx->status = LXB_STATUS_OK;
|
1090
|
-
|
1091
|
-
if (ctx->u.lead != 0x00) {
|
1092
|
-
if (ctx->have_error) {
|
1093
|
-
ctx->have_error = false;
|
1094
|
-
ctx->u.lead = 0x00;
|
1095
|
-
|
1096
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1097
|
-
ctx->have_error = true;
|
1098
|
-
ctx->u.lead = 0x01;
|
1099
|
-
} LXB_ENCODING_DECODE_ERROR_END();
|
1100
|
-
}
|
1101
|
-
else {
|
1102
|
-
if (*data >= end) {
|
1103
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1104
|
-
|
1105
|
-
return LXB_STATUS_CONTINUE;
|
1106
|
-
}
|
1107
|
-
|
1108
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
1109
|
-
|
1110
|
-
lead = (lxb_char_t) ctx->u.lead;
|
1111
|
-
ctx->u.lead = 0x00;
|
1112
|
-
|
1113
|
-
goto lead_state;
|
1114
|
-
}
|
1115
|
-
}
|
1116
|
-
|
1117
|
-
while (*data < end) {
|
1118
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
1119
|
-
|
1120
|
-
lead = *(*data)++;
|
1121
|
-
|
1122
|
-
if (lead <= 0x80) {
|
1123
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, lead);
|
1124
|
-
continue;
|
1125
|
-
}
|
1126
|
-
|
1127
|
-
if ((unsigned) (lead - 0xA1) <= (0xDF - 0xA1)) {
|
1128
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0xFF61 - 0xA1 + lead);
|
1129
|
-
continue;
|
1130
|
-
}
|
1131
|
-
|
1132
|
-
if ((unsigned) (lead - 0x81) > (0x9F - 0x81)
|
1133
|
-
&& lead != 0xE0 && lead != 0xFC)
|
1134
|
-
{
|
1135
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1136
|
-
(*data)--;
|
1137
|
-
}
|
1138
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1139
|
-
|
1140
|
-
continue;
|
1141
|
-
}
|
1142
|
-
|
1143
|
-
if (*data >= end) {
|
1144
|
-
ctx->u.lead = lead;
|
1145
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1146
|
-
|
1147
|
-
return LXB_STATUS_CONTINUE;
|
1148
|
-
}
|
1149
|
-
|
1150
|
-
lead_state:
|
1151
|
-
|
1152
|
-
byte = *(*data)++;
|
1153
|
-
|
1154
|
-
if (byte < 0x7F) {
|
1155
|
-
ctx->codepoint = 0x40;
|
1156
|
-
}
|
1157
|
-
else {
|
1158
|
-
ctx->codepoint = 0x41;
|
1159
|
-
}
|
1160
|
-
|
1161
|
-
if (lead < 0xA0) {
|
1162
|
-
ctx->second_codepoint = 0x81;
|
1163
|
-
}
|
1164
|
-
else {
|
1165
|
-
ctx->second_codepoint = 0xC1;
|
1166
|
-
}
|
1167
|
-
|
1168
|
-
if ((unsigned) (byte - 0x40) > (0x7E - 0x40)
|
1169
|
-
&& (unsigned) (byte - 0x80) > (0xFC - 0x80))
|
1170
|
-
{
|
1171
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.lead);
|
1172
|
-
continue;
|
1173
|
-
}
|
1174
|
-
|
1175
|
-
/* Max index == (0xFC - 0xC1) * 188 + 0xFC - 0x41 = 11279 */
|
1176
|
-
ctx->codepoint = (lead - ctx->second_codepoint) * 188
|
1177
|
-
+ byte - ctx->codepoint;
|
1178
|
-
|
1179
|
-
if (ctx->codepoint >= (sizeof(lxb_encoding_multi_index_jis0208)
|
1180
|
-
/ sizeof(lxb_encoding_multi_index_t)))
|
1181
|
-
{
|
1182
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.lead);
|
1183
|
-
continue;
|
1184
|
-
}
|
1185
|
-
|
1186
|
-
if ((unsigned) (ctx->codepoint - 8836) <= (10715 - 8836)) {
|
1187
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0xE000 - 8836 + ctx->codepoint);
|
1188
|
-
continue;
|
1189
|
-
}
|
1190
|
-
|
1191
|
-
ctx->codepoint = lxb_encoding_multi_index_jis0208[ctx->codepoint].codepoint;
|
1192
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
1193
|
-
LXB_ENCODING_DECODE_FAILED(ctx->u.lead);
|
1194
|
-
continue;
|
1195
|
-
}
|
1196
|
-
|
1197
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
1198
|
-
}
|
1199
|
-
|
1200
|
-
return LXB_STATUS_OK;
|
1201
|
-
}
|
1202
|
-
|
1203
|
-
lxb_inline lxb_status_t
|
1204
|
-
lxb_encoding_decode_utf_16(lxb_encoding_decode_t *ctx, bool is_be,
|
1205
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1206
|
-
{
|
1207
|
-
unsigned lead;
|
1208
|
-
lxb_codepoint_t unit;
|
1209
|
-
|
1210
|
-
ctx->status = LXB_STATUS_OK;
|
1211
|
-
|
1212
|
-
if (ctx->have_error) {
|
1213
|
-
ctx->have_error = false;
|
1214
|
-
|
1215
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1216
|
-
ctx->have_error = true;
|
1217
|
-
}
|
1218
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1219
|
-
}
|
1220
|
-
|
1221
|
-
if (ctx->u.lead != 0x00) {
|
1222
|
-
if (*data >= end) {
|
1223
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1224
|
-
|
1225
|
-
return LXB_STATUS_CONTINUE;
|
1226
|
-
}
|
1227
|
-
|
1228
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
1229
|
-
|
1230
|
-
lead = ctx->u.lead - 0x01;
|
1231
|
-
ctx->u.lead = 0x00;
|
1232
|
-
|
1233
|
-
goto lead_state;
|
1234
|
-
}
|
1235
|
-
|
1236
|
-
while (*data < end) {
|
1237
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
1238
|
-
|
1239
|
-
pair_state:
|
1240
|
-
|
1241
|
-
lead = *(*data)++;
|
1242
|
-
|
1243
|
-
if (*data >= end) {
|
1244
|
-
ctx->u.lead = lead + 0x01;
|
1245
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1246
|
-
|
1247
|
-
return LXB_STATUS_CONTINUE;
|
1248
|
-
}
|
1249
|
-
|
1250
|
-
lead_state:
|
1251
|
-
|
1252
|
-
/* For UTF-16BE or UTF-16LE */
|
1253
|
-
if (is_be) {
|
1254
|
-
unit = (lead << 8) + *(*data)++;
|
1255
|
-
}
|
1256
|
-
else {
|
1257
|
-
unit = (*(*data)++ << 8) + lead;
|
1258
|
-
}
|
1259
|
-
|
1260
|
-
if (ctx->second_codepoint != 0x00) {
|
1261
|
-
if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
|
1262
|
-
ctx->codepoint = 0x10000 + ((ctx->second_codepoint - 0xD800) << 10)
|
1263
|
-
+ (unit - 0xDC00);
|
1264
|
-
|
1265
|
-
ctx->second_codepoint = 0x00;
|
1266
|
-
|
1267
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
1268
|
-
continue;
|
1269
|
-
}
|
1270
|
-
|
1271
|
-
(*data)--;
|
1272
|
-
|
1273
|
-
ctx->second_codepoint = 0x00;
|
1274
|
-
|
1275
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1276
|
-
ctx->have_error = true;
|
1277
|
-
|
1278
|
-
ctx->u.lead = lead + 0x01;
|
1279
|
-
}
|
1280
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1281
|
-
|
1282
|
-
goto lead_state;
|
1283
|
-
}
|
1284
|
-
|
1285
|
-
/* Surrogate pair */
|
1286
|
-
if ((unsigned) (unit - 0xD800) <= (0xDFFF - 0xD800)) {
|
1287
|
-
if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
|
1288
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1289
|
-
ctx->have_error = true;
|
1290
|
-
}
|
1291
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1292
|
-
|
1293
|
-
continue;
|
1294
|
-
}
|
1295
|
-
|
1296
|
-
ctx->second_codepoint = unit;
|
1297
|
-
|
1298
|
-
if (*data >= end) {
|
1299
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1300
|
-
|
1301
|
-
return LXB_STATUS_CONTINUE;
|
1302
|
-
}
|
1303
|
-
|
1304
|
-
goto pair_state;
|
1305
|
-
}
|
1306
|
-
|
1307
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, unit);
|
1308
|
-
}
|
1309
|
-
|
1310
|
-
return LXB_STATUS_OK;
|
1311
|
-
}
|
1312
|
-
|
1313
|
-
lxb_status_t
|
1314
|
-
lxb_encoding_decode_utf_16be(lxb_encoding_decode_t *ctx,
|
1315
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1316
|
-
{
|
1317
|
-
return lxb_encoding_decode_utf_16(ctx, true, data, end);
|
1318
|
-
}
|
1319
|
-
|
1320
|
-
lxb_status_t
|
1321
|
-
lxb_encoding_decode_utf_16le(lxb_encoding_decode_t *ctx,
|
1322
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1323
|
-
{
|
1324
|
-
return lxb_encoding_decode_utf_16(ctx, false, data, end);
|
1325
|
-
}
|
1326
|
-
|
1327
|
-
lxb_status_t
|
1328
|
-
lxb_encoding_decode_utf_8(lxb_encoding_decode_t *ctx,
|
1329
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1330
|
-
{
|
1331
|
-
unsigned need;
|
1332
|
-
lxb_char_t ch;
|
1333
|
-
const lxb_char_t *p = *data;
|
1334
|
-
|
1335
|
-
ctx->status = LXB_STATUS_OK;
|
1336
|
-
|
1337
|
-
if (ctx->have_error) {
|
1338
|
-
ctx->have_error = false;
|
1339
|
-
|
1340
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1341
|
-
ctx->have_error = true;
|
1342
|
-
}
|
1343
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1344
|
-
}
|
1345
|
-
|
1346
|
-
if (ctx->u.utf_8.need != 0) {
|
1347
|
-
if (p >= end) {
|
1348
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1349
|
-
|
1350
|
-
return LXB_STATUS_CONTINUE;
|
1351
|
-
}
|
1352
|
-
|
1353
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
1354
|
-
|
1355
|
-
need = ctx->u.utf_8.need;
|
1356
|
-
ctx->u.utf_8.need = 0;
|
1357
|
-
|
1358
|
-
if (ctx->u.utf_8.lower != 0x00) {
|
1359
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY(ctx->u.utf_8.lower,
|
1360
|
-
ctx->u.utf_8.upper, goto begin);
|
1361
|
-
ctx->u.utf_8.lower = 0x00;
|
1362
|
-
}
|
1363
|
-
|
1364
|
-
goto decode;
|
1365
|
-
}
|
1366
|
-
|
1367
|
-
begin:
|
1368
|
-
|
1369
|
-
while (p < end) {
|
1370
|
-
if (ctx->buffer_used >= ctx->buffer_length) {
|
1371
|
-
*data = p;
|
1372
|
-
|
1373
|
-
return LXB_STATUS_SMALL_BUFFER;
|
1374
|
-
}
|
1375
|
-
|
1376
|
-
ch = *p++;
|
1377
|
-
|
1378
|
-
if (ch < 0x80) {
|
1379
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ch);
|
1380
|
-
continue;
|
1381
|
-
}
|
1382
|
-
else if (ch <= 0xDF) {
|
1383
|
-
if (ch < 0xC2) {
|
1384
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1385
|
-
*data = p - 1;
|
1386
|
-
}
|
1387
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1388
|
-
|
1389
|
-
continue;
|
1390
|
-
}
|
1391
|
-
|
1392
|
-
need = 1;
|
1393
|
-
ctx->codepoint = ch & 0x1F;
|
1394
|
-
}
|
1395
|
-
else if (ch < 0xF0) {
|
1396
|
-
need = 2;
|
1397
|
-
ctx->codepoint = ch & 0x0F;
|
1398
|
-
|
1399
|
-
if (p == end) {
|
1400
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET(0xE0, 0xED, 0xA0, 0x9F);
|
1401
|
-
|
1402
|
-
*data = p;
|
1403
|
-
|
1404
|
-
ctx->u.utf_8.need = need;
|
1405
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1406
|
-
|
1407
|
-
return LXB_STATUS_CONTINUE;
|
1408
|
-
}
|
1409
|
-
|
1410
|
-
if (ch == 0xE0) {
|
1411
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY(0xA0, 0xBF, continue);
|
1412
|
-
}
|
1413
|
-
else if (ch == 0xED) {
|
1414
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY(0x80, 0x9F, continue);
|
1415
|
-
}
|
1416
|
-
}
|
1417
|
-
else if (ch < 0xF5) {
|
1418
|
-
need = 3;
|
1419
|
-
ctx->codepoint = ch & 0x07;
|
1420
|
-
|
1421
|
-
if (p == end) {
|
1422
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET(0xF0, 0xF4, 0x90, 0x8F);
|
1423
|
-
|
1424
|
-
*data = p;
|
1425
|
-
|
1426
|
-
ctx->u.utf_8.need = need;
|
1427
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1428
|
-
|
1429
|
-
return LXB_STATUS_CONTINUE;
|
1430
|
-
}
|
1431
|
-
|
1432
|
-
if (ch == 0xF0) {
|
1433
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY(0x90, 0xBF, continue);
|
1434
|
-
}
|
1435
|
-
else if (ch == 0xF4) {
|
1436
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY(0x80, 0x8F, continue);
|
1437
|
-
}
|
1438
|
-
}
|
1439
|
-
else {
|
1440
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1441
|
-
*data = p - 1;
|
1442
|
-
}
|
1443
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1444
|
-
|
1445
|
-
continue;
|
1446
|
-
}
|
1447
|
-
|
1448
|
-
decode:
|
1449
|
-
|
1450
|
-
do {
|
1451
|
-
if (p >= end) {
|
1452
|
-
*data = p;
|
1453
|
-
|
1454
|
-
ctx->u.utf_8.need = need;
|
1455
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1456
|
-
|
1457
|
-
return LXB_STATUS_CONTINUE;
|
1458
|
-
}
|
1459
|
-
|
1460
|
-
ch = *p++;
|
1461
|
-
|
1462
|
-
if (ch < 0x80 || ch > 0xBF) {
|
1463
|
-
p--;
|
1464
|
-
|
1465
|
-
ctx->u.utf_8.need = 0;
|
1466
|
-
|
1467
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1468
|
-
*data = p;
|
1469
|
-
ctx->have_error = true;
|
1470
|
-
}
|
1471
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1472
|
-
|
1473
|
-
break;
|
1474
|
-
}
|
1475
|
-
|
1476
|
-
ctx->codepoint = (ctx->codepoint << 6) | (ch & 0x3F);
|
1477
|
-
|
1478
|
-
if (--need == 0) {
|
1479
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
1480
|
-
|
1481
|
-
break;
|
1482
|
-
}
|
1483
|
-
}
|
1484
|
-
while (true);
|
1485
|
-
}
|
1486
|
-
|
1487
|
-
*data = p;
|
1488
|
-
|
1489
|
-
return LXB_STATUS_OK;
|
1490
|
-
}
|
1491
|
-
|
1492
|
-
lxb_inline lxb_codepoint_t
|
1493
|
-
lxb_encoding_decode_gb18030_range(uint32_t index)
|
1494
|
-
{
|
1495
|
-
size_t mid, left, right;
|
1496
|
-
const lxb_encoding_range_index_t *range;
|
1497
|
-
|
1498
|
-
/*
|
1499
|
-
* Pointer greater than 39419 and less than 189000,
|
1500
|
-
* or pointer is greater than 1237575
|
1501
|
-
*/
|
1502
|
-
if ((unsigned) (index - 39419) < (189000 - 39419)
|
1503
|
-
|| index > 1237575)
|
1504
|
-
{
|
1505
|
-
return LXB_ENCODING_ERROR_CODEPOINT;
|
1506
|
-
}
|
1507
|
-
|
1508
|
-
if (index == 7457) {
|
1509
|
-
return 0xE7C7;
|
1510
|
-
}
|
1511
|
-
|
1512
|
-
left = 0;
|
1513
|
-
right = LXB_ENCODING_RANGE_INDEX_GB18030_SIZE;
|
1514
|
-
range = lxb_encoding_range_index_gb18030;
|
1515
|
-
|
1516
|
-
/* Some compilers say about uninitialized mid */
|
1517
|
-
mid = 0;
|
1518
|
-
|
1519
|
-
while (left < right) {
|
1520
|
-
mid = left + (right - left) / 2;
|
1521
|
-
|
1522
|
-
if (range[mid].index < index) {
|
1523
|
-
left = mid + 1;
|
1524
|
-
|
1525
|
-
if (left < right && range[ left ].index > index) {
|
1526
|
-
break;
|
1527
|
-
}
|
1528
|
-
}
|
1529
|
-
else if (range[mid].index > index) {
|
1530
|
-
right = mid - 1;
|
1531
|
-
|
1532
|
-
if (right > 0 && range[right].index <= index) {
|
1533
|
-
mid = right;
|
1534
|
-
break;
|
1535
|
-
}
|
1536
|
-
}
|
1537
|
-
else {
|
1538
|
-
break;
|
1539
|
-
}
|
1540
|
-
}
|
1541
|
-
|
1542
|
-
return range[mid].codepoint + index - range[mid].index;
|
1543
|
-
}
|
1544
|
-
|
1545
|
-
lxb_status_t
|
1546
|
-
lxb_encoding_decode_gb18030(lxb_encoding_decode_t *ctx,
|
1547
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1548
|
-
{
|
1549
|
-
uint32_t pointer;
|
1550
|
-
lxb_char_t first, second, third, offset;
|
1551
|
-
|
1552
|
-
/* Make compiler happy */
|
1553
|
-
second = 0x00;
|
1554
|
-
|
1555
|
-
ctx->status = LXB_STATUS_OK;
|
1556
|
-
|
1557
|
-
if (ctx->have_error) {
|
1558
|
-
ctx->have_error = false;
|
1559
|
-
|
1560
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1561
|
-
ctx->have_error = true;
|
1562
|
-
}
|
1563
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1564
|
-
}
|
1565
|
-
|
1566
|
-
if (ctx->u.gb18030.first != 0) {
|
1567
|
-
if (*data >= end) {
|
1568
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1569
|
-
|
1570
|
-
return LXB_STATUS_CONTINUE;
|
1571
|
-
}
|
1572
|
-
|
1573
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
1574
|
-
|
1575
|
-
if (ctx->u.gb18030.third != 0x00) {
|
1576
|
-
first = ctx->u.gb18030.first;
|
1577
|
-
second = ctx->u.gb18030.second;
|
1578
|
-
third = ctx->u.gb18030.third;
|
1579
|
-
|
1580
|
-
memset(&ctx->u.gb18030, 0, sizeof(lxb_encoding_ctx_gb18030_t));
|
1581
|
-
|
1582
|
-
if (ctx->prepend) {
|
1583
|
-
/* The first is always < 0x80 */
|
1584
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, second);
|
1585
|
-
|
1586
|
-
if (ctx->buffer_used == ctx->buffer_length) {
|
1587
|
-
ctx->u.gb18030.first = third;
|
1588
|
-
|
1589
|
-
return LXB_STATUS_SMALL_BUFFER;
|
1590
|
-
}
|
1591
|
-
|
1592
|
-
first = third;
|
1593
|
-
ctx->prepend = false;
|
1594
|
-
|
1595
|
-
goto prepend_first;
|
1596
|
-
}
|
1597
|
-
|
1598
|
-
goto third_state;
|
1599
|
-
}
|
1600
|
-
else if (ctx->u.gb18030.second != 0x00) {
|
1601
|
-
first = ctx->u.gb18030.first;
|
1602
|
-
second = ctx->u.gb18030.second;
|
1603
|
-
|
1604
|
-
memset(&ctx->u.gb18030, 0, sizeof(lxb_encoding_ctx_gb18030_t));
|
1605
|
-
|
1606
|
-
goto second_state;
|
1607
|
-
}
|
1608
|
-
|
1609
|
-
first = ctx->u.gb18030.first;
|
1610
|
-
ctx->u.gb18030.first = 0x00;
|
1611
|
-
|
1612
|
-
if (ctx->prepend) {
|
1613
|
-
ctx->prepend = false;
|
1614
|
-
goto prepend_first;
|
1615
|
-
}
|
1616
|
-
|
1617
|
-
goto first_state;
|
1618
|
-
}
|
1619
|
-
|
1620
|
-
while (*data < end) {
|
1621
|
-
LXB_ENCODING_DECODE_CHECK_OUT(ctx);
|
1622
|
-
|
1623
|
-
first = *(*data)++;
|
1624
|
-
|
1625
|
-
prepend_first:
|
1626
|
-
|
1627
|
-
if (first < 0x80) {
|
1628
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, first);
|
1629
|
-
continue;
|
1630
|
-
}
|
1631
|
-
|
1632
|
-
if (first == 0x80) {
|
1633
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0x20AC);
|
1634
|
-
continue;
|
1635
|
-
}
|
1636
|
-
|
1637
|
-
/* Range 0x81 to 0xFE, inclusive */
|
1638
|
-
if ((unsigned) (first - 0x81) > (0xFE - 0x81)) {
|
1639
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1640
|
-
(*data)--;
|
1641
|
-
}
|
1642
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1643
|
-
|
1644
|
-
continue;
|
1645
|
-
}
|
1646
|
-
|
1647
|
-
if (*data == end) {
|
1648
|
-
ctx->u.gb18030.first = first;
|
1649
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1650
|
-
|
1651
|
-
return LXB_STATUS_CONTINUE;
|
1652
|
-
}
|
1653
|
-
|
1654
|
-
/* First */
|
1655
|
-
first_state:
|
1656
|
-
|
1657
|
-
second = *(*data)++;
|
1658
|
-
|
1659
|
-
/* Range 0x30 to 0x39, inclusive */
|
1660
|
-
if ((unsigned) (second - 0x30) > (0x39 - 0x30)) {
|
1661
|
-
offset = (second < 0x7F) ? 0x40 : 0x41;
|
1662
|
-
|
1663
|
-
/* Range 0x40 to 0x7E, inclusive, or 0x80 to 0xFE, inclusive */
|
1664
|
-
if ((unsigned) (second - 0x40) <= (0x7E - 0x40)
|
1665
|
-
|| (unsigned) (second - 0x80) <= (0xFE - 0x80))
|
1666
|
-
{
|
1667
|
-
pointer = (first - 0x81) * 190 + (second - offset);
|
1668
|
-
}
|
1669
|
-
else {
|
1670
|
-
if (second < 0x80) {
|
1671
|
-
(*data)--;
|
1672
|
-
}
|
1673
|
-
|
1674
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1675
|
-
ctx->have_error = true;
|
1676
|
-
}
|
1677
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1678
|
-
|
1679
|
-
continue;
|
1680
|
-
}
|
1681
|
-
|
1682
|
-
/* Max pointer value == (0xFE - 0x81) * 190 + (0xFE - 0x41) == 23939 */
|
1683
|
-
ctx->codepoint = lxb_encoding_multi_index_gb18030[pointer].codepoint;
|
1684
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
1685
|
-
if (second < 0x80) {
|
1686
|
-
(*data)--;
|
1687
|
-
}
|
1688
|
-
|
1689
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1690
|
-
ctx->have_error = true;
|
1691
|
-
}
|
1692
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1693
|
-
|
1694
|
-
continue;
|
1695
|
-
}
|
1696
|
-
|
1697
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
1698
|
-
continue;
|
1699
|
-
}
|
1700
|
-
|
1701
|
-
if (*data == end) {
|
1702
|
-
ctx->u.gb18030.first = first;
|
1703
|
-
ctx->u.gb18030.second = second;
|
1704
|
-
|
1705
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1706
|
-
|
1707
|
-
return LXB_STATUS_CONTINUE;
|
1708
|
-
}
|
1709
|
-
|
1710
|
-
/* Second */
|
1711
|
-
second_state:
|
1712
|
-
|
1713
|
-
third = *(*data)++;
|
1714
|
-
|
1715
|
-
/* Range 0x81 to 0xFE, inclusive */
|
1716
|
-
if ((unsigned) (third - 0x81) > (0xFE - 0x81)) {
|
1717
|
-
(*data)--;
|
1718
|
-
|
1719
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1720
|
-
ctx->prepend = true;
|
1721
|
-
ctx->have_error = true;
|
1722
|
-
ctx->u.gb18030.first = second;
|
1723
|
-
}
|
1724
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1725
|
-
|
1726
|
-
first = second;
|
1727
|
-
|
1728
|
-
goto prepend_first;
|
1729
|
-
}
|
1730
|
-
|
1731
|
-
if (*data == end) {
|
1732
|
-
ctx->u.gb18030.first = first;
|
1733
|
-
ctx->u.gb18030.second = second;
|
1734
|
-
ctx->u.gb18030.third = third;
|
1735
|
-
|
1736
|
-
ctx->status = LXB_STATUS_CONTINUE;
|
1737
|
-
|
1738
|
-
return LXB_STATUS_CONTINUE;
|
1739
|
-
}
|
1740
|
-
|
1741
|
-
/* Third */
|
1742
|
-
third_state:
|
1743
|
-
|
1744
|
-
/* Range 0x30 to 0x39, inclusive */
|
1745
|
-
if ((unsigned) (**data - 0x30) > (0x39 - 0x30)) {
|
1746
|
-
ctx->prepend = true;
|
1747
|
-
|
1748
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {
|
1749
|
-
ctx->prepend = true;
|
1750
|
-
ctx->have_error = true;
|
1751
|
-
|
1752
|
-
/* First is a fake for trigger */
|
1753
|
-
ctx->u.gb18030.first = 0x01;
|
1754
|
-
ctx->u.gb18030.second = second;
|
1755
|
-
ctx->u.gb18030.third = third;
|
1756
|
-
}
|
1757
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1758
|
-
|
1759
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, second);
|
1760
|
-
|
1761
|
-
if (ctx->buffer_used == ctx->buffer_length) {
|
1762
|
-
ctx->prepend = true;
|
1763
|
-
ctx->have_error = true;
|
1764
|
-
|
1765
|
-
/* First is a fake for trigger */
|
1766
|
-
ctx->u.gb18030.first = 0x01;
|
1767
|
-
ctx->u.gb18030.second = second;
|
1768
|
-
ctx->u.gb18030.third = third;
|
1769
|
-
|
1770
|
-
return LXB_STATUS_SMALL_BUFFER;
|
1771
|
-
}
|
1772
|
-
|
1773
|
-
first = third;
|
1774
|
-
|
1775
|
-
goto prepend_first;
|
1776
|
-
}
|
1777
|
-
|
1778
|
-
pointer = ((first - 0x81) * (10 * 126 * 10))
|
1779
|
-
+ ((second - 0x30) * (10 * 126))
|
1780
|
-
+ ((third - 0x81) * 10) + (*(*data)++) - 0x30;
|
1781
|
-
|
1782
|
-
ctx->codepoint = lxb_encoding_decode_gb18030_range(pointer);
|
1783
|
-
|
1784
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
1785
|
-
LXB_ENCODING_DECODE_ERROR_BEGIN {}
|
1786
|
-
LXB_ENCODING_DECODE_ERROR_END();
|
1787
|
-
|
1788
|
-
continue;
|
1789
|
-
}
|
1790
|
-
|
1791
|
-
LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, ctx->codepoint);
|
1792
|
-
}
|
1793
|
-
|
1794
|
-
return LXB_STATUS_OK;
|
1795
|
-
}
|
1796
|
-
|
1797
|
-
lxb_status_t
|
1798
|
-
lxb_encoding_decode_macintosh(lxb_encoding_decode_t *ctx,
|
1799
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1800
|
-
{
|
1801
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_macintosh);
|
1802
|
-
|
1803
|
-
return LXB_STATUS_OK;
|
1804
|
-
}
|
1805
|
-
|
1806
|
-
lxb_status_t
|
1807
|
-
lxb_encoding_decode_replacement(lxb_encoding_decode_t *ctx,
|
1808
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1809
|
-
{
|
1810
|
-
*data = end;
|
1811
|
-
return LXB_STATUS_ERROR;
|
1812
|
-
}
|
1813
|
-
|
1814
|
-
lxb_status_t
|
1815
|
-
lxb_encoding_decode_windows_1250(lxb_encoding_decode_t *ctx,
|
1816
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1817
|
-
{
|
1818
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1250);
|
1819
|
-
|
1820
|
-
return LXB_STATUS_OK;
|
1821
|
-
}
|
1822
|
-
|
1823
|
-
lxb_status_t
|
1824
|
-
lxb_encoding_decode_windows_1251(lxb_encoding_decode_t *ctx,
|
1825
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1826
|
-
{
|
1827
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1251);
|
1828
|
-
|
1829
|
-
return LXB_STATUS_OK;
|
1830
|
-
}
|
1831
|
-
|
1832
|
-
lxb_status_t
|
1833
|
-
lxb_encoding_decode_windows_1252(lxb_encoding_decode_t *ctx,
|
1834
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1835
|
-
{
|
1836
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1252);
|
1837
|
-
|
1838
|
-
return LXB_STATUS_OK;
|
1839
|
-
}
|
1840
|
-
|
1841
|
-
lxb_status_t
|
1842
|
-
lxb_encoding_decode_windows_1253(lxb_encoding_decode_t *ctx,
|
1843
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1844
|
-
{
|
1845
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1253);
|
1846
|
-
|
1847
|
-
return LXB_STATUS_OK;
|
1848
|
-
}
|
1849
|
-
|
1850
|
-
lxb_status_t
|
1851
|
-
lxb_encoding_decode_windows_1254(lxb_encoding_decode_t *ctx,
|
1852
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1853
|
-
{
|
1854
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1254);
|
1855
|
-
|
1856
|
-
return LXB_STATUS_OK;
|
1857
|
-
}
|
1858
|
-
|
1859
|
-
lxb_status_t
|
1860
|
-
lxb_encoding_decode_windows_1255(lxb_encoding_decode_t *ctx,
|
1861
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1862
|
-
{
|
1863
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1255);
|
1864
|
-
|
1865
|
-
return LXB_STATUS_OK;
|
1866
|
-
}
|
1867
|
-
|
1868
|
-
lxb_status_t
|
1869
|
-
lxb_encoding_decode_windows_1256(lxb_encoding_decode_t *ctx,
|
1870
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1871
|
-
{
|
1872
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1256);
|
1873
|
-
|
1874
|
-
return LXB_STATUS_OK;
|
1875
|
-
}
|
1876
|
-
|
1877
|
-
lxb_status_t
|
1878
|
-
lxb_encoding_decode_windows_1257(lxb_encoding_decode_t *ctx,
|
1879
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1880
|
-
{
|
1881
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1257);
|
1882
|
-
|
1883
|
-
return LXB_STATUS_OK;
|
1884
|
-
}
|
1885
|
-
|
1886
|
-
lxb_status_t
|
1887
|
-
lxb_encoding_decode_windows_1258(lxb_encoding_decode_t *ctx,
|
1888
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1889
|
-
{
|
1890
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_1258);
|
1891
|
-
|
1892
|
-
return LXB_STATUS_OK;
|
1893
|
-
}
|
1894
|
-
|
1895
|
-
lxb_status_t
|
1896
|
-
lxb_encoding_decode_windows_874(lxb_encoding_decode_t *ctx,
|
1897
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1898
|
-
{
|
1899
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_windows_874);
|
1900
|
-
|
1901
|
-
return LXB_STATUS_OK;
|
1902
|
-
}
|
1903
|
-
|
1904
|
-
lxb_status_t
|
1905
|
-
lxb_encoding_decode_x_mac_cyrillic(lxb_encoding_decode_t *ctx,
|
1906
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1907
|
-
{
|
1908
|
-
LXB_ENCODING_DECODE_SINGLE(lxb_encoding_single_index_x_mac_cyrillic);
|
1909
|
-
|
1910
|
-
return LXB_STATUS_OK;
|
1911
|
-
}
|
1912
|
-
|
1913
|
-
lxb_status_t
|
1914
|
-
lxb_encoding_decode_x_user_defined(lxb_encoding_decode_t *ctx,
|
1915
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1916
|
-
{
|
1917
|
-
while (*data < end) {
|
1918
|
-
if (**data < 0x80) {
|
1919
|
-
LXB_ENCODING_DECODE_APPEND(ctx, *(*data)++);
|
1920
|
-
}
|
1921
|
-
else {
|
1922
|
-
LXB_ENCODING_DECODE_APPEND(ctx, 0xF780 + (*(*data)++) - 0x80);
|
1923
|
-
}
|
1924
|
-
}
|
1925
|
-
|
1926
|
-
return LXB_STATUS_OK;
|
1927
|
-
}
|
1928
|
-
|
1929
|
-
/*
|
1930
|
-
* Single
|
1931
|
-
*/
|
1932
|
-
lxb_codepoint_t
|
1933
|
-
lxb_encoding_decode_default_single(lxb_encoding_decode_t *ctx,
|
1934
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1935
|
-
{
|
1936
|
-
return lxb_encoding_decode_utf_8_single(ctx, data, end);
|
1937
|
-
}
|
1938
|
-
|
1939
|
-
lxb_codepoint_t
|
1940
|
-
lxb_encoding_decode_auto_single(lxb_encoding_decode_t *ctx,
|
1941
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1942
|
-
{
|
1943
|
-
return LXB_ENCODING_DECODE_ERROR;
|
1944
|
-
}
|
1945
|
-
|
1946
|
-
lxb_codepoint_t
|
1947
|
-
lxb_encoding_decode_undefined_single(lxb_encoding_decode_t *ctx,
|
1948
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1949
|
-
{
|
1950
|
-
return LXB_ENCODING_DECODE_ERROR;
|
1951
|
-
}
|
1952
|
-
|
1953
|
-
lxb_codepoint_t
|
1954
|
-
lxb_encoding_decode_big5_single(lxb_encoding_decode_t *ctx,
|
1955
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
1956
|
-
{
|
1957
|
-
uint32_t index;
|
1958
|
-
lxb_char_t lead, byte;
|
1959
|
-
|
1960
|
-
if (ctx->u.lead != 0x00) {
|
1961
|
-
if (ctx->second_codepoint != 0x00) {
|
1962
|
-
(*data)++;
|
1963
|
-
|
1964
|
-
ctx->u.lead = 0x00;
|
1965
|
-
|
1966
|
-
ctx->codepoint = ctx->second_codepoint;
|
1967
|
-
ctx->second_codepoint = 0x00;
|
1968
|
-
|
1969
|
-
return ctx->codepoint;
|
1970
|
-
}
|
1971
|
-
|
1972
|
-
lead = (lxb_char_t) ctx->u.lead;
|
1973
|
-
ctx->u.lead = 0x00;
|
1974
|
-
|
1975
|
-
goto lead_state;
|
1976
|
-
}
|
1977
|
-
|
1978
|
-
lead = *(*data)++;
|
1979
|
-
|
1980
|
-
if (lead < 0x80) {
|
1981
|
-
return lead;
|
1982
|
-
}
|
1983
|
-
|
1984
|
-
if ((unsigned) (lead - 0x81) > (0xFE - 0x81)) {
|
1985
|
-
return LXB_ENCODING_DECODE_ERROR;
|
1986
|
-
}
|
1987
|
-
|
1988
|
-
if (*data >= end) {
|
1989
|
-
ctx->u.lead = lead;
|
1990
|
-
|
1991
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
1992
|
-
}
|
1993
|
-
|
1994
|
-
lead_state:
|
1995
|
-
|
1996
|
-
index = 0;
|
1997
|
-
byte = **data;
|
1998
|
-
|
1999
|
-
if ((unsigned) (byte - 0x40) <= (0x7E - 0x40)
|
2000
|
-
|| (unsigned) (byte - 0xA1) <= (0xFE - 0xA1))
|
2001
|
-
{
|
2002
|
-
if (byte < 0x7F) {
|
2003
|
-
/* Max index == (0xFE - 0x81) * 157 + (0x7E - 0x62) == 19653 */
|
2004
|
-
index = (lead - 0x81) * 157 + (byte - 0x40);
|
2005
|
-
}
|
2006
|
-
else {
|
2007
|
-
/* Max index == (0xFE - 0x81) * 157 + (0xFE - 0x62) == 19781 */
|
2008
|
-
index = (lead - 0x81) * 157 + (byte - 0x62);
|
2009
|
-
}
|
2010
|
-
}
|
2011
|
-
|
2012
|
-
/*
|
2013
|
-
* 1133 U+00CA U+0304 Ê̄ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON)
|
2014
|
-
* 1135 U+00CA U+030C Ê̌ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON)
|
2015
|
-
* 1164 U+00EA U+0304 ê̄ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON)
|
2016
|
-
* 1166 U+00EA U+030C ê̌ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON)
|
2017
|
-
*/
|
2018
|
-
switch (index) {
|
2019
|
-
case 1133:
|
2020
|
-
ctx->u.lead = lead;
|
2021
|
-
ctx->second_codepoint = 0x0304;
|
2022
|
-
return 0x00CA;
|
2023
|
-
|
2024
|
-
case 1135:
|
2025
|
-
ctx->u.lead = lead;
|
2026
|
-
ctx->second_codepoint = 0x030C;
|
2027
|
-
return 0x00CA;
|
2028
|
-
|
2029
|
-
case 1164:
|
2030
|
-
ctx->u.lead = lead;
|
2031
|
-
ctx->second_codepoint = 0x0304;
|
2032
|
-
return 0x00EA;
|
2033
|
-
|
2034
|
-
case 1166:
|
2035
|
-
ctx->u.lead = lead;
|
2036
|
-
ctx->second_codepoint = 0x030C;
|
2037
|
-
return 0x00EA;
|
2038
|
-
|
2039
|
-
case 0:
|
2040
|
-
goto failed;
|
2041
|
-
}
|
2042
|
-
|
2043
|
-
ctx->codepoint = lxb_encoding_multi_index_big5[index].codepoint;
|
2044
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
2045
|
-
goto failed;
|
2046
|
-
}
|
2047
|
-
|
2048
|
-
(*data)++;
|
2049
|
-
|
2050
|
-
return ctx->codepoint;
|
2051
|
-
|
2052
|
-
failed:
|
2053
|
-
|
2054
|
-
if (byte >= 0x80) {
|
2055
|
-
(*data)++;
|
2056
|
-
}
|
2057
|
-
|
2058
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2059
|
-
}
|
2060
|
-
|
2061
|
-
lxb_codepoint_t
|
2062
|
-
lxb_encoding_decode_euc_jp_single(lxb_encoding_decode_t *ctx,
|
2063
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2064
|
-
{
|
2065
|
-
bool is_jis0212;
|
2066
|
-
lxb_char_t byte, lead;
|
2067
|
-
|
2068
|
-
if (ctx->u.euc_jp.lead != 0x00) {
|
2069
|
-
lead = ctx->u.euc_jp.lead;
|
2070
|
-
byte = *(*data)++;
|
2071
|
-
|
2072
|
-
ctx->u.euc_jp.lead = 0x00;
|
2073
|
-
|
2074
|
-
if (ctx->u.euc_jp.is_jis0212) {
|
2075
|
-
is_jis0212 = true;
|
2076
|
-
ctx->u.euc_jp.is_jis0212 = false;
|
2077
|
-
|
2078
|
-
goto lead_jis_state;
|
2079
|
-
}
|
2080
|
-
|
2081
|
-
goto lead_state;
|
2082
|
-
}
|
2083
|
-
|
2084
|
-
lead = *(*data)++;
|
2085
|
-
|
2086
|
-
if (lead < 0x80) {
|
2087
|
-
return lead;
|
2088
|
-
}
|
2089
|
-
|
2090
|
-
if ((unsigned) (lead - 0xA1) > (0xFE - 0xA1)
|
2091
|
-
&& (lead != 0x8E && lead != 0x8F))
|
2092
|
-
{
|
2093
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2094
|
-
}
|
2095
|
-
|
2096
|
-
if (*data >= end) {
|
2097
|
-
ctx->u.euc_jp.lead = lead;
|
2098
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2099
|
-
}
|
2100
|
-
|
2101
|
-
byte = *(*data)++;
|
2102
|
-
|
2103
|
-
lead_state:
|
2104
|
-
|
2105
|
-
if (lead == 0x8E && (unsigned) (byte - 0xA1) <= (0xDF - 0xA1)) {
|
2106
|
-
return 0xFF61 - 0xA1 + byte;
|
2107
|
-
}
|
2108
|
-
|
2109
|
-
is_jis0212 = false;
|
2110
|
-
|
2111
|
-
if (lead == 0x8F && (unsigned) (byte - 0xA1) <= (0xFE - 0xA1)) {
|
2112
|
-
if (*data >= end) {
|
2113
|
-
ctx->u.euc_jp.lead = byte;
|
2114
|
-
ctx->u.euc_jp.is_jis0212 = true;
|
2115
|
-
|
2116
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2117
|
-
}
|
2118
|
-
|
2119
|
-
lead = byte;
|
2120
|
-
byte = *(*data)++;
|
2121
|
-
is_jis0212 = true;
|
2122
|
-
}
|
2123
|
-
|
2124
|
-
lead_jis_state:
|
2125
|
-
|
2126
|
-
if ((unsigned) (lead - 0xA1) > (0xFE - 0xA1)
|
2127
|
-
|| (unsigned) (byte - 0xA1) > (0xFE - 0xA1))
|
2128
|
-
{
|
2129
|
-
goto failed;
|
2130
|
-
}
|
2131
|
-
|
2132
|
-
/* Max index == (0xFE - 0xA1) * 94 + 0xFE - 0xA1 == 8835 */
|
2133
|
-
ctx->codepoint = (lead - 0xA1) * 94 + byte - 0xA1;
|
2134
|
-
|
2135
|
-
if (is_jis0212) {
|
2136
|
-
if ((sizeof(lxb_encoding_multi_index_jis0212)
|
2137
|
-
/ sizeof(lxb_encoding_multi_index_t)) <= ctx->codepoint)
|
2138
|
-
{
|
2139
|
-
goto failed;
|
2140
|
-
}
|
2141
|
-
|
2142
|
-
ctx->codepoint = lxb_encoding_multi_index_jis0212[ctx->codepoint].codepoint;
|
2143
|
-
}
|
2144
|
-
else {
|
2145
|
-
if ((sizeof(lxb_encoding_multi_index_jis0208)
|
2146
|
-
/ sizeof(lxb_encoding_multi_index_t)) <= ctx->codepoint)
|
2147
|
-
{
|
2148
|
-
goto failed;
|
2149
|
-
}
|
2150
|
-
|
2151
|
-
ctx->codepoint = lxb_encoding_multi_index_jis0208[ctx->codepoint].codepoint;
|
2152
|
-
}
|
2153
|
-
|
2154
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
2155
|
-
goto failed;
|
2156
|
-
}
|
2157
|
-
|
2158
|
-
return ctx->codepoint;
|
2159
|
-
|
2160
|
-
failed:
|
2161
|
-
|
2162
|
-
if (byte < 0x80) {
|
2163
|
-
(*data)--;
|
2164
|
-
}
|
2165
|
-
|
2166
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2167
|
-
}
|
2168
|
-
|
2169
|
-
lxb_codepoint_t
|
2170
|
-
lxb_encoding_decode_euc_kr_single(lxb_encoding_decode_t *ctx,
|
2171
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2172
|
-
{
|
2173
|
-
lxb_char_t lead, byte;
|
2174
|
-
|
2175
|
-
if (ctx->u.lead != 0x00) {
|
2176
|
-
lead = (lxb_char_t) ctx->u.lead;
|
2177
|
-
ctx->u.lead = 0x00;
|
2178
|
-
|
2179
|
-
goto lead_state;
|
2180
|
-
}
|
2181
|
-
|
2182
|
-
lead = *(*data)++;
|
2183
|
-
|
2184
|
-
if (lead < 0x80) {
|
2185
|
-
return lead;
|
2186
|
-
}
|
2187
|
-
|
2188
|
-
if ((unsigned) (lead - 0x81) > (0xFE - 0x81)) {
|
2189
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2190
|
-
}
|
2191
|
-
|
2192
|
-
if (*data == end) {
|
2193
|
-
ctx->u.lead = lead;
|
2194
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2195
|
-
}
|
2196
|
-
|
2197
|
-
lead_state:
|
2198
|
-
|
2199
|
-
byte = *(*data)++;
|
2200
|
-
|
2201
|
-
if ((unsigned) (byte - 0x41) > (0xFE - 0x41)) {
|
2202
|
-
goto failed;
|
2203
|
-
}
|
2204
|
-
|
2205
|
-
/* Max index == (0xFE - 0x81) * 190 + (0xFE - 0x41) == 23939 */
|
2206
|
-
ctx->codepoint = (lead - 0x81) * 190 + (byte - 0x41);
|
2207
|
-
|
2208
|
-
if (ctx->codepoint >= sizeof(lxb_encoding_multi_index_euc_kr)
|
2209
|
-
/ sizeof(lxb_encoding_multi_index_t))
|
2210
|
-
{
|
2211
|
-
goto failed;
|
2212
|
-
}
|
2213
|
-
|
2214
|
-
ctx->codepoint = lxb_encoding_multi_index_euc_kr[ctx->codepoint].codepoint;
|
2215
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
2216
|
-
goto failed;
|
2217
|
-
}
|
2218
|
-
|
2219
|
-
return ctx->codepoint;
|
2220
|
-
|
2221
|
-
failed:
|
2222
|
-
|
2223
|
-
if (byte < 0x80) {
|
2224
|
-
(*data)--;
|
2225
|
-
}
|
2226
|
-
|
2227
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2228
|
-
}
|
2229
|
-
|
2230
|
-
lxb_codepoint_t
|
2231
|
-
lxb_encoding_decode_gbk_single(lxb_encoding_decode_t *ctx,
|
2232
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2233
|
-
{
|
2234
|
-
return lxb_encoding_decode_gb18030_single(ctx, data, end);
|
2235
|
-
}
|
2236
|
-
|
2237
|
-
lxb_codepoint_t
|
2238
|
-
lxb_encoding_decode_ibm866_single(lxb_encoding_decode_t *ctx,
|
2239
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2240
|
-
{
|
2241
|
-
if (**data < 0x80) {
|
2242
|
-
return *(*data)++;
|
2243
|
-
}
|
2244
|
-
|
2245
|
-
return lxb_encoding_single_index_ibm866[*(*data)++ - 0x80].codepoint;
|
2246
|
-
}
|
2247
|
-
|
2248
|
-
lxb_codepoint_t
|
2249
|
-
lxb_encoding_decode_iso_2022_jp_single(lxb_encoding_decode_t *ctx,
|
2250
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2251
|
-
{
|
2252
|
-
lxb_char_t byte;
|
2253
|
-
lxb_encoding_ctx_2022_jp_t *iso = &ctx->u.iso_2022_jp;
|
2254
|
-
|
2255
|
-
if (iso->prepand != 0x00) {
|
2256
|
-
byte = iso->prepand;
|
2257
|
-
iso->prepand = 0x00;
|
2258
|
-
|
2259
|
-
goto prepand;
|
2260
|
-
}
|
2261
|
-
|
2262
|
-
do {
|
2263
|
-
byte = *(*data)++;
|
2264
|
-
|
2265
|
-
prepand:
|
2266
|
-
|
2267
|
-
switch (iso->state) {
|
2268
|
-
case LXB_ENCODING_DECODE_2022_JP_ASCII:
|
2269
|
-
if (byte == 0x1B) {
|
2270
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
2271
|
-
|
2272
|
-
break;
|
2273
|
-
}
|
2274
|
-
|
2275
|
-
/* 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B */
|
2276
|
-
if ((unsigned) (byte - 0x00) <= (0x7F - 0x00)
|
2277
|
-
&& byte != 0x0E && byte != 0x0F)
|
2278
|
-
{
|
2279
|
-
iso->out_flag = false;
|
2280
|
-
|
2281
|
-
return byte;
|
2282
|
-
}
|
2283
|
-
|
2284
|
-
iso->out_flag = false;
|
2285
|
-
|
2286
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2287
|
-
|
2288
|
-
case LXB_ENCODING_DECODE_2022_JP_ROMAN:
|
2289
|
-
switch (byte) {
|
2290
|
-
case 0x1B:
|
2291
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
2292
|
-
|
2293
|
-
continue;
|
2294
|
-
|
2295
|
-
case 0x5C:
|
2296
|
-
iso->out_flag = false;
|
2297
|
-
|
2298
|
-
return 0x00A5;
|
2299
|
-
|
2300
|
-
case 0x7E:
|
2301
|
-
iso->out_flag = false;
|
2302
|
-
|
2303
|
-
return 0x203E;
|
2304
|
-
|
2305
|
-
case 0x0E:
|
2306
|
-
case 0x0F:
|
2307
|
-
break;
|
2308
|
-
|
2309
|
-
default:
|
2310
|
-
/* 0x00 to 0x7F */
|
2311
|
-
if ((unsigned) (byte - 0x00) <= (0x7F - 0x00)) {
|
2312
|
-
iso->out_flag = false;
|
2313
|
-
|
2314
|
-
return byte;
|
2315
|
-
}
|
2316
|
-
|
2317
|
-
break;
|
2318
|
-
}
|
2319
|
-
|
2320
|
-
iso->out_flag = false;
|
2321
|
-
|
2322
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2323
|
-
|
2324
|
-
case LXB_ENCODING_DECODE_2022_JP_KATAKANA:
|
2325
|
-
if (byte == 0x1B) {
|
2326
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
2327
|
-
|
2328
|
-
break;
|
2329
|
-
}
|
2330
|
-
|
2331
|
-
/* 0x21 to 0x5F */
|
2332
|
-
if ((unsigned) (byte - 0x21) <= (0x5F - 0x21)) {
|
2333
|
-
iso->out_flag = false;
|
2334
|
-
|
2335
|
-
return 0xFF61 - 0x21 + byte;
|
2336
|
-
}
|
2337
|
-
|
2338
|
-
iso->out_flag = false;
|
2339
|
-
|
2340
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2341
|
-
|
2342
|
-
case LXB_ENCODING_DECODE_2022_JP_LEAD:
|
2343
|
-
if (byte == 0x1B) {
|
2344
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
2345
|
-
|
2346
|
-
break;
|
2347
|
-
}
|
2348
|
-
|
2349
|
-
/* 0x21 to 0x7E */
|
2350
|
-
if ((unsigned) (byte - 0x21) <= (0x7E - 0x21)) {
|
2351
|
-
iso->out_flag = false;
|
2352
|
-
iso->lead = byte;
|
2353
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_TRAIL;
|
2354
|
-
|
2355
|
-
break;
|
2356
|
-
}
|
2357
|
-
|
2358
|
-
iso->out_flag = false;
|
2359
|
-
|
2360
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2361
|
-
|
2362
|
-
case LXB_ENCODING_DECODE_2022_JP_TRAIL:
|
2363
|
-
if (byte == 0x1B) {
|
2364
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE_START;
|
2365
|
-
|
2366
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2367
|
-
}
|
2368
|
-
|
2369
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_LEAD;
|
2370
|
-
|
2371
|
-
/* 0x21 to 0x7E */
|
2372
|
-
if ((unsigned) (byte - 0x21) <= (0x7E - 0x21)) {
|
2373
|
-
/* Max index == (0x7E - 0x21) * 94 + 0x7E - 0x21 == 8835 */
|
2374
|
-
ctx->codepoint = (iso->lead - 0x21) * 94 + byte - 0x21;
|
2375
|
-
|
2376
|
-
return lxb_encoding_multi_index_jis0208[ctx->codepoint].codepoint;
|
2377
|
-
}
|
2378
|
-
|
2379
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2380
|
-
|
2381
|
-
case LXB_ENCODING_DECODE_2022_JP_ESCAPE_START:
|
2382
|
-
if (byte == 0x24 || byte == 0x28) {
|
2383
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ESCAPE;
|
2384
|
-
iso->lead = byte;
|
2385
|
-
|
2386
|
-
break;
|
2387
|
-
}
|
2388
|
-
|
2389
|
-
(*data)--;
|
2390
|
-
|
2391
|
-
iso->out_flag = false;
|
2392
|
-
iso->state = ctx->u.iso_2022_jp.out_state;
|
2393
|
-
|
2394
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2395
|
-
|
2396
|
-
case LXB_ENCODING_DECODE_2022_JP_ESCAPE:
|
2397
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_UNSET;
|
2398
|
-
|
2399
|
-
if (iso->lead == 0x28) {
|
2400
|
-
if (byte == 0x42) {
|
2401
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ASCII;
|
2402
|
-
}
|
2403
|
-
else if (byte == 0x4A) {
|
2404
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_ROMAN;
|
2405
|
-
}
|
2406
|
-
else if (byte == 0x49) {
|
2407
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_KATAKANA;
|
2408
|
-
}
|
2409
|
-
}
|
2410
|
-
else if (iso->lead == 0x24) {
|
2411
|
-
if (byte == 0x40 || byte == 0x42) {
|
2412
|
-
iso->state = LXB_ENCODING_DECODE_2022_JP_LEAD;
|
2413
|
-
}
|
2414
|
-
}
|
2415
|
-
|
2416
|
-
if (iso->state == LXB_ENCODING_DECODE_2022_JP_UNSET) {
|
2417
|
-
iso->prepand = iso->lead;
|
2418
|
-
iso->lead = 0x00;
|
2419
|
-
|
2420
|
-
(*data)--;
|
2421
|
-
|
2422
|
-
iso->out_flag = false;
|
2423
|
-
iso->state = iso->out_state;
|
2424
|
-
|
2425
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2426
|
-
}
|
2427
|
-
|
2428
|
-
iso->lead = 0x00;
|
2429
|
-
iso->out_state = iso->state;
|
2430
|
-
|
2431
|
-
if (iso->out_flag) {
|
2432
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2433
|
-
}
|
2434
|
-
|
2435
|
-
iso->out_flag = true;
|
2436
|
-
|
2437
|
-
break;
|
2438
|
-
}
|
2439
|
-
}
|
2440
|
-
while (*data < end);
|
2441
|
-
|
2442
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2443
|
-
}
|
2444
|
-
|
2445
|
-
lxb_codepoint_t
|
2446
|
-
lxb_encoding_decode_iso_8859_10_single(lxb_encoding_decode_t *ctx,
|
2447
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2448
|
-
{
|
2449
|
-
if (**data < 0x80) {
|
2450
|
-
return *(*data)++;
|
2451
|
-
}
|
2452
|
-
|
2453
|
-
return lxb_encoding_single_index_iso_8859_10[*(*data)++ - 0x80].codepoint;
|
2454
|
-
}
|
2455
|
-
|
2456
|
-
lxb_codepoint_t
|
2457
|
-
lxb_encoding_decode_iso_8859_13_single(lxb_encoding_decode_t *ctx,
|
2458
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2459
|
-
{
|
2460
|
-
if (**data < 0x80) {
|
2461
|
-
return *(*data)++;
|
2462
|
-
}
|
2463
|
-
|
2464
|
-
return lxb_encoding_single_index_iso_8859_13[*(*data)++ - 0x80].codepoint;
|
2465
|
-
}
|
2466
|
-
|
2467
|
-
lxb_codepoint_t
|
2468
|
-
lxb_encoding_decode_iso_8859_14_single(lxb_encoding_decode_t *ctx,
|
2469
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2470
|
-
{
|
2471
|
-
if (**data < 0x80) {
|
2472
|
-
return *(*data)++;
|
2473
|
-
}
|
2474
|
-
|
2475
|
-
return lxb_encoding_single_index_iso_8859_14[*(*data)++ - 0x80].codepoint;
|
2476
|
-
}
|
2477
|
-
|
2478
|
-
lxb_codepoint_t
|
2479
|
-
lxb_encoding_decode_iso_8859_15_single(lxb_encoding_decode_t *ctx,
|
2480
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2481
|
-
{
|
2482
|
-
if (**data < 0x80) {
|
2483
|
-
return *(*data)++;
|
2484
|
-
}
|
2485
|
-
|
2486
|
-
return lxb_encoding_single_index_iso_8859_15[*(*data)++ - 0x80].codepoint;
|
2487
|
-
}
|
2488
|
-
|
2489
|
-
lxb_codepoint_t
|
2490
|
-
lxb_encoding_decode_iso_8859_16_single(lxb_encoding_decode_t *ctx,
|
2491
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2492
|
-
{
|
2493
|
-
if (**data < 0x80) {
|
2494
|
-
return *(*data)++;
|
2495
|
-
}
|
2496
|
-
|
2497
|
-
return lxb_encoding_single_index_iso_8859_16[*(*data)++ - 0x80].codepoint;
|
2498
|
-
}
|
2499
|
-
|
2500
|
-
lxb_codepoint_t
|
2501
|
-
lxb_encoding_decode_iso_8859_2_single(lxb_encoding_decode_t *ctx,
|
2502
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2503
|
-
{
|
2504
|
-
if (**data < 0x80) {
|
2505
|
-
return *(*data)++;
|
2506
|
-
}
|
2507
|
-
|
2508
|
-
return lxb_encoding_single_index_iso_8859_2[*(*data)++ - 0x80].codepoint;
|
2509
|
-
}
|
2510
|
-
|
2511
|
-
lxb_codepoint_t
|
2512
|
-
lxb_encoding_decode_iso_8859_3_single(lxb_encoding_decode_t *ctx,
|
2513
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2514
|
-
{
|
2515
|
-
if (**data < 0x80) {
|
2516
|
-
return *(*data)++;
|
2517
|
-
}
|
2518
|
-
|
2519
|
-
return lxb_encoding_single_index_iso_8859_3[*(*data)++ - 0x80].codepoint;
|
2520
|
-
}
|
2521
|
-
|
2522
|
-
lxb_codepoint_t
|
2523
|
-
lxb_encoding_decode_iso_8859_4_single(lxb_encoding_decode_t *ctx,
|
2524
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2525
|
-
{
|
2526
|
-
if (**data < 0x80) {
|
2527
|
-
return *(*data)++;
|
2528
|
-
}
|
2529
|
-
|
2530
|
-
return lxb_encoding_single_index_iso_8859_4[*(*data)++ - 0x80].codepoint;
|
2531
|
-
}
|
2532
|
-
|
2533
|
-
lxb_codepoint_t
|
2534
|
-
lxb_encoding_decode_iso_8859_5_single(lxb_encoding_decode_t *ctx,
|
2535
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2536
|
-
{
|
2537
|
-
if (**data < 0x80) {
|
2538
|
-
return *(*data)++;
|
2539
|
-
}
|
2540
|
-
|
2541
|
-
return lxb_encoding_single_index_iso_8859_5[*(*data)++ - 0x80].codepoint;
|
2542
|
-
}
|
2543
|
-
|
2544
|
-
lxb_codepoint_t
|
2545
|
-
lxb_encoding_decode_iso_8859_6_single(lxb_encoding_decode_t *ctx,
|
2546
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2547
|
-
{
|
2548
|
-
if (**data < 0x80) {
|
2549
|
-
return *(*data)++;
|
2550
|
-
}
|
2551
|
-
|
2552
|
-
return lxb_encoding_single_index_iso_8859_6[*(*data)++ - 0x80].codepoint;
|
2553
|
-
}
|
2554
|
-
|
2555
|
-
lxb_codepoint_t
|
2556
|
-
lxb_encoding_decode_iso_8859_7_single(lxb_encoding_decode_t *ctx,
|
2557
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2558
|
-
{
|
2559
|
-
if (**data < 0x80) {
|
2560
|
-
return *(*data)++;
|
2561
|
-
}
|
2562
|
-
|
2563
|
-
return lxb_encoding_single_index_iso_8859_7[*(*data)++ - 0x80].codepoint;
|
2564
|
-
}
|
2565
|
-
|
2566
|
-
lxb_codepoint_t
|
2567
|
-
lxb_encoding_decode_iso_8859_8_single(lxb_encoding_decode_t *ctx,
|
2568
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2569
|
-
{
|
2570
|
-
if (**data < 0x80) {
|
2571
|
-
return *(*data)++;
|
2572
|
-
}
|
2573
|
-
|
2574
|
-
return lxb_encoding_single_index_iso_8859_8[*(*data)++ - 0x80].codepoint;
|
2575
|
-
}
|
2576
|
-
|
2577
|
-
lxb_codepoint_t
|
2578
|
-
lxb_encoding_decode_iso_8859_8_i_single(lxb_encoding_decode_t *ctx,
|
2579
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2580
|
-
{
|
2581
|
-
if (**data < 0x80) {
|
2582
|
-
return *(*data)++;
|
2583
|
-
}
|
2584
|
-
|
2585
|
-
return lxb_encoding_single_index_iso_8859_8[*(*data)++ - 0x80].codepoint;
|
2586
|
-
}
|
2587
|
-
|
2588
|
-
lxb_codepoint_t
|
2589
|
-
lxb_encoding_decode_koi8_r_single(lxb_encoding_decode_t *ctx,
|
2590
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2591
|
-
{
|
2592
|
-
if (**data < 0x80) {
|
2593
|
-
return *(*data)++;
|
2594
|
-
}
|
2595
|
-
|
2596
|
-
return lxb_encoding_single_index_koi8_r[*(*data)++ - 0x80].codepoint;
|
2597
|
-
}
|
2598
|
-
|
2599
|
-
lxb_codepoint_t
|
2600
|
-
lxb_encoding_decode_koi8_u_single(lxb_encoding_decode_t *ctx,
|
2601
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2602
|
-
{
|
2603
|
-
if (**data < 0x80) {
|
2604
|
-
return *(*data)++;
|
2605
|
-
}
|
2606
|
-
|
2607
|
-
return lxb_encoding_single_index_koi8_u[*(*data)++ - 0x80].codepoint;
|
2608
|
-
}
|
2609
|
-
|
2610
|
-
lxb_codepoint_t
|
2611
|
-
lxb_encoding_decode_shift_jis_single(lxb_encoding_decode_t *ctx,
|
2612
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2613
|
-
{
|
2614
|
-
lxb_char_t byte, lead;
|
2615
|
-
|
2616
|
-
if (ctx->u.lead != 0x00) {
|
2617
|
-
lead = (lxb_char_t) ctx->u.lead;
|
2618
|
-
ctx->u.lead = 0x00;
|
2619
|
-
|
2620
|
-
goto lead_state;
|
2621
|
-
}
|
2622
|
-
|
2623
|
-
lead = *(*data)++;
|
2624
|
-
|
2625
|
-
if (lead <= 0x80) {
|
2626
|
-
return lead;
|
2627
|
-
}
|
2628
|
-
|
2629
|
-
if ((unsigned) (lead - 0xA1) <= (0xDF - 0xA1)) {
|
2630
|
-
return 0xFF61 - 0xA1 + lead;
|
2631
|
-
}
|
2632
|
-
|
2633
|
-
if ((unsigned) (lead - 0x81) > (0x9F - 0x81)
|
2634
|
-
&& lead != 0xE0 && lead != 0xFC)
|
2635
|
-
{
|
2636
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2637
|
-
}
|
2638
|
-
|
2639
|
-
if (*data >= end) {
|
2640
|
-
ctx->u.lead = lead;
|
2641
|
-
|
2642
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2643
|
-
}
|
2644
|
-
|
2645
|
-
lead_state:
|
2646
|
-
|
2647
|
-
byte = *(*data)++;
|
2648
|
-
|
2649
|
-
if (byte < 0x7F) {
|
2650
|
-
ctx->codepoint = 0x40;
|
2651
|
-
}
|
2652
|
-
else {
|
2653
|
-
ctx->codepoint = 0x41;
|
2654
|
-
}
|
2655
|
-
|
2656
|
-
if (lead < 0xA0) {
|
2657
|
-
ctx->second_codepoint = 0x81;
|
2658
|
-
}
|
2659
|
-
else {
|
2660
|
-
ctx->second_codepoint = 0xC1;
|
2661
|
-
}
|
2662
|
-
|
2663
|
-
if ((unsigned) (byte - 0x40) <= (0x7E - 0x40)
|
2664
|
-
|| (unsigned) (byte - 0x80) <= (0xFC - 0x80))
|
2665
|
-
{
|
2666
|
-
/* Max index == (0xFC - 0xC1) * 188 + 0xFC - 0x41 = 11279 */
|
2667
|
-
ctx->codepoint = (lead - ctx->second_codepoint) * 188
|
2668
|
-
+ byte - ctx->codepoint;
|
2669
|
-
|
2670
|
-
if (ctx->codepoint >= (sizeof(lxb_encoding_multi_index_jis0208)
|
2671
|
-
/ sizeof(lxb_encoding_multi_index_t)))
|
2672
|
-
{
|
2673
|
-
goto failed;
|
2674
|
-
}
|
2675
|
-
|
2676
|
-
if ((unsigned) (ctx->codepoint - 8836) <= (10715 - 8836)) {
|
2677
|
-
return 0xE000 - 8836 + ctx->codepoint;
|
2678
|
-
}
|
2679
|
-
|
2680
|
-
ctx->codepoint = lxb_encoding_multi_index_jis0208[ctx->codepoint].codepoint;
|
2681
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
2682
|
-
goto failed;
|
2683
|
-
}
|
2684
|
-
|
2685
|
-
return ctx->codepoint;
|
2686
|
-
}
|
2687
|
-
|
2688
|
-
failed:
|
2689
|
-
|
2690
|
-
if (byte < 0x80) {
|
2691
|
-
(*data)--;
|
2692
|
-
}
|
2693
|
-
|
2694
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2695
|
-
}
|
2696
|
-
|
2697
|
-
lxb_inline lxb_codepoint_t
|
2698
|
-
lxb_encoding_decode_utf_16_single(lxb_encoding_decode_t *ctx, bool is_be,
|
2699
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2700
|
-
{
|
2701
|
-
unsigned lead;
|
2702
|
-
lxb_codepoint_t unit;
|
2703
|
-
|
2704
|
-
if (ctx->u.lead != 0x00) {
|
2705
|
-
lead = ctx->u.lead - 0x01;
|
2706
|
-
ctx->u.lead = 0x00;
|
2707
|
-
|
2708
|
-
goto lead_state;
|
2709
|
-
}
|
2710
|
-
|
2711
|
-
pair_state:
|
2712
|
-
|
2713
|
-
lead = *(*data)++;
|
2714
|
-
|
2715
|
-
if (*data >= end) {
|
2716
|
-
ctx->u.lead = lead + 0x01;
|
2717
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2718
|
-
}
|
2719
|
-
|
2720
|
-
lead_state:
|
2721
|
-
|
2722
|
-
/* For UTF-16BE or UTF-16LE */
|
2723
|
-
if (is_be) {
|
2724
|
-
unit = (lead << 8) + *(*data)++;
|
2725
|
-
}
|
2726
|
-
else {
|
2727
|
-
unit = (*(*data)++ << 8) + lead;
|
2728
|
-
}
|
2729
|
-
|
2730
|
-
if (ctx->second_codepoint != 0x00) {
|
2731
|
-
if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
|
2732
|
-
ctx->codepoint = 0x10000 + ((ctx->second_codepoint - 0xD800) << 10)
|
2733
|
-
+ (unit - 0xDC00);
|
2734
|
-
|
2735
|
-
ctx->second_codepoint = 0x00;
|
2736
|
-
return ctx->codepoint;
|
2737
|
-
}
|
2738
|
-
|
2739
|
-
(*data)--;
|
2740
|
-
|
2741
|
-
ctx->u.lead = lead + 0x01;
|
2742
|
-
ctx->second_codepoint = 0x00;
|
2743
|
-
|
2744
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2745
|
-
}
|
2746
|
-
|
2747
|
-
/* Surrogate pair */
|
2748
|
-
if ((unsigned) (unit - 0xD800) <= (0xDFFF - 0xD800)) {
|
2749
|
-
if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
|
2750
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2751
|
-
}
|
2752
|
-
|
2753
|
-
ctx->second_codepoint = unit;
|
2754
|
-
|
2755
|
-
if (*data >= end) {
|
2756
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2757
|
-
}
|
2758
|
-
|
2759
|
-
goto pair_state;
|
2760
|
-
}
|
2761
|
-
|
2762
|
-
return unit;
|
2763
|
-
}
|
2764
|
-
|
2765
|
-
lxb_codepoint_t
|
2766
|
-
lxb_encoding_decode_utf_16be_single(lxb_encoding_decode_t *ctx,
|
2767
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2768
|
-
{
|
2769
|
-
return lxb_encoding_decode_utf_16_single(ctx, true, data, end);
|
2770
|
-
}
|
2771
|
-
|
2772
|
-
lxb_codepoint_t
|
2773
|
-
lxb_encoding_decode_utf_16le_single(lxb_encoding_decode_t *ctx,
|
2774
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2775
|
-
{
|
2776
|
-
return lxb_encoding_decode_utf_16_single(ctx, false, data, end);
|
2777
|
-
}
|
2778
|
-
|
2779
|
-
lxb_codepoint_t
|
2780
|
-
lxb_encoding_decode_utf_8_single(lxb_encoding_decode_t *ctx,
|
2781
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2782
|
-
{
|
2783
|
-
unsigned needed;
|
2784
|
-
lxb_char_t ch;
|
2785
|
-
const lxb_char_t *p;
|
2786
|
-
|
2787
|
-
if (ctx->u.utf_8.need != 0) {
|
2788
|
-
needed = ctx->u.utf_8.need;
|
2789
|
-
ctx->u.utf_8.need = 0;
|
2790
|
-
|
2791
|
-
if (ctx->u.utf_8.lower != 0x00) {
|
2792
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SINGLE(ctx->u.utf_8.lower,
|
2793
|
-
ctx->u.utf_8.upper);
|
2794
|
-
ctx->u.utf_8.lower = 0x00;
|
2795
|
-
}
|
2796
|
-
|
2797
|
-
goto decode;
|
2798
|
-
}
|
2799
|
-
|
2800
|
-
ch = *(*data)++;
|
2801
|
-
|
2802
|
-
if (ch < 0x80) {
|
2803
|
-
return ch;
|
2804
|
-
}
|
2805
|
-
else if (ch <= 0xDF) {
|
2806
|
-
if (ch < 0xC2) {
|
2807
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2808
|
-
}
|
2809
|
-
|
2810
|
-
needed = 1;
|
2811
|
-
ctx->codepoint = ch & 0x1F;
|
2812
|
-
}
|
2813
|
-
else if (ch < 0xF0) {
|
2814
|
-
needed = 2;
|
2815
|
-
ctx->codepoint = ch & 0x0F;
|
2816
|
-
|
2817
|
-
if (*data == end) {
|
2818
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET_SINGLE(0xE0, 0xED,
|
2819
|
-
0xA0, 0x9F);
|
2820
|
-
goto next;
|
2821
|
-
}
|
2822
|
-
|
2823
|
-
if (ch == 0xE0) {
|
2824
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SINGLE(0xA0, 0xBF);
|
2825
|
-
}
|
2826
|
-
else if (ch == 0xED) {
|
2827
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SINGLE(0x80, 0x9F);
|
2828
|
-
}
|
2829
|
-
}
|
2830
|
-
else if (ch < 0xF5) {
|
2831
|
-
needed = 3;
|
2832
|
-
ctx->codepoint = ch & 0x07;
|
2833
|
-
|
2834
|
-
if (*data == end) {
|
2835
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET_SINGLE(0xF0, 0xF4,
|
2836
|
-
0x90, 0x8F);
|
2837
|
-
|
2838
|
-
goto next;
|
2839
|
-
}
|
2840
|
-
|
2841
|
-
if (ch == 0xF0) {
|
2842
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SINGLE(0x90, 0xBF);
|
2843
|
-
}
|
2844
|
-
else if (ch == 0xF4) {
|
2845
|
-
LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SINGLE(0x80, 0x8F);
|
2846
|
-
}
|
2847
|
-
}
|
2848
|
-
else {
|
2849
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2850
|
-
}
|
2851
|
-
|
2852
|
-
decode:
|
2853
|
-
|
2854
|
-
for (p = *data; p < end; p++) {
|
2855
|
-
ch = *p;
|
2856
|
-
|
2857
|
-
if (ch < 0x80 || ch > 0xBF) {
|
2858
|
-
*data = p;
|
2859
|
-
|
2860
|
-
goto failed;
|
2861
|
-
}
|
2862
|
-
|
2863
|
-
ctx->codepoint = (ctx->codepoint << 6) | (ch & 0x3F);
|
2864
|
-
|
2865
|
-
if (--needed == 0) {
|
2866
|
-
*data = p + 1;
|
2867
|
-
|
2868
|
-
return ctx->codepoint;
|
2869
|
-
}
|
2870
|
-
}
|
2871
|
-
|
2872
|
-
*data = p;
|
2873
|
-
|
2874
|
-
next:
|
2875
|
-
|
2876
|
-
ctx->u.utf_8.need = needed;
|
2877
|
-
|
2878
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2879
|
-
|
2880
|
-
failed:
|
2881
|
-
|
2882
|
-
ctx->u.utf_8.lower = 0x00;
|
2883
|
-
ctx->u.utf_8.need = 0;
|
2884
|
-
|
2885
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2886
|
-
}
|
2887
|
-
|
2888
|
-
lxb_codepoint_t
|
2889
|
-
lxb_encoding_decode_gb18030_single(lxb_encoding_decode_t *ctx,
|
2890
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
2891
|
-
{
|
2892
|
-
uint32_t pointer;
|
2893
|
-
lxb_char_t first, second, third, offset;
|
2894
|
-
|
2895
|
-
/* Make compiler happy */
|
2896
|
-
second = 0x00;
|
2897
|
-
|
2898
|
-
if (ctx->u.gb18030.first != 0) {
|
2899
|
-
if (ctx->u.gb18030.third != 0x00) {
|
2900
|
-
first = ctx->u.gb18030.first;
|
2901
|
-
second = ctx->u.gb18030.second;
|
2902
|
-
third = ctx->u.gb18030.third;
|
2903
|
-
|
2904
|
-
memset(&ctx->u.gb18030, 0, sizeof(lxb_encoding_ctx_gb18030_t));
|
2905
|
-
|
2906
|
-
if (ctx->prepend) {
|
2907
|
-
/* The first is always < 0x80 */
|
2908
|
-
ctx->u.gb18030.first = third;
|
2909
|
-
|
2910
|
-
return second;
|
2911
|
-
}
|
2912
|
-
|
2913
|
-
goto third_state;
|
2914
|
-
}
|
2915
|
-
else if (ctx->u.gb18030.second != 0x00) {
|
2916
|
-
first = ctx->u.gb18030.first;
|
2917
|
-
second = ctx->u.gb18030.second;
|
2918
|
-
|
2919
|
-
memset(&ctx->u.gb18030, 0, sizeof(lxb_encoding_ctx_gb18030_t));
|
2920
|
-
|
2921
|
-
goto second_state;
|
2922
|
-
}
|
2923
|
-
|
2924
|
-
first = ctx->u.gb18030.first;
|
2925
|
-
ctx->u.gb18030.first = 0x00;
|
2926
|
-
|
2927
|
-
if (ctx->prepend) {
|
2928
|
-
ctx->prepend = false;
|
2929
|
-
goto prepend_first;
|
2930
|
-
}
|
2931
|
-
|
2932
|
-
goto first_state;
|
2933
|
-
}
|
2934
|
-
|
2935
|
-
first = *(*data)++;
|
2936
|
-
|
2937
|
-
prepend_first:
|
2938
|
-
|
2939
|
-
if (first < 0x80) {
|
2940
|
-
return first;
|
2941
|
-
}
|
2942
|
-
|
2943
|
-
if (first == 0x80) {
|
2944
|
-
return 0x20AC;
|
2945
|
-
}
|
2946
|
-
|
2947
|
-
/* Range 0x81 to 0xFE, inclusive */
|
2948
|
-
if ((unsigned) (first - 0x81) > (0xFE - 0x81)) {
|
2949
|
-
return LXB_ENCODING_DECODE_ERROR;
|
2950
|
-
}
|
2951
|
-
|
2952
|
-
if (*data == end) {
|
2953
|
-
ctx->u.gb18030.first = first;
|
2954
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2955
|
-
}
|
2956
|
-
|
2957
|
-
/* First */
|
2958
|
-
first_state:
|
2959
|
-
|
2960
|
-
second = *(*data)++;
|
2961
|
-
|
2962
|
-
/* Range 0x30 to 0x39, inclusive */
|
2963
|
-
if ((unsigned) (second - 0x30) > (0x39 - 0x30)) {
|
2964
|
-
offset = (second < 0x7F) ? 0x40 : 0x41;
|
2965
|
-
|
2966
|
-
/* Range 0x40 to 0x7E, inclusive, or 0x80 to 0xFE, inclusive */
|
2967
|
-
if ((unsigned) (second - 0x40) <= (0x7E - 0x40)
|
2968
|
-
|| (unsigned) (second - 0x80) <= (0xFE - 0x80))
|
2969
|
-
{
|
2970
|
-
pointer = (first - 0x81) * 190 + (second - offset);
|
2971
|
-
}
|
2972
|
-
else {
|
2973
|
-
goto failed;
|
2974
|
-
}
|
2975
|
-
|
2976
|
-
/* Max pointer value == (0xFE - 0x81) * 190 + (0xFE - 0x41) == 23939 */
|
2977
|
-
ctx->codepoint = lxb_encoding_multi_index_gb18030[pointer].codepoint;
|
2978
|
-
if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {
|
2979
|
-
goto failed;
|
2980
|
-
}
|
2981
|
-
|
2982
|
-
return ctx->codepoint;
|
2983
|
-
}
|
2984
|
-
|
2985
|
-
if (*data == end) {
|
2986
|
-
ctx->u.gb18030.first = first;
|
2987
|
-
ctx->u.gb18030.second = second;
|
2988
|
-
|
2989
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
2990
|
-
}
|
2991
|
-
|
2992
|
-
/* Second */
|
2993
|
-
second_state:
|
2994
|
-
|
2995
|
-
third = *(*data)++;
|
2996
|
-
|
2997
|
-
/* Range 0x81 to 0xFE, inclusive */
|
2998
|
-
if ((unsigned) (third - 0x81) > (0xFE - 0x81)) {
|
2999
|
-
(*data)--;
|
3000
|
-
|
3001
|
-
ctx->prepend = true;
|
3002
|
-
ctx->u.gb18030.first = second;
|
3003
|
-
|
3004
|
-
return LXB_ENCODING_DECODE_ERROR;
|
3005
|
-
}
|
3006
|
-
|
3007
|
-
if (*data == end) {
|
3008
|
-
ctx->u.gb18030.first = first;
|
3009
|
-
ctx->u.gb18030.second = second;
|
3010
|
-
ctx->u.gb18030.third = third;
|
3011
|
-
|
3012
|
-
return LXB_ENCODING_DECODE_CONTINUE;
|
3013
|
-
}
|
3014
|
-
|
3015
|
-
/* Third */
|
3016
|
-
third_state:
|
3017
|
-
|
3018
|
-
/* Range 0x30 to 0x39, inclusive */
|
3019
|
-
if ((unsigned) (**data - 0x30) > (0x39 - 0x30)) {
|
3020
|
-
ctx->prepend = true;
|
3021
|
-
|
3022
|
-
/* First is a fake for trigger */
|
3023
|
-
ctx->u.gb18030.first = 0x01;
|
3024
|
-
ctx->u.gb18030.second = second;
|
3025
|
-
ctx->u.gb18030.third = third;
|
3026
|
-
|
3027
|
-
return LXB_ENCODING_DECODE_ERROR;
|
3028
|
-
}
|
3029
|
-
|
3030
|
-
pointer = ((first - 0x81) * (10 * 126 * 10))
|
3031
|
-
+ ((second - 0x30) * (10 * 126))
|
3032
|
-
+ ((third - 0x81) * 10) + (*(*data)++) - 0x30;
|
3033
|
-
|
3034
|
-
return lxb_encoding_decode_gb18030_range(pointer);
|
3035
|
-
|
3036
|
-
failed:
|
3037
|
-
|
3038
|
-
if (second < 0x80) {
|
3039
|
-
(*data)--;
|
3040
|
-
}
|
3041
|
-
|
3042
|
-
return LXB_ENCODING_DECODE_ERROR;
|
3043
|
-
}
|
3044
|
-
|
3045
|
-
lxb_codepoint_t
|
3046
|
-
lxb_encoding_decode_macintosh_single(lxb_encoding_decode_t *ctx,
|
3047
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3048
|
-
{
|
3049
|
-
if (**data < 0x80) {
|
3050
|
-
return *(*data)++;
|
3051
|
-
}
|
3052
|
-
|
3053
|
-
return lxb_encoding_single_index_macintosh[*(*data)++ - 0x80].codepoint;
|
3054
|
-
}
|
3055
|
-
|
3056
|
-
lxb_codepoint_t
|
3057
|
-
lxb_encoding_decode_replacement_single(lxb_encoding_decode_t *ctx,
|
3058
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3059
|
-
{
|
3060
|
-
return LXB_ENCODING_DECODE_ERROR;
|
3061
|
-
}
|
3062
|
-
|
3063
|
-
lxb_codepoint_t
|
3064
|
-
lxb_encoding_decode_windows_1250_single(lxb_encoding_decode_t *ctx,
|
3065
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3066
|
-
{
|
3067
|
-
if (**data < 0x80) {
|
3068
|
-
return *(*data)++;
|
3069
|
-
}
|
3070
|
-
|
3071
|
-
return lxb_encoding_single_index_windows_1250[*(*data)++ - 0x80].codepoint;
|
3072
|
-
}
|
3073
|
-
|
3074
|
-
lxb_codepoint_t
|
3075
|
-
lxb_encoding_decode_windows_1251_single(lxb_encoding_decode_t *ctx,
|
3076
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3077
|
-
{
|
3078
|
-
if (**data < 0x80) {
|
3079
|
-
return *(*data)++;
|
3080
|
-
}
|
3081
|
-
|
3082
|
-
return lxb_encoding_single_index_windows_1251[*(*data)++ - 0x80].codepoint;
|
3083
|
-
}
|
3084
|
-
|
3085
|
-
lxb_codepoint_t
|
3086
|
-
lxb_encoding_decode_windows_1252_single(lxb_encoding_decode_t *ctx,
|
3087
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3088
|
-
{
|
3089
|
-
if (**data < 0x80) {
|
3090
|
-
return *(*data)++;
|
3091
|
-
}
|
3092
|
-
|
3093
|
-
return lxb_encoding_single_index_windows_1252[*(*data)++ - 0x80].codepoint;
|
3094
|
-
}
|
3095
|
-
|
3096
|
-
lxb_codepoint_t
|
3097
|
-
lxb_encoding_decode_windows_1253_single(lxb_encoding_decode_t *ctx,
|
3098
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3099
|
-
{
|
3100
|
-
if (**data < 0x80) {
|
3101
|
-
return *(*data)++;
|
3102
|
-
}
|
3103
|
-
|
3104
|
-
return lxb_encoding_single_index_windows_1253[*(*data)++ - 0x80].codepoint;
|
3105
|
-
}
|
3106
|
-
|
3107
|
-
lxb_codepoint_t
|
3108
|
-
lxb_encoding_decode_windows_1254_single(lxb_encoding_decode_t *ctx,
|
3109
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3110
|
-
{
|
3111
|
-
if (**data < 0x80) {
|
3112
|
-
return *(*data)++;
|
3113
|
-
}
|
3114
|
-
|
3115
|
-
return lxb_encoding_single_index_windows_1254[*(*data)++ - 0x80].codepoint;
|
3116
|
-
}
|
3117
|
-
|
3118
|
-
lxb_codepoint_t
|
3119
|
-
lxb_encoding_decode_windows_1255_single(lxb_encoding_decode_t *ctx,
|
3120
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3121
|
-
{
|
3122
|
-
if (**data < 0x80) {
|
3123
|
-
return *(*data)++;
|
3124
|
-
}
|
3125
|
-
|
3126
|
-
return lxb_encoding_single_index_windows_1255[*(*data)++ - 0x80].codepoint;
|
3127
|
-
}
|
3128
|
-
|
3129
|
-
lxb_codepoint_t
|
3130
|
-
lxb_encoding_decode_windows_1256_single(lxb_encoding_decode_t *ctx,
|
3131
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3132
|
-
{
|
3133
|
-
if (**data < 0x80) {
|
3134
|
-
return *(*data)++;
|
3135
|
-
}
|
3136
|
-
|
3137
|
-
return lxb_encoding_single_index_windows_1256[*(*data)++ - 0x80].codepoint;
|
3138
|
-
}
|
3139
|
-
|
3140
|
-
lxb_codepoint_t
|
3141
|
-
lxb_encoding_decode_windows_1257_single(lxb_encoding_decode_t *ctx,
|
3142
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3143
|
-
{
|
3144
|
-
if (**data < 0x80) {
|
3145
|
-
return *(*data)++;
|
3146
|
-
}
|
3147
|
-
|
3148
|
-
return lxb_encoding_single_index_windows_1257[*(*data)++ - 0x80].codepoint;
|
3149
|
-
}
|
3150
|
-
|
3151
|
-
lxb_codepoint_t
|
3152
|
-
lxb_encoding_decode_windows_1258_single(lxb_encoding_decode_t *ctx,
|
3153
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3154
|
-
{
|
3155
|
-
if (**data < 0x80) {
|
3156
|
-
return *(*data)++;
|
3157
|
-
}
|
3158
|
-
|
3159
|
-
return lxb_encoding_single_index_windows_1258[*(*data)++ - 0x80].codepoint;
|
3160
|
-
}
|
3161
|
-
|
3162
|
-
lxb_codepoint_t
|
3163
|
-
lxb_encoding_decode_windows_874_single(lxb_encoding_decode_t *ctx,
|
3164
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3165
|
-
{
|
3166
|
-
if (**data < 0x80) {
|
3167
|
-
return *(*data)++;
|
3168
|
-
}
|
3169
|
-
|
3170
|
-
return lxb_encoding_single_index_windows_874[*(*data)++ - 0x80].codepoint;
|
3171
|
-
}
|
3172
|
-
|
3173
|
-
lxb_codepoint_t
|
3174
|
-
lxb_encoding_decode_x_mac_cyrillic_single(lxb_encoding_decode_t *ctx,
|
3175
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3176
|
-
{
|
3177
|
-
if (**data < 0x80) {
|
3178
|
-
return *(*data)++;
|
3179
|
-
}
|
3180
|
-
|
3181
|
-
return lxb_encoding_single_index_x_mac_cyrillic[*(*data)++ - 0x80].codepoint;
|
3182
|
-
}
|
3183
|
-
|
3184
|
-
lxb_codepoint_t
|
3185
|
-
lxb_encoding_decode_x_user_defined_single(lxb_encoding_decode_t *ctx,
|
3186
|
-
const lxb_char_t **data, const lxb_char_t *end)
|
3187
|
-
{
|
3188
|
-
if (**data < 0x80) {
|
3189
|
-
return *(*data)++;
|
3190
|
-
}
|
3191
|
-
|
3192
|
-
return 0xF780 + (*(*data)++) - 0x80;
|
3193
|
-
}
|