nokolexbor 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_attribute.c +201 -0
- data/ext/nokolexbor/nl_cdata.c +8 -0
- data/ext/nokolexbor/nl_comment.c +6 -0
- data/ext/nokolexbor/nl_document.c +53 -7
- data/ext/nokolexbor/nl_document_fragment.c +9 -0
- data/ext/nokolexbor/nl_error.c +21 -19
- data/ext/nokolexbor/nl_node.c +317 -48
- data/ext/nokolexbor/nl_node_set.c +56 -1
- data/ext/nokolexbor/nl_processing_instruction.c +6 -0
- data/ext/nokolexbor/nl_text.c +6 -0
- data/ext/nokolexbor/nokolexbor.c +1 -0
- data/ext/nokolexbor/nokolexbor.h +2 -0
- data/lib/nokolexbor/document.rb +52 -5
- data/lib/nokolexbor/document_fragment.rb +11 -0
- data/lib/nokolexbor/node.rb +370 -24
- data/lib/nokolexbor/node_set.rb +56 -0
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +0 -1
- metadata +3 -25
- data/lib/nokolexbor/attribute.rb +0 -18
- data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
- data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
- data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
- data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
- data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
- data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
- data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
- data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
- data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
- data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
- data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
- data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
- data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
- data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
- data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
- data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
- data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
- data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
- data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
- data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
@@ -1,1931 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* Copyright (C) 2019 Alexander Borisov
|
3
|
-
*
|
4
|
-
* Author: Alexander Borisov <borisov@lexbor.com>
|
5
|
-
*/
|
6
|
-
|
7
|
-
#include "lexbor/encoding/encode.h"
|
8
|
-
#include "lexbor/encoding/single.h"
|
9
|
-
#include "lexbor/encoding/multi.h"
|
10
|
-
#include "lexbor/encoding/range.h"
|
11
|
-
|
12
|
-
|
13
|
-
/*
 * Append a single byte to the output buffer of `ctx`.
 *
 * Returns LXB_STATUS_SMALL_BUFFER from the enclosing function when the
 * buffer is already full. `cp` is parenthesized so that an expression
 * argument (for example `cp - 0xFF61 + 0xA1`) is converted to lxb_char_t
 * as a whole rather than only its first operand.
 */
#define LXB_ENCODING_ENCODE_APPEND(ctx, cp)                                    \
    do {                                                                       \
        if ((ctx)->buffer_used == (ctx)->buffer_length) {                      \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
                                                                               \
        (ctx)->buffer_out[(ctx)->buffer_used++] = (lxb_char_t) (cp);           \
    }                                                                          \
    while (0)
|
22
|
-
|
23
|
-
/*
 * Same as LXB_ENCODING_ENCODE_APPEND, for encoders that iterate with a
 * local cursor `p`: when the buffer is full the cursor is stored back into
 * `*cps` before returning LXB_STATUS_SMALL_BUFFER. Expects `cps` and `p`
 * to be in scope at the expansion site. `cp` is parenthesized so the cast
 * applies to the whole argument expression.
 */
#define LXB_ENCODING_ENCODE_APPEND_P(ctx, cp)                                  \
    do {                                                                       \
        if ((ctx)->buffer_used == (ctx)->buffer_length) {                      \
            *cps = p;                                                          \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
                                                                               \
        (ctx)->buffer_out[(ctx)->buffer_used++] = (lxb_char_t) (cp);           \
    }                                                                          \
    while (0)
|
33
|
-
|
34
|
-
/*
 * Handle a code point that cannot be encoded.
 *
 * Returns LXB_STATUS_ERROR from the enclosing function when no replacement
 * sequence is configured, LXB_STATUS_SMALL_BUFFER when the replacement does
 * not fit; otherwise copies `replace_len` bytes of `replace_to` into the
 * output buffer. `ctx` is parenthesized so any pointer expression can be
 * passed.
 */
#define LXB_ENCODING_ENCODE_ERROR(ctx)                                         \
    do {                                                                       \
        if ((ctx)->replace_to == NULL) {                                       \
            return LXB_STATUS_ERROR;                                           \
        }                                                                      \
                                                                               \
        if (((ctx)->buffer_used + (ctx)->replace_len)                          \
            > (ctx)->buffer_length)                                            \
        {                                                                      \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
                                                                               \
        memcpy(&(ctx)->buffer_out[(ctx)->buffer_used], (ctx)->replace_to,      \
               (ctx)->replace_len);                                            \
                                                                               \
        (ctx)->buffer_used += (ctx)->replace_len;                              \
    }                                                                          \
    while (0)
|
50
|
-
|
51
|
-
/*
 * Same as LXB_ENCODING_ENCODE_ERROR, for encoders that iterate with a local
 * cursor `p`: the cursor is stored back into `*cps` before either early
 * return. Expects `cps` and `p` to be in scope at the expansion site.
 * `ctx` is parenthesized so any pointer expression can be passed.
 */
#define LXB_ENCODING_ENCODE_ERROR_P(ctx)                                       \
    do {                                                                       \
        if ((ctx)->replace_to == NULL) {                                       \
            *cps = p;                                                          \
            return LXB_STATUS_ERROR;                                           \
        }                                                                      \
                                                                               \
        if (((ctx)->buffer_used + (ctx)->replace_len)                          \
            > (ctx)->buffer_length)                                            \
        {                                                                      \
            *cps = p;                                                          \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
                                                                               \
        memcpy(&(ctx)->buffer_out[(ctx)->buffer_used], (ctx)->replace_to,      \
               (ctx)->replace_len);                                            \
                                                                               \
        (ctx)->buffer_used += (ctx)->replace_len;                              \
    }                                                                          \
    while (0)
|
69
|
-
|
70
|
-
/*
 * Body of every single-byte encoder.
 *
 * Expects `ctx`, `cps` and `end` to be in scope at the expansion site.
 * Code points below 0x80 are written unchanged; any other code point is
 * looked up in `table` (a static hash of `table_size` entries) and the
 * stored value is written as one byte. Code points missing from the table
 * go through LXB_ENCODING_ENCODE_ERROR_P.
 *
 * On every exit path `*cps` is set to the first code point that was not
 * consumed; on success that is `end`, matching what the UTF-8 and
 * multi-byte encoders in this file leave in `*cps`.
 */
#define LXB_ENCODING_ENCODE_SINGLE_BYTE(table, table_size)                     \
    do {                                                                       \
        lxb_codepoint_t cp;                                                    \
        const lxb_codepoint_t *p = *cps;                                       \
        const lexbor_shs_hash_t *hash;                                         \
                                                                               \
        for (; p < end; p++) {                                                 \
            cp = *p;                                                           \
                                                                               \
            if (cp < 0x80) {                                                   \
                LXB_ENCODING_ENCODE_APPEND_P(ctx, cp);                         \
                continue;                                                      \
            }                                                                  \
                                                                               \
            hash = lexbor_shs_hash_get_static(table, table_size, cp);          \
            if (hash == NULL) {                                                \
                LXB_ENCODING_ENCODE_ERROR_P(ctx);                              \
                continue;                                                      \
            }                                                                  \
                                                                               \
            LXB_ENCODING_ENCODE_APPEND_P(ctx, (uintptr_t) hash->value);        \
        }                                                                      \
                                                                               \
        /* Report the consumed input to the caller on success as well. */      \
        *cps = p;                                                              \
                                                                               \
        return LXB_STATUS_OK;                                                  \
    }                                                                          \
    while (0)
|
96
|
-
|
97
|
-
/*
 * Body of a "one code point" single-byte encoder.
 *
 * Expects `cp` and `data` to be in scope at the expansion site. Writes one
 * byte through `*data`, advances `*data`, and returns 1; returns
 * LXB_ENCODING_ENCODE_ERROR when a code point of 0x80 or above is not
 * present in `table`. The expansion ends without a semicolon, which the
 * call site supplies.
 */
#define LXB_ENCODING_ENCODE_BYTE_SINGLE(table, table_size)                     \
    const lexbor_shs_hash_t *hash;                                             \
                                                                               \
    if (cp >= 0x80) {                                                          \
        hash = lexbor_shs_hash_get_static(table, table_size, cp);              \
        if (hash == NULL) {                                                    \
            return LXB_ENCODING_ENCODE_ERROR;                                  \
        }                                                                      \
                                                                               \
        *(*data)++ = (lxb_char_t) (uintptr_t) hash->value;                     \
        return 1;                                                              \
    }                                                                          \
                                                                               \
    *(*data)++ = (lxb_char_t) cp;                                              \
    return 1
|
112
|
-
|
113
|
-
|
114
|
-
lxb_status_t
|
115
|
-
lxb_encoding_encode_default(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
116
|
-
const lxb_codepoint_t *end)
|
117
|
-
{
|
118
|
-
return lxb_encoding_encode_utf_8(ctx, cps, end);
|
119
|
-
}
|
120
|
-
|
121
|
-
lxb_status_t
|
122
|
-
lxb_encoding_encode_auto(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
123
|
-
const lxb_codepoint_t *end)
|
124
|
-
{
|
125
|
-
*cps = end;
|
126
|
-
return LXB_STATUS_ERROR;
|
127
|
-
}
|
128
|
-
|
129
|
-
lxb_status_t
|
130
|
-
lxb_encoding_encode_undefined(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
131
|
-
const lxb_codepoint_t *end)
|
132
|
-
{
|
133
|
-
*cps = end;
|
134
|
-
return LXB_STATUS_ERROR;
|
135
|
-
}
|
136
|
-
|
137
|
-
lxb_status_t
|
138
|
-
lxb_encoding_encode_big5(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
139
|
-
const lxb_codepoint_t *end)
|
140
|
-
{
|
141
|
-
lxb_codepoint_t cp;
|
142
|
-
const lexbor_shs_hash_t *hash;
|
143
|
-
|
144
|
-
for (; *cps < end; (*cps)++) {
|
145
|
-
cp = **cps;
|
146
|
-
|
147
|
-
if (cp < 0x80) {
|
148
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
149
|
-
continue;
|
150
|
-
}
|
151
|
-
|
152
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_big5,
|
153
|
-
LXB_ENCODING_MULTI_HASH_BIG5_SIZE, cp);
|
154
|
-
if (hash == NULL) {
|
155
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
156
|
-
continue;
|
157
|
-
}
|
158
|
-
|
159
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
160
|
-
return LXB_STATUS_SMALL_BUFFER;
|
161
|
-
}
|
162
|
-
|
163
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value) / 157 + 0x81;
|
164
|
-
|
165
|
-
if ((((uint32_t) (uintptr_t) hash->value) % 157) < 0x3F) {
|
166
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x40;
|
167
|
-
}
|
168
|
-
else {
|
169
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x62;
|
170
|
-
}
|
171
|
-
}
|
172
|
-
|
173
|
-
return LXB_STATUS_OK;
|
174
|
-
}
|
175
|
-
|
176
|
-
lxb_status_t
|
177
|
-
lxb_encoding_encode_euc_jp(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
178
|
-
const lxb_codepoint_t *end)
|
179
|
-
{
|
180
|
-
lxb_codepoint_t cp;
|
181
|
-
const lexbor_shs_hash_t *hash;
|
182
|
-
|
183
|
-
for (; *cps < end; (*cps)++) {
|
184
|
-
cp = **cps;
|
185
|
-
|
186
|
-
if (cp < 0x80) {
|
187
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
188
|
-
continue;
|
189
|
-
}
|
190
|
-
|
191
|
-
if (cp == 0x00A5) {
|
192
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
|
193
|
-
continue;
|
194
|
-
}
|
195
|
-
|
196
|
-
if (cp == 0x203E) {
|
197
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
|
198
|
-
continue;
|
199
|
-
}
|
200
|
-
|
201
|
-
if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
|
202
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
203
|
-
return LXB_STATUS_SMALL_BUFFER;
|
204
|
-
}
|
205
|
-
|
206
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = 0x8E;
|
207
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = cp - 0xFF61 + 0xA1;
|
208
|
-
|
209
|
-
continue;
|
210
|
-
}
|
211
|
-
|
212
|
-
if (cp == 0x2212) {
|
213
|
-
cp = 0xFF0D;
|
214
|
-
}
|
215
|
-
|
216
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
|
217
|
-
LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
|
218
|
-
if (hash == NULL) {
|
219
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
220
|
-
continue;
|
221
|
-
}
|
222
|
-
|
223
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
224
|
-
return LXB_STATUS_SMALL_BUFFER;
|
225
|
-
}
|
226
|
-
|
227
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 94 + 0xA1;
|
228
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 94 + 0xA1;
|
229
|
-
}
|
230
|
-
|
231
|
-
return LXB_STATUS_OK;
|
232
|
-
}
|
233
|
-
|
234
|
-
lxb_status_t
|
235
|
-
lxb_encoding_encode_euc_kr(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
236
|
-
const lxb_codepoint_t *end)
|
237
|
-
{
|
238
|
-
lxb_codepoint_t cp;
|
239
|
-
const lexbor_shs_hash_t *hash;
|
240
|
-
|
241
|
-
for (; *cps < end; (*cps)++) {
|
242
|
-
cp = **cps;
|
243
|
-
|
244
|
-
if (cp < 0x80) {
|
245
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
246
|
-
continue;
|
247
|
-
}
|
248
|
-
|
249
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_euc_kr,
|
250
|
-
LXB_ENCODING_MULTI_HASH_EUC_KR_SIZE, cp);
|
251
|
-
if (hash == NULL) {
|
252
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
253
|
-
continue;
|
254
|
-
}
|
255
|
-
|
256
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
257
|
-
return LXB_STATUS_SMALL_BUFFER;
|
258
|
-
}
|
259
|
-
|
260
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
|
261
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 190 + 0x41;
|
262
|
-
}
|
263
|
-
|
264
|
-
return LXB_STATUS_OK;
|
265
|
-
}
|
266
|
-
|
267
|
-
lxb_status_t
|
268
|
-
lxb_encoding_encode_gbk(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
269
|
-
const lxb_codepoint_t *end)
|
270
|
-
{
|
271
|
-
lxb_codepoint_t cp;
|
272
|
-
const lexbor_shs_hash_t *hash;
|
273
|
-
|
274
|
-
for (; *cps < end; (*cps)++) {
|
275
|
-
cp = **cps;
|
276
|
-
|
277
|
-
if (cp < 0x80) {
|
278
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
279
|
-
continue;
|
280
|
-
}
|
281
|
-
|
282
|
-
if (cp == 0xE5E5) {
|
283
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
284
|
-
continue;
|
285
|
-
}
|
286
|
-
|
287
|
-
if (cp == 0x20AC) {
|
288
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, 0x80);
|
289
|
-
continue;
|
290
|
-
}
|
291
|
-
|
292
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
|
293
|
-
LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
|
294
|
-
if (hash == NULL) {
|
295
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
296
|
-
continue;
|
297
|
-
}
|
298
|
-
|
299
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
300
|
-
return LXB_STATUS_SMALL_BUFFER;
|
301
|
-
}
|
302
|
-
|
303
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (uintptr_t) hash->value / 190 + 0x81;
|
304
|
-
|
305
|
-
if (((lxb_char_t) (uintptr_t) hash->value % 190) < 0x3F) {
|
306
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x40;
|
307
|
-
}
|
308
|
-
else {
|
309
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x41;
|
310
|
-
}
|
311
|
-
}
|
312
|
-
|
313
|
-
return LXB_STATUS_OK;
|
314
|
-
}
|
315
|
-
|
316
|
-
lxb_status_t
|
317
|
-
lxb_encoding_encode_ibm866(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
318
|
-
const lxb_codepoint_t *end)
|
319
|
-
{
|
320
|
-
|
321
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_ibm866,
|
322
|
-
LXB_ENCODING_SINGLE_HASH_IBM866_SIZE);
|
323
|
-
}
|
324
|
-
|
325
|
-
lxb_status_t
|
326
|
-
lxb_encoding_encode_iso_2022_jp(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
327
|
-
const lxb_codepoint_t *end)
|
328
|
-
{
|
329
|
-
int8_t size;
|
330
|
-
unsigned state;
|
331
|
-
lxb_codepoint_t cp;
|
332
|
-
const lexbor_shs_hash_t *hash;
|
333
|
-
|
334
|
-
size = 0;
|
335
|
-
state = ctx->state;
|
336
|
-
|
337
|
-
for (; *cps < end; (*cps)++) {
|
338
|
-
cp = **cps;
|
339
|
-
|
340
|
-
begin:
|
341
|
-
|
342
|
-
switch (ctx->state) {
|
343
|
-
case LXB_ENCODING_ENCODE_2022_JP_ASCII:
|
344
|
-
if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
|
345
|
-
goto failed;
|
346
|
-
}
|
347
|
-
|
348
|
-
if (cp < 0x80) {
|
349
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
350
|
-
continue;
|
351
|
-
}
|
352
|
-
|
353
|
-
if (cp == 0x00A5 || cp == 0x203E) {
|
354
|
-
/*
|
355
|
-
* Do not switch to the ROMAN stage with prepend code point
|
356
|
-
* to stream, add it immediately.
|
357
|
-
*/
|
358
|
-
if ((ctx->buffer_used + 4) > ctx->buffer_length) {
|
359
|
-
goto small_buffer;
|
360
|
-
}
|
361
|
-
|
362
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
|
363
|
-
|
364
|
-
if (cp == 0x00A5) {
|
365
|
-
memcpy(&ctx->buffer_out[ctx->buffer_used],
|
366
|
-
"\x1B\x28\x4A\x5C", 4);
|
367
|
-
ctx->buffer_used += 4;
|
368
|
-
|
369
|
-
continue;
|
370
|
-
}
|
371
|
-
|
372
|
-
memcpy(&ctx->buffer_out[ctx->buffer_used],
|
373
|
-
"\x1B\x28\x4A\x7E", 4);
|
374
|
-
ctx->buffer_used += 4;
|
375
|
-
|
376
|
-
continue;
|
377
|
-
}
|
378
|
-
|
379
|
-
break;
|
380
|
-
|
381
|
-
case LXB_ENCODING_ENCODE_2022_JP_ROMAN:
|
382
|
-
if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
|
383
|
-
goto failed;
|
384
|
-
}
|
385
|
-
|
386
|
-
if (cp < 0x80) {
|
387
|
-
switch (cp) {
|
388
|
-
case 0x005C:
|
389
|
-
case 0x007E:
|
390
|
-
break;
|
391
|
-
|
392
|
-
case 0x00A5:
|
393
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
|
394
|
-
continue;
|
395
|
-
|
396
|
-
case 0x203E:
|
397
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
|
398
|
-
continue;
|
399
|
-
|
400
|
-
default:
|
401
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
402
|
-
continue;
|
403
|
-
}
|
404
|
-
|
405
|
-
/*
|
406
|
-
* Do not switch to the ANSI stage with prepend code point
|
407
|
-
* to stream, add it immediately.
|
408
|
-
*/
|
409
|
-
if ((ctx->buffer_used + 4) > ctx->buffer_length) {
|
410
|
-
goto small_buffer;
|
411
|
-
}
|
412
|
-
|
413
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
|
414
|
-
|
415
|
-
memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
|
416
|
-
ctx->buffer_used += 3;
|
417
|
-
|
418
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
|
419
|
-
continue;
|
420
|
-
}
|
421
|
-
|
422
|
-
break;
|
423
|
-
|
424
|
-
case LXB_ENCODING_ENCODE_2022_JP_JIS0208:
|
425
|
-
if (cp < 0x80) {
|
426
|
-
if ((ctx->buffer_used + 4) > ctx->buffer_length) {
|
427
|
-
goto small_buffer;
|
428
|
-
}
|
429
|
-
|
430
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
|
431
|
-
|
432
|
-
memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
|
433
|
-
ctx->buffer_used += 3;
|
434
|
-
|
435
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
|
436
|
-
continue;
|
437
|
-
}
|
438
|
-
|
439
|
-
if (cp == 0x00A5 || cp == 0x203E) {
|
440
|
-
if ((ctx->buffer_used + 4) > ctx->buffer_length) {
|
441
|
-
goto small_buffer;
|
442
|
-
}
|
443
|
-
|
444
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
|
445
|
-
|
446
|
-
if (cp == 0x00A5) {
|
447
|
-
memcpy(&ctx->buffer_out[ctx->buffer_used],
|
448
|
-
"\x1B\x28\x4A\x5C", 4);
|
449
|
-
ctx->buffer_used += 4;
|
450
|
-
|
451
|
-
continue;
|
452
|
-
}
|
453
|
-
|
454
|
-
memcpy(&ctx->buffer_out[ctx->buffer_used],
|
455
|
-
"\x1B\x28\x4A\x7E", 4);
|
456
|
-
ctx->buffer_used += 4;
|
457
|
-
|
458
|
-
continue;
|
459
|
-
}
|
460
|
-
|
461
|
-
break;
|
462
|
-
}
|
463
|
-
|
464
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
465
|
-
goto small_buffer;
|
466
|
-
}
|
467
|
-
|
468
|
-
if (cp == 0x2212) {
|
469
|
-
cp = 0xFF0D;
|
470
|
-
}
|
471
|
-
|
472
|
-
if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
|
473
|
-
cp = lxb_encoding_multi_index_iso_2022_jp_katakana[cp - 0xFF61].codepoint;
|
474
|
-
}
|
475
|
-
|
476
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
|
477
|
-
LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
|
478
|
-
if (hash == NULL) {
|
479
|
-
goto failed;
|
480
|
-
}
|
481
|
-
|
482
|
-
if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_JIS0208) {
|
483
|
-
if ((ctx->buffer_used + 3) > ctx->buffer_length) {
|
484
|
-
goto small_buffer;
|
485
|
-
}
|
486
|
-
|
487
|
-
memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x24\x42", 3);
|
488
|
-
ctx->buffer_used += 3;
|
489
|
-
|
490
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_JIS0208;
|
491
|
-
size += 3;
|
492
|
-
|
493
|
-
goto begin;
|
494
|
-
}
|
495
|
-
|
496
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 94 + 0x21;
|
497
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 94 + 0x21;
|
498
|
-
|
499
|
-
continue;
|
500
|
-
|
501
|
-
small_buffer:
|
502
|
-
|
503
|
-
ctx->state = state;
|
504
|
-
ctx->buffer_used -= size;
|
505
|
-
|
506
|
-
return LXB_STATUS_SMALL_BUFFER;
|
507
|
-
|
508
|
-
failed:
|
509
|
-
|
510
|
-
ctx->buffer_used -= size;
|
511
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
512
|
-
}
|
513
|
-
|
514
|
-
return LXB_STATUS_OK;
|
515
|
-
}
|
516
|
-
|
517
|
-
lxb_status_t
|
518
|
-
lxb_encoding_encode_iso_2022_jp_eof(lxb_encoding_encode_t *ctx)
|
519
|
-
{
|
520
|
-
if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_ASCII) {
|
521
|
-
if ((ctx->buffer_used + 3) > ctx->buffer_length) {
|
522
|
-
return LXB_STATUS_SMALL_BUFFER;
|
523
|
-
}
|
524
|
-
|
525
|
-
memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
|
526
|
-
ctx->buffer_used += 3;
|
527
|
-
}
|
528
|
-
|
529
|
-
return LXB_STATUS_OK;
|
530
|
-
}
|
531
|
-
|
532
|
-
lxb_status_t
|
533
|
-
lxb_encoding_encode_iso_8859_10(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
534
|
-
const lxb_codepoint_t *end)
|
535
|
-
{
|
536
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_10,
|
537
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_10_SIZE);
|
538
|
-
}
|
539
|
-
|
540
|
-
lxb_status_t
|
541
|
-
lxb_encoding_encode_iso_8859_13(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
542
|
-
const lxb_codepoint_t *end)
|
543
|
-
{
|
544
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_13,
|
545
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_13_SIZE);
|
546
|
-
}
|
547
|
-
|
548
|
-
lxb_status_t
|
549
|
-
lxb_encoding_encode_iso_8859_14(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
550
|
-
const lxb_codepoint_t *end)
|
551
|
-
{
|
552
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_14,
|
553
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_14_SIZE);
|
554
|
-
}
|
555
|
-
|
556
|
-
lxb_status_t
|
557
|
-
lxb_encoding_encode_iso_8859_15(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
558
|
-
const lxb_codepoint_t *end)
|
559
|
-
{
|
560
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_15,
|
561
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_15_SIZE);
|
562
|
-
}
|
563
|
-
|
564
|
-
lxb_status_t
|
565
|
-
lxb_encoding_encode_iso_8859_16(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
566
|
-
const lxb_codepoint_t *end)
|
567
|
-
{
|
568
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_16,
|
569
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_16_SIZE);
|
570
|
-
}
|
571
|
-
|
572
|
-
lxb_status_t
|
573
|
-
lxb_encoding_encode_iso_8859_2(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
574
|
-
const lxb_codepoint_t *end)
|
575
|
-
{
|
576
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_2,
|
577
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_2_SIZE);
|
578
|
-
}
|
579
|
-
|
580
|
-
lxb_status_t
|
581
|
-
lxb_encoding_encode_iso_8859_3(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
582
|
-
const lxb_codepoint_t *end)
|
583
|
-
{
|
584
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_3,
|
585
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_3_SIZE);
|
586
|
-
}
|
587
|
-
|
588
|
-
lxb_status_t
|
589
|
-
lxb_encoding_encode_iso_8859_4(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
590
|
-
const lxb_codepoint_t *end)
|
591
|
-
{
|
592
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_4,
|
593
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_4_SIZE);
|
594
|
-
}
|
595
|
-
|
596
|
-
lxb_status_t
|
597
|
-
lxb_encoding_encode_iso_8859_5(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
598
|
-
const lxb_codepoint_t *end)
|
599
|
-
{
|
600
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_5,
|
601
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_5_SIZE);
|
602
|
-
}
|
603
|
-
|
604
|
-
lxb_status_t
|
605
|
-
lxb_encoding_encode_iso_8859_6(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
606
|
-
const lxb_codepoint_t *end)
|
607
|
-
{
|
608
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_6,
|
609
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_6_SIZE);
|
610
|
-
}
|
611
|
-
|
612
|
-
lxb_status_t
|
613
|
-
lxb_encoding_encode_iso_8859_7(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
614
|
-
const lxb_codepoint_t *end)
|
615
|
-
{
|
616
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_7,
|
617
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_7_SIZE);
|
618
|
-
}
|
619
|
-
|
620
|
-
lxb_status_t
|
621
|
-
lxb_encoding_encode_iso_8859_8(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
622
|
-
const lxb_codepoint_t *end)
|
623
|
-
{
|
624
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_8,
|
625
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
|
626
|
-
}
|
627
|
-
|
628
|
-
lxb_status_t
|
629
|
-
lxb_encoding_encode_iso_8859_8_i(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
630
|
-
const lxb_codepoint_t *end)
|
631
|
-
{
|
632
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_8,
|
633
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
|
634
|
-
}
|
635
|
-
|
636
|
-
lxb_status_t
|
637
|
-
lxb_encoding_encode_koi8_r(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
638
|
-
const lxb_codepoint_t *end)
|
639
|
-
{
|
640
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_koi8_r,
|
641
|
-
LXB_ENCODING_SINGLE_HASH_KOI8_R_SIZE);
|
642
|
-
}
|
643
|
-
|
644
|
-
lxb_status_t
|
645
|
-
lxb_encoding_encode_koi8_u(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
646
|
-
const lxb_codepoint_t *end)
|
647
|
-
{
|
648
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_koi8_u,
|
649
|
-
LXB_ENCODING_SINGLE_HASH_KOI8_U_SIZE);
|
650
|
-
}
|
651
|
-
|
652
|
-
lxb_inline const lexbor_shs_hash_t *
|
653
|
-
lxb_encoding_encode_shift_jis_index(lxb_codepoint_t cp)
|
654
|
-
{
|
655
|
-
const lexbor_shs_hash_t *entry;
|
656
|
-
|
657
|
-
entry = &lxb_encoding_multi_hash_jis0208[ (cp % LXB_ENCODING_MULTI_HASH_JIS0208_SIZE) + 1 ];
|
658
|
-
|
659
|
-
do {
|
660
|
-
if (entry->key == cp) {
|
661
|
-
if ((unsigned) ((uint32_t) (uintptr_t) entry->value - 8272) > (8835 - 8272)) {
|
662
|
-
return entry;
|
663
|
-
}
|
664
|
-
}
|
665
|
-
|
666
|
-
entry = &lxb_encoding_multi_hash_jis0208[entry->next];
|
667
|
-
}
|
668
|
-
while (entry != lxb_encoding_multi_hash_jis0208);
|
669
|
-
|
670
|
-
return NULL;
|
671
|
-
}
|
672
|
-
|
673
|
-
lxb_status_t
|
674
|
-
lxb_encoding_encode_shift_jis(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
675
|
-
const lxb_codepoint_t *end)
|
676
|
-
{
|
677
|
-
uint32_t lead, trail;
|
678
|
-
lxb_codepoint_t cp;
|
679
|
-
const lexbor_shs_hash_t *hash;
|
680
|
-
|
681
|
-
for (; *cps < end; (*cps)++) {
|
682
|
-
cp = **cps;
|
683
|
-
|
684
|
-
if (cp <= 0x80) {
|
685
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
686
|
-
continue;
|
687
|
-
}
|
688
|
-
|
689
|
-
if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
|
690
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp - 0xFF61 + 0xA1);
|
691
|
-
continue;
|
692
|
-
}
|
693
|
-
|
694
|
-
switch (cp) {
|
695
|
-
case 0x00A5:
|
696
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
|
697
|
-
continue;
|
698
|
-
|
699
|
-
case 0x203E:
|
700
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
|
701
|
-
continue;
|
702
|
-
|
703
|
-
case 0x2212:
|
704
|
-
cp = 0xFF0D;
|
705
|
-
break;
|
706
|
-
}
|
707
|
-
|
708
|
-
hash = lxb_encoding_encode_shift_jis_index(cp);
|
709
|
-
if (hash == NULL) {
|
710
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
711
|
-
continue;
|
712
|
-
}
|
713
|
-
|
714
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
715
|
-
return LXB_STATUS_SMALL_BUFFER;
|
716
|
-
}
|
717
|
-
|
718
|
-
lead = (uint32_t) (uintptr_t) hash->value / 188;
|
719
|
-
trail = (uint32_t) (uintptr_t) hash->value % 188;
|
720
|
-
|
721
|
-
ctx->buffer_out[ctx->buffer_used++ ] = lead + ((lead < 0x1F) ? 0x81 : 0xC1);
|
722
|
-
ctx->buffer_out[ctx->buffer_used++ ] = trail + ((trail < 0x3F) ? 0x40 : 0x41);
|
723
|
-
}
|
724
|
-
|
725
|
-
return LXB_STATUS_OK;
|
726
|
-
}
|
727
|
-
|
728
|
-
lxb_inline void
|
729
|
-
lxb_encoding_encode_utf_16_write(lxb_encoding_encode_t *ctx, bool is_be,
|
730
|
-
lxb_codepoint_t cp)
|
731
|
-
{
|
732
|
-
if (is_be) {
|
733
|
-
ctx->buffer_out[ctx->buffer_used++] = cp >> 8;
|
734
|
-
ctx->buffer_out[ctx->buffer_used++] = cp & 0x00FF;
|
735
|
-
|
736
|
-
return;
|
737
|
-
}
|
738
|
-
|
739
|
-
ctx->buffer_out[ctx->buffer_used++] = cp & 0x00FF;
|
740
|
-
ctx->buffer_out[ctx->buffer_used++] = cp >> 8;
|
741
|
-
}
|
742
|
-
|
743
|
-
lxb_inline int8_t
|
744
|
-
lxb_encoding_encode_utf_16(lxb_encoding_encode_t *ctx, bool is_be,
|
745
|
-
const lxb_codepoint_t **cps, const lxb_codepoint_t *end)
|
746
|
-
{
|
747
|
-
lxb_codepoint_t cp;
|
748
|
-
|
749
|
-
for (; *cps < end; (*cps)++) {
|
750
|
-
cp = **cps;
|
751
|
-
|
752
|
-
if (cp < 0x10000) {
|
753
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
754
|
-
return LXB_STATUS_SMALL_BUFFER;
|
755
|
-
}
|
756
|
-
|
757
|
-
lxb_encoding_encode_utf_16_write(ctx, is_be, cp);
|
758
|
-
|
759
|
-
continue;
|
760
|
-
}
|
761
|
-
|
762
|
-
if ((ctx->buffer_used + 4) > ctx->buffer_length) {
|
763
|
-
return LXB_STATUS_SMALL_BUFFER;
|
764
|
-
}
|
765
|
-
|
766
|
-
cp -= 0x10000;
|
767
|
-
|
768
|
-
lxb_encoding_encode_utf_16_write(ctx, is_be, (0xD800 | (cp >> 0x0A)));
|
769
|
-
lxb_encoding_encode_utf_16_write(ctx, is_be, (0xDC00 | (cp & 0x03FF)));
|
770
|
-
}
|
771
|
-
|
772
|
-
return LXB_STATUS_OK;
|
773
|
-
}
|
774
|
-
|
775
|
-
lxb_status_t
|
776
|
-
lxb_encoding_encode_utf_16be(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
777
|
-
const lxb_codepoint_t *end)
|
778
|
-
{
|
779
|
-
return lxb_encoding_encode_utf_16(ctx, true, cps, end);
|
780
|
-
}
|
781
|
-
|
782
|
-
lxb_status_t
|
783
|
-
lxb_encoding_encode_utf_16le(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
784
|
-
const lxb_codepoint_t *end)
|
785
|
-
{
|
786
|
-
return lxb_encoding_encode_utf_16(ctx, false, cps, end);
|
787
|
-
}
|
788
|
-
|
789
|
-
lxb_status_t
|
790
|
-
lxb_encoding_encode_utf_8(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
791
|
-
const lxb_codepoint_t *end)
|
792
|
-
{
|
793
|
-
lxb_codepoint_t cp;
|
794
|
-
const lxb_codepoint_t *p = *cps;
|
795
|
-
|
796
|
-
for (; p < end; p++) {
|
797
|
-
cp = *p;
|
798
|
-
|
799
|
-
if (cp < 0x80) {
|
800
|
-
if ((ctx->buffer_used + 1) > ctx->buffer_length) {
|
801
|
-
*cps = p;
|
802
|
-
|
803
|
-
return LXB_STATUS_SMALL_BUFFER;
|
804
|
-
}
|
805
|
-
|
806
|
-
/* 0xxxxxxx */
|
807
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
|
808
|
-
}
|
809
|
-
else if (cp < 0x800) {
|
810
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
811
|
-
*cps = p;
|
812
|
-
|
813
|
-
return LXB_STATUS_SMALL_BUFFER;
|
814
|
-
}
|
815
|
-
|
816
|
-
/* 110xxxxx 10xxxxxx */
|
817
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xC0 | (cp >> 6 ));
|
818
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | (cp & 0x3F));
|
819
|
-
}
|
820
|
-
else if (cp < 0x10000) {
|
821
|
-
if ((ctx->buffer_used + 3) > ctx->buffer_length) {
|
822
|
-
*cps = p;
|
823
|
-
|
824
|
-
return LXB_STATUS_SMALL_BUFFER;
|
825
|
-
}
|
826
|
-
|
827
|
-
/* 1110xxxx 10xxxxxx 10xxxxxx */
|
828
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xE0 | ((cp >> 12)));
|
829
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
|
830
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ( cp & 0x3F));
|
831
|
-
}
|
832
|
-
else if (cp < 0x110000) {
|
833
|
-
if ((ctx->buffer_used + 4) > ctx->buffer_length) {
|
834
|
-
*cps = p;
|
835
|
-
|
836
|
-
return LXB_STATUS_SMALL_BUFFER;
|
837
|
-
}
|
838
|
-
|
839
|
-
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
840
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xF0 | ( cp >> 18));
|
841
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 12) & 0x3F));
|
842
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
|
843
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ( cp & 0x3F));
|
844
|
-
}
|
845
|
-
else {
|
846
|
-
*cps = p;
|
847
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
848
|
-
}
|
849
|
-
}
|
850
|
-
|
851
|
-
*cps = p;
|
852
|
-
|
853
|
-
return LXB_STATUS_OK;
|
854
|
-
}
|
855
|
-
|
856
|
-
lxb_inline uint32_t
|
857
|
-
lxb_encoding_encode_gb18030_range(lxb_codepoint_t cp)
|
858
|
-
{
|
859
|
-
size_t mid, left, right;
|
860
|
-
const lxb_encoding_range_index_t *range;
|
861
|
-
|
862
|
-
if (cp == 0xE7C7) {
|
863
|
-
return 7457;
|
864
|
-
}
|
865
|
-
|
866
|
-
left = 0;
|
867
|
-
right = LXB_ENCODING_RANGE_INDEX_GB18030_SIZE;
|
868
|
-
range = lxb_encoding_range_index_gb18030;
|
869
|
-
|
870
|
-
/* Some compilers say about uninitialized mid */
|
871
|
-
mid = 0;
|
872
|
-
|
873
|
-
while (left < right) {
|
874
|
-
mid = left + (right - left) / 2;
|
875
|
-
|
876
|
-
if (range[mid].codepoint < cp) {
|
877
|
-
left = mid + 1;
|
878
|
-
|
879
|
-
if (left < right && range[left].codepoint > cp) {
|
880
|
-
break;
|
881
|
-
}
|
882
|
-
}
|
883
|
-
else if (range[mid].codepoint > cp) {
|
884
|
-
right = mid - 1;
|
885
|
-
|
886
|
-
if (right > 0 && range[right].codepoint <= cp) {
|
887
|
-
mid = right;
|
888
|
-
break;
|
889
|
-
}
|
890
|
-
}
|
891
|
-
else {
|
892
|
-
break;
|
893
|
-
}
|
894
|
-
}
|
895
|
-
|
896
|
-
return range[mid].index + cp - range[mid].codepoint;
|
897
|
-
}
|
898
|
-
|
899
|
-
lxb_status_t
|
900
|
-
lxb_encoding_encode_gb18030(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
901
|
-
const lxb_codepoint_t *end)
|
902
|
-
{
|
903
|
-
uint32_t index;
|
904
|
-
lxb_codepoint_t cp;
|
905
|
-
const lexbor_shs_hash_t *hash;
|
906
|
-
|
907
|
-
for (; *cps < end; (*cps)++) {
|
908
|
-
cp = **cps;
|
909
|
-
|
910
|
-
if (cp < 0x80) {
|
911
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
912
|
-
continue;
|
913
|
-
}
|
914
|
-
|
915
|
-
if (cp == 0xE5E5) {
|
916
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
917
|
-
continue;
|
918
|
-
}
|
919
|
-
|
920
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
|
921
|
-
LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
|
922
|
-
if (hash != NULL) {
|
923
|
-
if ((ctx->buffer_used + 2) > ctx->buffer_length) {
|
924
|
-
return LXB_STATUS_SMALL_BUFFER;
|
925
|
-
}
|
926
|
-
|
927
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
|
928
|
-
|
929
|
-
if (((uint32_t) (uintptr_t) hash->value % 190) < 0x3F) {
|
930
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value % 190) + 0x40;
|
931
|
-
}
|
932
|
-
else {
|
933
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value % 190) + 0x41;
|
934
|
-
}
|
935
|
-
|
936
|
-
continue;
|
937
|
-
}
|
938
|
-
|
939
|
-
if ((ctx->buffer_used + 4) > ctx->buffer_length) {
|
940
|
-
return LXB_STATUS_SMALL_BUFFER;
|
941
|
-
}
|
942
|
-
|
943
|
-
index = lxb_encoding_encode_gb18030_range(cp);
|
944
|
-
|
945
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (index / (10 * 126 * 10)) + 0x81;
|
946
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = ((index % (10 * 126 * 10)) / (10 * 126)) + 0x30;
|
947
|
-
|
948
|
-
index = (index % (10 * 126 * 10)) % (10 * 126);
|
949
|
-
|
950
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (index / 10) + 0x81;
|
951
|
-
ctx->buffer_out[ ctx->buffer_used++ ] = (index % 10) + 0x30;
|
952
|
-
}
|
953
|
-
|
954
|
-
return LXB_STATUS_OK;
|
955
|
-
}
|
956
|
-
|
957
|
-
lxb_status_t
|
958
|
-
lxb_encoding_encode_macintosh(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
959
|
-
const lxb_codepoint_t *end)
|
960
|
-
{
|
961
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_macintosh,
|
962
|
-
LXB_ENCODING_SINGLE_HASH_MACINTOSH_SIZE);
|
963
|
-
}
|
964
|
-
|
965
|
-
lxb_status_t
|
966
|
-
lxb_encoding_encode_replacement(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
967
|
-
const lxb_codepoint_t *end)
|
968
|
-
{
|
969
|
-
*cps = end;
|
970
|
-
return LXB_STATUS_ERROR;
|
971
|
-
}
|
972
|
-
|
973
|
-
lxb_status_t
|
974
|
-
lxb_encoding_encode_windows_1250(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
975
|
-
const lxb_codepoint_t *end)
|
976
|
-
{
|
977
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1250,
|
978
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1250_SIZE);
|
979
|
-
}
|
980
|
-
|
981
|
-
lxb_status_t
|
982
|
-
lxb_encoding_encode_windows_1251(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
983
|
-
const lxb_codepoint_t *end)
|
984
|
-
{
|
985
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1251,
|
986
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1251_SIZE);
|
987
|
-
}
|
988
|
-
|
989
|
-
lxb_status_t
|
990
|
-
lxb_encoding_encode_windows_1252(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
991
|
-
const lxb_codepoint_t *end)
|
992
|
-
{
|
993
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1252,
|
994
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1252_SIZE);
|
995
|
-
}
|
996
|
-
|
997
|
-
lxb_status_t
|
998
|
-
lxb_encoding_encode_windows_1253(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
999
|
-
const lxb_codepoint_t *end)
|
1000
|
-
{
|
1001
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1253,
|
1002
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1253_SIZE);
|
1003
|
-
}
|
1004
|
-
|
1005
|
-
lxb_status_t
|
1006
|
-
lxb_encoding_encode_windows_1254(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
1007
|
-
const lxb_codepoint_t *end)
|
1008
|
-
{
|
1009
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1254,
|
1010
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1254_SIZE);
|
1011
|
-
}
|
1012
|
-
|
1013
|
-
lxb_status_t
|
1014
|
-
lxb_encoding_encode_windows_1255(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
1015
|
-
const lxb_codepoint_t *end)
|
1016
|
-
{
|
1017
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1255,
|
1018
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1255_SIZE);
|
1019
|
-
}
|
1020
|
-
|
1021
|
-
lxb_status_t
|
1022
|
-
lxb_encoding_encode_windows_1256(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
1023
|
-
const lxb_codepoint_t *end)
|
1024
|
-
{
|
1025
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1256,
|
1026
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1256_SIZE);
|
1027
|
-
}
|
1028
|
-
|
1029
|
-
lxb_status_t
|
1030
|
-
lxb_encoding_encode_windows_1257(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
1031
|
-
const lxb_codepoint_t *end)
|
1032
|
-
{
|
1033
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1257,
|
1034
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1257_SIZE);
|
1035
|
-
}
|
1036
|
-
|
1037
|
-
lxb_status_t
|
1038
|
-
lxb_encoding_encode_windows_1258(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
1039
|
-
const lxb_codepoint_t *end)
|
1040
|
-
{
|
1041
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1258,
|
1042
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1258_SIZE);
|
1043
|
-
}
|
1044
|
-
|
1045
|
-
lxb_status_t
|
1046
|
-
lxb_encoding_encode_windows_874(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
1047
|
-
const lxb_codepoint_t *end)
|
1048
|
-
{
|
1049
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_874,
|
1050
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_874_SIZE);
|
1051
|
-
}
|
1052
|
-
|
1053
|
-
lxb_status_t
|
1054
|
-
lxb_encoding_encode_x_mac_cyrillic(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
1055
|
-
const lxb_codepoint_t *end)
|
1056
|
-
{
|
1057
|
-
LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_x_mac_cyrillic,
|
1058
|
-
LXB_ENCODING_SINGLE_HASH_X_MAC_CYRILLIC_SIZE);
|
1059
|
-
}
|
1060
|
-
|
1061
|
-
lxb_status_t
|
1062
|
-
lxb_encoding_encode_x_user_defined(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
|
1063
|
-
const lxb_codepoint_t *end)
|
1064
|
-
{
|
1065
|
-
lxb_codepoint_t cp;
|
1066
|
-
|
1067
|
-
for (; *cps < end; (*cps)++) {
|
1068
|
-
cp = **cps;
|
1069
|
-
|
1070
|
-
if (cp < 0x80) {
|
1071
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, cp);
|
1072
|
-
}
|
1073
|
-
else if (cp >= 0xF780 && cp <= 0xF7FF) {
|
1074
|
-
LXB_ENCODING_ENCODE_APPEND(ctx, (cp - 0xF780 + 0x80));
|
1075
|
-
}
|
1076
|
-
else {
|
1077
|
-
LXB_ENCODING_ENCODE_ERROR(ctx);
|
1078
|
-
}
|
1079
|
-
}
|
1080
|
-
|
1081
|
-
return LXB_STATUS_OK;
|
1082
|
-
}
|
1083
|
-
|
1084
|
-
/*
|
1085
|
-
* Single
|
1086
|
-
*/
|
1087
|
-
int8_t
|
1088
|
-
lxb_encoding_encode_default_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1089
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1090
|
-
{
|
1091
|
-
return lxb_encoding_encode_utf_8_single(ctx, data, end, cp);
|
1092
|
-
}
|
1093
|
-
|
1094
|
-
int8_t
|
1095
|
-
lxb_encoding_encode_auto_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1096
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1097
|
-
{
|
1098
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1099
|
-
}
|
1100
|
-
|
1101
|
-
int8_t
|
1102
|
-
lxb_encoding_encode_undefined_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1103
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1104
|
-
{
|
1105
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1106
|
-
}
|
1107
|
-
|
1108
|
-
int8_t
|
1109
|
-
lxb_encoding_encode_big5_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1110
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1111
|
-
{
|
1112
|
-
const lexbor_shs_hash_t *hash;
|
1113
|
-
|
1114
|
-
if (cp < 0x80) {
|
1115
|
-
*(*data)++ = (lxb_char_t) cp;
|
1116
|
-
|
1117
|
-
return 1;
|
1118
|
-
}
|
1119
|
-
|
1120
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_big5,
|
1121
|
-
LXB_ENCODING_MULTI_HASH_BIG5_SIZE, cp);
|
1122
|
-
if (hash == NULL) {
|
1123
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1124
|
-
}
|
1125
|
-
|
1126
|
-
if ((*data + 2) > end) {
|
1127
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1128
|
-
}
|
1129
|
-
|
1130
|
-
*(*data)++ = ((uint32_t) (uintptr_t) hash->value) / 157 + 0x81;
|
1131
|
-
|
1132
|
-
if ((((uint32_t) (uintptr_t) hash->value) % 157) < 0x3F) {
|
1133
|
-
*(*data)++ = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x40;
|
1134
|
-
}
|
1135
|
-
else {
|
1136
|
-
*(*data)++ = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x62;
|
1137
|
-
}
|
1138
|
-
|
1139
|
-
return 2;
|
1140
|
-
}
|
1141
|
-
|
1142
|
-
int8_t
|
1143
|
-
lxb_encoding_encode_euc_jp_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1144
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1145
|
-
{
|
1146
|
-
const lexbor_shs_hash_t *hash;
|
1147
|
-
|
1148
|
-
if (cp < 0x80) {
|
1149
|
-
*(*data)++ = (lxb_char_t) cp;
|
1150
|
-
|
1151
|
-
return 1;
|
1152
|
-
}
|
1153
|
-
|
1154
|
-
if (cp == 0x00A5) {
|
1155
|
-
*(*data)++ = 0x5C;
|
1156
|
-
|
1157
|
-
return 1;
|
1158
|
-
}
|
1159
|
-
|
1160
|
-
if (cp == 0x203E) {
|
1161
|
-
*(*data)++ = 0x7E;
|
1162
|
-
|
1163
|
-
return 1;
|
1164
|
-
}
|
1165
|
-
|
1166
|
-
if ((*data + 2) > end) {
|
1167
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1168
|
-
}
|
1169
|
-
|
1170
|
-
if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
|
1171
|
-
*(*data)++ = 0x8E;
|
1172
|
-
*(*data)++ = cp - 0xFF61 + 0xA1;
|
1173
|
-
|
1174
|
-
return 2;
|
1175
|
-
}
|
1176
|
-
|
1177
|
-
if (cp == 0x2212) {
|
1178
|
-
cp = 0xFF0D;
|
1179
|
-
}
|
1180
|
-
|
1181
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
|
1182
|
-
LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
|
1183
|
-
if (hash == NULL) {
|
1184
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1185
|
-
}
|
1186
|
-
|
1187
|
-
*(*data)++ = (uint32_t) (uintptr_t) hash->value / 94 + 0xA1;
|
1188
|
-
*(*data)++ = (uint32_t) (uintptr_t) hash->value % 94 + 0xA1;
|
1189
|
-
|
1190
|
-
return 2;
|
1191
|
-
}
|
1192
|
-
|
1193
|
-
int8_t
|
1194
|
-
lxb_encoding_encode_euc_kr_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1195
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1196
|
-
{
|
1197
|
-
const lexbor_shs_hash_t *hash;
|
1198
|
-
|
1199
|
-
if (cp < 0x80) {
|
1200
|
-
*(*data)++ = (lxb_char_t) cp;
|
1201
|
-
|
1202
|
-
return 1;
|
1203
|
-
}
|
1204
|
-
|
1205
|
-
if ((*data + 2) > end) {
|
1206
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1207
|
-
}
|
1208
|
-
|
1209
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_euc_kr,
|
1210
|
-
LXB_ENCODING_MULTI_HASH_EUC_KR_SIZE, cp);
|
1211
|
-
if (hash == NULL) {
|
1212
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1213
|
-
}
|
1214
|
-
|
1215
|
-
*(*data)++ = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
|
1216
|
-
*(*data)++ = (uint32_t) (uintptr_t) hash->value % 190 + 0x41;
|
1217
|
-
|
1218
|
-
return 2;
|
1219
|
-
}
|
1220
|
-
|
1221
|
-
int8_t
|
1222
|
-
lxb_encoding_encode_gbk_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1223
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1224
|
-
{
|
1225
|
-
const lexbor_shs_hash_t *hash;
|
1226
|
-
|
1227
|
-
if (cp < 0x80) {
|
1228
|
-
*(*data)++ = (lxb_char_t) cp;
|
1229
|
-
|
1230
|
-
return 1;
|
1231
|
-
}
|
1232
|
-
|
1233
|
-
if (cp == 0xE5E5) {
|
1234
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1235
|
-
}
|
1236
|
-
|
1237
|
-
if (cp == 0x20AC) {
|
1238
|
-
*(*data)++ = 0x80;
|
1239
|
-
|
1240
|
-
return 1;
|
1241
|
-
}
|
1242
|
-
|
1243
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
|
1244
|
-
LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
|
1245
|
-
if (hash != NULL) {
|
1246
|
-
if ((*data + 2) > end) {
|
1247
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1248
|
-
}
|
1249
|
-
|
1250
|
-
*(*data)++ = (lxb_char_t) (uintptr_t) hash->value / 190 + 0x81;
|
1251
|
-
|
1252
|
-
if (((lxb_char_t) (uintptr_t) hash->value % 190) < 0x3F) {
|
1253
|
-
*(*data)++ = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x40;
|
1254
|
-
}
|
1255
|
-
else {
|
1256
|
-
*(*data)++ = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x41;
|
1257
|
-
}
|
1258
|
-
|
1259
|
-
return 2;
|
1260
|
-
}
|
1261
|
-
|
1262
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1263
|
-
}
|
1264
|
-
|
1265
|
-
int8_t
|
1266
|
-
lxb_encoding_encode_ibm866_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1267
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1268
|
-
{
|
1269
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_ibm866,
|
1270
|
-
LXB_ENCODING_SINGLE_HASH_IBM866_SIZE);
|
1271
|
-
}
|
1272
|
-
|
1273
|
-
int8_t
|
1274
|
-
lxb_encoding_encode_iso_2022_jp_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1275
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1276
|
-
{
|
1277
|
-
int8_t size;
|
1278
|
-
unsigned state;
|
1279
|
-
const lexbor_shs_hash_t *hash;
|
1280
|
-
|
1281
|
-
size = 0;
|
1282
|
-
state = ctx->state;
|
1283
|
-
|
1284
|
-
begin:
|
1285
|
-
|
1286
|
-
switch (ctx->state) {
|
1287
|
-
case LXB_ENCODING_ENCODE_2022_JP_ASCII:
|
1288
|
-
if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
|
1289
|
-
goto failed;
|
1290
|
-
}
|
1291
|
-
|
1292
|
-
if (cp < 0x80) {
|
1293
|
-
*(*data)++ = (lxb_char_t) cp;
|
1294
|
-
|
1295
|
-
return size + 1;
|
1296
|
-
}
|
1297
|
-
|
1298
|
-
if (cp == 0x00A5 || cp == 0x203E) {
|
1299
|
-
/*
|
1300
|
-
* Do not switch to the ROMAN stage with prepend code point
|
1301
|
-
* to stream, add it immediately.
|
1302
|
-
*/
|
1303
|
-
if ((*data + 4) > end) {
|
1304
|
-
goto small_buffer;
|
1305
|
-
}
|
1306
|
-
|
1307
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
|
1308
|
-
|
1309
|
-
if (cp == 0x00A5) {
|
1310
|
-
memcpy(*data, "\x1B\x28\x4A\x5C", 4);
|
1311
|
-
*data = *data + 4;
|
1312
|
-
|
1313
|
-
return size + 4;
|
1314
|
-
}
|
1315
|
-
|
1316
|
-
memcpy(*data, "\x1B\x28\x4A\x7E", 4);
|
1317
|
-
*data = *data + 4;
|
1318
|
-
|
1319
|
-
return size + 4;
|
1320
|
-
}
|
1321
|
-
|
1322
|
-
break;
|
1323
|
-
|
1324
|
-
case LXB_ENCODING_ENCODE_2022_JP_ROMAN:
|
1325
|
-
if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
|
1326
|
-
goto failed;
|
1327
|
-
}
|
1328
|
-
|
1329
|
-
if (cp < 0x80) {
|
1330
|
-
switch (cp) {
|
1331
|
-
case 0x005C:
|
1332
|
-
case 0x007E:
|
1333
|
-
break;
|
1334
|
-
|
1335
|
-
case 0x00A5:
|
1336
|
-
*(*data)++ = 0x5C;
|
1337
|
-
return size + 1;
|
1338
|
-
|
1339
|
-
case 0x203E:
|
1340
|
-
*(*data)++ = 0x7E;
|
1341
|
-
return size + 1;
|
1342
|
-
|
1343
|
-
default:
|
1344
|
-
*(*data)++ = (lxb_char_t) cp;
|
1345
|
-
return size + 1;
|
1346
|
-
}
|
1347
|
-
|
1348
|
-
/*
|
1349
|
-
* Do not switch to the ANSI stage with prepend code point
|
1350
|
-
* to stream, add it immediately.
|
1351
|
-
*/
|
1352
|
-
if ((*data + 4) > end) {
|
1353
|
-
goto small_buffer;
|
1354
|
-
}
|
1355
|
-
|
1356
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
|
1357
|
-
|
1358
|
-
memcpy(*data, "\x1B\x28\x42", 3);
|
1359
|
-
*data = *data + 3;
|
1360
|
-
|
1361
|
-
*(*data)++ = (lxb_char_t) cp;
|
1362
|
-
|
1363
|
-
return size + 4;
|
1364
|
-
}
|
1365
|
-
|
1366
|
-
break;
|
1367
|
-
|
1368
|
-
case LXB_ENCODING_ENCODE_2022_JP_JIS0208:
|
1369
|
-
if (cp < 0x80) {
|
1370
|
-
if ((*data + 4) > end) {
|
1371
|
-
goto small_buffer;
|
1372
|
-
}
|
1373
|
-
|
1374
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
|
1375
|
-
|
1376
|
-
memcpy(*data, "\x1B\x28\x42", 3);
|
1377
|
-
*data = *data + 3;
|
1378
|
-
|
1379
|
-
*(*data)++ = (lxb_char_t) cp;
|
1380
|
-
|
1381
|
-
return size + 4;
|
1382
|
-
}
|
1383
|
-
|
1384
|
-
if (cp == 0x00A5 || cp == 0x203E) {
|
1385
|
-
if ((*data + 4) > end) {
|
1386
|
-
goto small_buffer;
|
1387
|
-
}
|
1388
|
-
|
1389
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
|
1390
|
-
|
1391
|
-
if (cp == 0x00A5) {
|
1392
|
-
memcpy(*data, "\x1B\x28\x4A\x5C", 4);
|
1393
|
-
*data = *data + 4;
|
1394
|
-
|
1395
|
-
return size + 4;
|
1396
|
-
}
|
1397
|
-
|
1398
|
-
memcpy(*data, "\x1B\x28\x4A\x7E", 4);
|
1399
|
-
*data = *data + 4;
|
1400
|
-
|
1401
|
-
return size + 4;
|
1402
|
-
}
|
1403
|
-
|
1404
|
-
break;
|
1405
|
-
}
|
1406
|
-
|
1407
|
-
if ((*data + 2) > end) {
|
1408
|
-
goto small_buffer;
|
1409
|
-
}
|
1410
|
-
|
1411
|
-
if (cp == 0x2212) {
|
1412
|
-
cp = 0xFF0D;
|
1413
|
-
}
|
1414
|
-
|
1415
|
-
if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
|
1416
|
-
cp = lxb_encoding_multi_index_iso_2022_jp_katakana[cp - 0xFF61].codepoint;
|
1417
|
-
}
|
1418
|
-
|
1419
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
|
1420
|
-
LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
|
1421
|
-
if (hash == NULL) {
|
1422
|
-
goto failed;
|
1423
|
-
}
|
1424
|
-
|
1425
|
-
if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_JIS0208) {
|
1426
|
-
if ((*data + 3) > end) {
|
1427
|
-
goto small_buffer;
|
1428
|
-
}
|
1429
|
-
|
1430
|
-
memcpy(*data, "\x1B\x24\x42", 3);
|
1431
|
-
*data = *data + 3;
|
1432
|
-
|
1433
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_JIS0208;
|
1434
|
-
size += 3;
|
1435
|
-
|
1436
|
-
goto begin;
|
1437
|
-
}
|
1438
|
-
|
1439
|
-
*(*data)++ = (uint32_t) (uintptr_t) hash->value / 94 + 0x21;
|
1440
|
-
*(*data)++ = (uint32_t) (uintptr_t) hash->value % 94 + 0x21;
|
1441
|
-
|
1442
|
-
return size + 2;
|
1443
|
-
|
1444
|
-
small_buffer:
|
1445
|
-
|
1446
|
-
ctx->state = state;
|
1447
|
-
*data = *data - size;
|
1448
|
-
|
1449
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1450
|
-
|
1451
|
-
failed:
|
1452
|
-
|
1453
|
-
*data = *data - size;
|
1454
|
-
|
1455
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1456
|
-
}
|
1457
|
-
|
1458
|
-
int8_t
|
1459
|
-
lxb_encoding_encode_iso_2022_jp_eof_single(lxb_encoding_encode_t *ctx,
|
1460
|
-
lxb_char_t **data, const lxb_char_t *end)
|
1461
|
-
{
|
1462
|
-
if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_ASCII) {
|
1463
|
-
if ((*data + 3) > end) {
|
1464
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1465
|
-
}
|
1466
|
-
|
1467
|
-
memcpy(*data, "\x1B\x28\x42", 3);
|
1468
|
-
*data = *data + 3;
|
1469
|
-
|
1470
|
-
ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
|
1471
|
-
|
1472
|
-
return 3;
|
1473
|
-
}
|
1474
|
-
|
1475
|
-
return 0;
|
1476
|
-
}
|
1477
|
-
|
1478
|
-
int8_t
|
1479
|
-
lxb_encoding_encode_iso_8859_10_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1480
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1481
|
-
{
|
1482
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_10,
|
1483
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_10_SIZE);
|
1484
|
-
}
|
1485
|
-
|
1486
|
-
int8_t
|
1487
|
-
lxb_encoding_encode_iso_8859_13_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1488
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1489
|
-
{
|
1490
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_13,
|
1491
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_13_SIZE);
|
1492
|
-
}
|
1493
|
-
|
1494
|
-
int8_t
|
1495
|
-
lxb_encoding_encode_iso_8859_14_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1496
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1497
|
-
{
|
1498
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_14,
|
1499
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_14_SIZE);
|
1500
|
-
}
|
1501
|
-
|
1502
|
-
int8_t
|
1503
|
-
lxb_encoding_encode_iso_8859_15_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1504
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1505
|
-
{
|
1506
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_15,
|
1507
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_15_SIZE);
|
1508
|
-
}
|
1509
|
-
|
1510
|
-
int8_t
|
1511
|
-
lxb_encoding_encode_iso_8859_16_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1512
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1513
|
-
{
|
1514
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_16,
|
1515
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_16_SIZE);
|
1516
|
-
}
|
1517
|
-
|
1518
|
-
int8_t
|
1519
|
-
lxb_encoding_encode_iso_8859_2_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1520
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1521
|
-
{
|
1522
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_2,
|
1523
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_2_SIZE);
|
1524
|
-
}
|
1525
|
-
|
1526
|
-
int8_t
|
1527
|
-
lxb_encoding_encode_iso_8859_3_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1528
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1529
|
-
{
|
1530
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_3,
|
1531
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_3_SIZE);
|
1532
|
-
}
|
1533
|
-
|
1534
|
-
int8_t
|
1535
|
-
lxb_encoding_encode_iso_8859_4_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1536
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1537
|
-
{
|
1538
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_4,
|
1539
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_4_SIZE);
|
1540
|
-
}
|
1541
|
-
|
1542
|
-
int8_t
|
1543
|
-
lxb_encoding_encode_iso_8859_5_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1544
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1545
|
-
{
|
1546
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_5,
|
1547
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_5_SIZE);
|
1548
|
-
}
|
1549
|
-
|
1550
|
-
int8_t
|
1551
|
-
lxb_encoding_encode_iso_8859_6_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1552
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1553
|
-
{
|
1554
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_6,
|
1555
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_6_SIZE);
|
1556
|
-
}
|
1557
|
-
|
1558
|
-
int8_t
|
1559
|
-
lxb_encoding_encode_iso_8859_7_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1560
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1561
|
-
{
|
1562
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_7,
|
1563
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_7_SIZE);
|
1564
|
-
}
|
1565
|
-
|
1566
|
-
int8_t
|
1567
|
-
lxb_encoding_encode_iso_8859_8_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1568
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1569
|
-
{
|
1570
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_8,
|
1571
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
|
1572
|
-
}
|
1573
|
-
|
1574
|
-
int8_t
|
1575
|
-
lxb_encoding_encode_iso_8859_8_i_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1576
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1577
|
-
{
|
1578
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_8,
|
1579
|
-
LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
|
1580
|
-
}
|
1581
|
-
|
1582
|
-
int8_t
|
1583
|
-
lxb_encoding_encode_koi8_r_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1584
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1585
|
-
{
|
1586
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_koi8_r,
|
1587
|
-
LXB_ENCODING_SINGLE_HASH_KOI8_R_SIZE);
|
1588
|
-
}
|
1589
|
-
|
1590
|
-
int8_t
|
1591
|
-
lxb_encoding_encode_koi8_u_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1592
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1593
|
-
{
|
1594
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_koi8_u,
|
1595
|
-
LXB_ENCODING_SINGLE_HASH_KOI8_U_SIZE);
|
1596
|
-
}
|
1597
|
-
|
1598
|
-
int8_t
|
1599
|
-
lxb_encoding_encode_shift_jis_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1600
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1601
|
-
{
|
1602
|
-
uint32_t lead, trail;
|
1603
|
-
const lexbor_shs_hash_t *hash;
|
1604
|
-
|
1605
|
-
if (cp <= 0x80) {
|
1606
|
-
*(*data)++ = (lxb_char_t) cp;
|
1607
|
-
|
1608
|
-
return 1;
|
1609
|
-
}
|
1610
|
-
|
1611
|
-
if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
|
1612
|
-
*(*data)++ = cp - 0xFF61 + 0xA1;
|
1613
|
-
|
1614
|
-
return 1;
|
1615
|
-
}
|
1616
|
-
|
1617
|
-
switch (cp) {
|
1618
|
-
case 0x00A5:
|
1619
|
-
*(*data)++ = 0x5C;
|
1620
|
-
return 1;
|
1621
|
-
|
1622
|
-
case 0x203E:
|
1623
|
-
*(*data)++ = 0x7E;
|
1624
|
-
return 1;
|
1625
|
-
|
1626
|
-
case 0x2212:
|
1627
|
-
cp = 0xFF0D;
|
1628
|
-
break;
|
1629
|
-
}
|
1630
|
-
|
1631
|
-
hash = lxb_encoding_encode_shift_jis_index(cp);
|
1632
|
-
if (hash == NULL) {
|
1633
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1634
|
-
}
|
1635
|
-
|
1636
|
-
if ((*data + 2) > end) {
|
1637
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1638
|
-
}
|
1639
|
-
|
1640
|
-
lead = (uint32_t) (uintptr_t) hash->value / 188;
|
1641
|
-
trail = (uint32_t) (uintptr_t) hash->value % 188;
|
1642
|
-
|
1643
|
-
*(*data)++ = lead + ((lead < 0x1F) ? 0x81 : 0xC1);
|
1644
|
-
*(*data)++ = trail + ((trail < 0x3F) ? 0x40 : 0x41);
|
1645
|
-
|
1646
|
-
return 2;
|
1647
|
-
}
|
1648
|
-
|
1649
|
-
lxb_inline void
|
1650
|
-
lxb_encoding_encode_utf_16_write_single(bool is_be, lxb_char_t **data,
|
1651
|
-
lxb_codepoint_t cp)
|
1652
|
-
{
|
1653
|
-
if (is_be) {
|
1654
|
-
*(*data)++ = cp >> 8;
|
1655
|
-
*(*data)++ = cp & 0x00FF;
|
1656
|
-
|
1657
|
-
return;
|
1658
|
-
}
|
1659
|
-
|
1660
|
-
*(*data)++ = cp & 0x00FF;
|
1661
|
-
*(*data)++ = cp >> 8;
|
1662
|
-
}
|
1663
|
-
|
1664
|
-
lxb_inline int8_t
|
1665
|
-
lxb_encoding_encode_utf_16_single(lxb_encoding_encode_t *ctx, bool is_be,
|
1666
|
-
lxb_char_t **data, const lxb_char_t *end, lxb_codepoint_t cp)
|
1667
|
-
{
|
1668
|
-
if ((*data + 2) > end) {
|
1669
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1670
|
-
}
|
1671
|
-
|
1672
|
-
if (cp < 0x10000) {
|
1673
|
-
lxb_encoding_encode_utf_16_write_single(is_be, data, cp);
|
1674
|
-
|
1675
|
-
return 2;
|
1676
|
-
}
|
1677
|
-
|
1678
|
-
if ((*data + 4) > end) {
|
1679
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1680
|
-
}
|
1681
|
-
|
1682
|
-
cp -= 0x10000;
|
1683
|
-
|
1684
|
-
lxb_encoding_encode_utf_16_write_single(is_be, data, (0xD800 | (cp >> 0x0A)));
|
1685
|
-
lxb_encoding_encode_utf_16_write_single(is_be, data, (0xDC00 | (cp & 0x03FF)));
|
1686
|
-
|
1687
|
-
return 4;
|
1688
|
-
}
|
1689
|
-
|
1690
|
-
int8_t
|
1691
|
-
lxb_encoding_encode_utf_16be_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1692
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1693
|
-
{
|
1694
|
-
return lxb_encoding_encode_utf_16_single(ctx, true, data, end, cp);
|
1695
|
-
}
|
1696
|
-
|
1697
|
-
int8_t
|
1698
|
-
lxb_encoding_encode_utf_16le_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1699
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1700
|
-
{
|
1701
|
-
return lxb_encoding_encode_utf_16_single(ctx, false, data, end, cp);
|
1702
|
-
}
|
1703
|
-
|
1704
|
-
int8_t
|
1705
|
-
lxb_encoding_encode_utf_8_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1706
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1707
|
-
{
|
1708
|
-
if (cp < 0x80) {
|
1709
|
-
/* 0xxxxxxx */
|
1710
|
-
*(*data)++ = (lxb_char_t) cp;
|
1711
|
-
|
1712
|
-
return 1;
|
1713
|
-
}
|
1714
|
-
|
1715
|
-
if (cp < 0x800) {
|
1716
|
-
if ((*data + 2) > end) {
|
1717
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1718
|
-
}
|
1719
|
-
|
1720
|
-
/* 110xxxxx 10xxxxxx */
|
1721
|
-
*(*data)++ = (lxb_char_t) (0xC0 | (cp >> 6 ));
|
1722
|
-
*(*data)++ = (lxb_char_t) (0x80 | (cp & 0x3F));
|
1723
|
-
|
1724
|
-
return 2;
|
1725
|
-
}
|
1726
|
-
|
1727
|
-
if (cp < 0x10000) {
|
1728
|
-
if ((*data + 3) > end) {
|
1729
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1730
|
-
}
|
1731
|
-
|
1732
|
-
/* 1110xxxx 10xxxxxx 10xxxxxx */
|
1733
|
-
*(*data)++ = (lxb_char_t) (0xE0 | ((cp >> 12)));
|
1734
|
-
*(*data)++ = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
|
1735
|
-
*(*data)++ = (lxb_char_t) (0x80 | ( cp & 0x3F));
|
1736
|
-
|
1737
|
-
return 3;
|
1738
|
-
}
|
1739
|
-
|
1740
|
-
if (cp < 0x110000) {
|
1741
|
-
if ((*data + 4) > end) {
|
1742
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1743
|
-
}
|
1744
|
-
|
1745
|
-
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
1746
|
-
*(*data)++ = (lxb_char_t) (0xF0 | ( cp >> 18));
|
1747
|
-
*(*data)++ = (lxb_char_t) (0x80 | ((cp >> 12) & 0x3F));
|
1748
|
-
*(*data)++ = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
|
1749
|
-
*(*data)++ = (lxb_char_t) (0x80 | ( cp & 0x3F));
|
1750
|
-
|
1751
|
-
return 4;
|
1752
|
-
}
|
1753
|
-
|
1754
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1755
|
-
}
|
1756
|
-
|
1757
|
-
int8_t
|
1758
|
-
lxb_encoding_encode_gb18030_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1759
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1760
|
-
{
|
1761
|
-
uint32_t index;
|
1762
|
-
const lexbor_shs_hash_t *hash;
|
1763
|
-
|
1764
|
-
if (cp < 0x80) {
|
1765
|
-
*(*data)++ = (lxb_char_t) cp;
|
1766
|
-
|
1767
|
-
return 1;
|
1768
|
-
}
|
1769
|
-
|
1770
|
-
if (cp == 0xE5E5) {
|
1771
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1772
|
-
}
|
1773
|
-
|
1774
|
-
hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
|
1775
|
-
LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
|
1776
|
-
if (hash != NULL) {
|
1777
|
-
if ((*data + 2) > end) {
|
1778
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1779
|
-
}
|
1780
|
-
|
1781
|
-
*(*data)++ = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
|
1782
|
-
|
1783
|
-
if (((uint32_t) (uintptr_t) hash->value % 190) < 0x3F) {
|
1784
|
-
*(*data)++ = ((uint32_t) (uintptr_t) hash->value % 190) + 0x40;
|
1785
|
-
}
|
1786
|
-
else {
|
1787
|
-
*(*data)++ = ((uint32_t) (uintptr_t) hash->value % 190) + 0x41;
|
1788
|
-
}
|
1789
|
-
|
1790
|
-
return 2;
|
1791
|
-
}
|
1792
|
-
|
1793
|
-
if ((*data + 4) > end) {
|
1794
|
-
return LXB_ENCODING_ENCODE_SMALL_BUFFER;
|
1795
|
-
}
|
1796
|
-
|
1797
|
-
index = lxb_encoding_encode_gb18030_range(cp);
|
1798
|
-
|
1799
|
-
*(*data)++ = (index / (10 * 126 * 10)) + 0x81;
|
1800
|
-
*(*data)++ = ((index % (10 * 126 * 10)) / (10 * 126)) + 0x30;
|
1801
|
-
|
1802
|
-
index = (index % (10 * 126 * 10)) % (10 * 126);
|
1803
|
-
|
1804
|
-
*(*data)++ = (index / 10) + 0x81;
|
1805
|
-
*(*data)++ = (index % 10) + 0x30;
|
1806
|
-
|
1807
|
-
return 4;
|
1808
|
-
}
|
1809
|
-
|
1810
|
-
int8_t
|
1811
|
-
lxb_encoding_encode_macintosh_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1812
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1813
|
-
{
|
1814
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_macintosh,
|
1815
|
-
LXB_ENCODING_SINGLE_HASH_MACINTOSH_SIZE);
|
1816
|
-
}
|
1817
|
-
|
1818
|
-
int8_t
|
1819
|
-
lxb_encoding_encode_replacement_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1820
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1821
|
-
{
|
1822
|
-
(*data)++;
|
1823
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1824
|
-
}
|
1825
|
-
|
1826
|
-
int8_t
|
1827
|
-
lxb_encoding_encode_windows_1250_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1828
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1829
|
-
{
|
1830
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1250,
|
1831
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1250_SIZE);
|
1832
|
-
}
|
1833
|
-
|
1834
|
-
int8_t
|
1835
|
-
lxb_encoding_encode_windows_1251_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1836
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1837
|
-
{
|
1838
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1251,
|
1839
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1251_SIZE);
|
1840
|
-
}
|
1841
|
-
|
1842
|
-
int8_t
|
1843
|
-
lxb_encoding_encode_windows_1252_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1844
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1845
|
-
{
|
1846
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1252,
|
1847
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1252_SIZE);
|
1848
|
-
}
|
1849
|
-
|
1850
|
-
int8_t
|
1851
|
-
lxb_encoding_encode_windows_1253_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1852
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1853
|
-
{
|
1854
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1253,
|
1855
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1253_SIZE);
|
1856
|
-
}
|
1857
|
-
|
1858
|
-
int8_t
|
1859
|
-
lxb_encoding_encode_windows_1254_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1860
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1861
|
-
{
|
1862
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1254,
|
1863
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1254_SIZE);
|
1864
|
-
}
|
1865
|
-
|
1866
|
-
int8_t
|
1867
|
-
lxb_encoding_encode_windows_1255_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1868
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1869
|
-
{
|
1870
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1255,
|
1871
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1255_SIZE);
|
1872
|
-
}
|
1873
|
-
|
1874
|
-
int8_t
|
1875
|
-
lxb_encoding_encode_windows_1256_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1876
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1877
|
-
{
|
1878
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1256,
|
1879
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1256_SIZE);
|
1880
|
-
}
|
1881
|
-
|
1882
|
-
int8_t
|
1883
|
-
lxb_encoding_encode_windows_1257_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1884
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1885
|
-
{
|
1886
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1257,
|
1887
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1257_SIZE);
|
1888
|
-
}
|
1889
|
-
|
1890
|
-
int8_t
|
1891
|
-
lxb_encoding_encode_windows_1258_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1892
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1893
|
-
{
|
1894
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1258,
|
1895
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_1258_SIZE);
|
1896
|
-
}
|
1897
|
-
|
1898
|
-
int8_t
|
1899
|
-
lxb_encoding_encode_windows_874_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1900
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1901
|
-
{
|
1902
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_874,
|
1903
|
-
LXB_ENCODING_SINGLE_HASH_WINDOWS_874_SIZE);
|
1904
|
-
}
|
1905
|
-
|
1906
|
-
int8_t
|
1907
|
-
lxb_encoding_encode_x_mac_cyrillic_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1908
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1909
|
-
{
|
1910
|
-
LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_x_mac_cyrillic,
|
1911
|
-
LXB_ENCODING_SINGLE_HASH_X_MAC_CYRILLIC_SIZE);
|
1912
|
-
}
|
1913
|
-
|
1914
|
-
int8_t
|
1915
|
-
lxb_encoding_encode_x_user_defined_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
1916
|
-
const lxb_char_t *end, lxb_codepoint_t cp)
|
1917
|
-
{
|
1918
|
-
if (cp < 0x80) {
|
1919
|
-
*(*data)++ = (lxb_char_t) cp;
|
1920
|
-
|
1921
|
-
return 1;
|
1922
|
-
}
|
1923
|
-
|
1924
|
-
if (cp >= 0xF780 && cp <= 0xF7FF) {
|
1925
|
-
*(*data)++ = (lxb_char_t) (cp - 0xF780 + 0x80);
|
1926
|
-
|
1927
|
-
return 1;
|
1928
|
-
}
|
1929
|
-
|
1930
|
-
return LXB_ENCODING_ENCODE_ERROR;
|
1931
|
-
}
|