nokolexbor 0.3.4 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/ext/nokolexbor/nl_attribute.c +46 -0
  3. data/ext/nokolexbor/nl_cdata.c +8 -0
  4. data/ext/nokolexbor/nl_comment.c +6 -0
  5. data/ext/nokolexbor/nl_document.c +53 -7
  6. data/ext/nokolexbor/nl_document_fragment.c +9 -0
  7. data/ext/nokolexbor/nl_error.c +21 -19
  8. data/ext/nokolexbor/nl_node.c +255 -49
  9. data/ext/nokolexbor/nl_node_set.c +56 -1
  10. data/ext/nokolexbor/nl_processing_instruction.c +6 -0
  11. data/ext/nokolexbor/nl_text.c +6 -0
  12. data/ext/nokolexbor/nokolexbor.h +1 -0
  13. data/lib/nokolexbor/document.rb +52 -5
  14. data/lib/nokolexbor/document_fragment.rb +11 -0
  15. data/lib/nokolexbor/node.rb +367 -18
  16. data/lib/nokolexbor/node_set.rb +56 -0
  17. data/lib/nokolexbor/version.rb +1 -1
  18. metadata +2 -24
  19. data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
  20. data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
  21. data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
  22. data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
  23. data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
  24. data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
  25. data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
  26. data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
  27. data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
  28. data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
  29. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
  30. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
  31. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
  32. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
  33. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
  34. data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
  35. data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
  36. data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
  37. data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
  38. data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
  39. data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
  40. data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
@@ -1,1931 +0,0 @@
1
- /*
2
- * Copyright (C) 2019 Alexander Borisov
3
- *
4
- * Author: Alexander Borisov <borisov@lexbor.com>
5
- */
6
-
7
- #include "lexbor/encoding/encode.h"
8
- #include "lexbor/encoding/single.h"
9
- #include "lexbor/encoding/multi.h"
10
- #include "lexbor/encoding/range.h"
11
-
12
-
13
- #define LXB_ENCODING_ENCODE_APPEND(ctx, cp) \
14
- do { \
15
- if ((ctx)->buffer_used == (ctx)->buffer_length) { \
16
- return LXB_STATUS_SMALL_BUFFER; \
17
- } \
18
- \
19
- (ctx)->buffer_out[(ctx)->buffer_used++] = (lxb_char_t) cp; \
20
- } \
21
- while (0)
22
-
23
- #define LXB_ENCODING_ENCODE_APPEND_P(ctx, cp) \
24
- do { \
25
- if ((ctx)->buffer_used == (ctx)->buffer_length) { \
26
- *cps = p; \
27
- return LXB_STATUS_SMALL_BUFFER; \
28
- } \
29
- \
30
- (ctx)->buffer_out[(ctx)->buffer_used++] = (lxb_char_t) cp; \
31
- } \
32
- while (0)
33
-
34
- #define LXB_ENCODING_ENCODE_ERROR(ctx) \
35
- do { \
36
- if (ctx->replace_to == NULL) { \
37
- return LXB_STATUS_ERROR; \
38
- } \
39
- \
40
- if ((ctx->buffer_used + ctx->replace_len) > ctx->buffer_length) { \
41
- return LXB_STATUS_SMALL_BUFFER; \
42
- } \
43
- \
44
- memcpy(&ctx->buffer_out[ctx->buffer_used], ctx->replace_to, \
45
- ctx->replace_len); \
46
- \
47
- ctx->buffer_used += ctx->replace_len; \
48
- } \
49
- while (0)
50
-
51
- #define LXB_ENCODING_ENCODE_ERROR_P(ctx) \
52
- do { \
53
- if (ctx->replace_to == NULL) { \
54
- *cps = p; \
55
- return LXB_STATUS_ERROR; \
56
- } \
57
- \
58
- if ((ctx->buffer_used + ctx->replace_len) > ctx->buffer_length) { \
59
- *cps = p; \
60
- return LXB_STATUS_SMALL_BUFFER; \
61
- } \
62
- \
63
- memcpy(&ctx->buffer_out[ctx->buffer_used], ctx->replace_to, \
64
- ctx->replace_len); \
65
- \
66
- ctx->buffer_used += ctx->replace_len; \
67
- } \
68
- while (0)
69
-
70
- #define LXB_ENCODING_ENCODE_SINGLE_BYTE(table, table_size) \
71
- do { \
72
- lxb_codepoint_t cp; \
73
- const lxb_codepoint_t *p = *cps; \
74
- const lexbor_shs_hash_t *hash; \
75
- \
76
- for (; p < end; p++) { \
77
- cp = *p; \
78
- \
79
- if (cp < 0x80) { \
80
- LXB_ENCODING_ENCODE_APPEND_P(ctx, cp); \
81
- continue; \
82
- } \
83
- \
84
- hash = lexbor_shs_hash_get_static(table, table_size, cp); \
85
- if (hash == NULL) { \
86
- LXB_ENCODING_ENCODE_ERROR_P(ctx); \
87
- continue; \
88
- } \
89
- \
90
- LXB_ENCODING_ENCODE_APPEND_P(ctx, (uintptr_t) hash->value); \
91
- } \
92
- \
93
- return LXB_STATUS_OK; \
94
- } \
95
- while (0)
96
-
97
- #define LXB_ENCODING_ENCODE_BYTE_SINGLE(table, table_size) \
98
- const lexbor_shs_hash_t *hash; \
99
- \
100
- if (cp < 0x80) { \
101
- *(*data)++ = (lxb_char_t) cp; \
102
- return 1; \
103
- } \
104
- \
105
- hash = lexbor_shs_hash_get_static(table, table_size, cp); \
106
- if (hash == NULL) { \
107
- return LXB_ENCODING_ENCODE_ERROR; \
108
- } \
109
- \
110
- *(*data)++ = (lxb_char_t) (uintptr_t) hash->value; \
111
- return 1
112
-
113
-
114
- lxb_status_t
115
- lxb_encoding_encode_default(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
116
- const lxb_codepoint_t *end)
117
- {
118
- return lxb_encoding_encode_utf_8(ctx, cps, end);
119
- }
120
-
121
- lxb_status_t
122
- lxb_encoding_encode_auto(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
123
- const lxb_codepoint_t *end)
124
- {
125
- *cps = end;
126
- return LXB_STATUS_ERROR;
127
- }
128
-
129
- lxb_status_t
130
- lxb_encoding_encode_undefined(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
131
- const lxb_codepoint_t *end)
132
- {
133
- *cps = end;
134
- return LXB_STATUS_ERROR;
135
- }
136
-
137
- lxb_status_t
138
- lxb_encoding_encode_big5(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
139
- const lxb_codepoint_t *end)
140
- {
141
- lxb_codepoint_t cp;
142
- const lexbor_shs_hash_t *hash;
143
-
144
- for (; *cps < end; (*cps)++) {
145
- cp = **cps;
146
-
147
- if (cp < 0x80) {
148
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
149
- continue;
150
- }
151
-
152
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_big5,
153
- LXB_ENCODING_MULTI_HASH_BIG5_SIZE, cp);
154
- if (hash == NULL) {
155
- LXB_ENCODING_ENCODE_ERROR(ctx);
156
- continue;
157
- }
158
-
159
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
160
- return LXB_STATUS_SMALL_BUFFER;
161
- }
162
-
163
- ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value) / 157 + 0x81;
164
-
165
- if ((((uint32_t) (uintptr_t) hash->value) % 157) < 0x3F) {
166
- ctx->buffer_out[ ctx->buffer_used++ ] = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x40;
167
- }
168
- else {
169
- ctx->buffer_out[ ctx->buffer_used++ ] = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x62;
170
- }
171
- }
172
-
173
- return LXB_STATUS_OK;
174
- }
175
-
176
- lxb_status_t
177
- lxb_encoding_encode_euc_jp(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
178
- const lxb_codepoint_t *end)
179
- {
180
- lxb_codepoint_t cp;
181
- const lexbor_shs_hash_t *hash;
182
-
183
- for (; *cps < end; (*cps)++) {
184
- cp = **cps;
185
-
186
- if (cp < 0x80) {
187
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
188
- continue;
189
- }
190
-
191
- if (cp == 0x00A5) {
192
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
193
- continue;
194
- }
195
-
196
- if (cp == 0x203E) {
197
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
198
- continue;
199
- }
200
-
201
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
202
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
203
- return LXB_STATUS_SMALL_BUFFER;
204
- }
205
-
206
- ctx->buffer_out[ ctx->buffer_used++ ] = 0x8E;
207
- ctx->buffer_out[ ctx->buffer_used++ ] = cp - 0xFF61 + 0xA1;
208
-
209
- continue;
210
- }
211
-
212
- if (cp == 0x2212) {
213
- cp = 0xFF0D;
214
- }
215
-
216
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
217
- LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
218
- if (hash == NULL) {
219
- LXB_ENCODING_ENCODE_ERROR(ctx);
220
- continue;
221
- }
222
-
223
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
224
- return LXB_STATUS_SMALL_BUFFER;
225
- }
226
-
227
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 94 + 0xA1;
228
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 94 + 0xA1;
229
- }
230
-
231
- return LXB_STATUS_OK;
232
- }
233
-
234
- lxb_status_t
235
- lxb_encoding_encode_euc_kr(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
236
- const lxb_codepoint_t *end)
237
- {
238
- lxb_codepoint_t cp;
239
- const lexbor_shs_hash_t *hash;
240
-
241
- for (; *cps < end; (*cps)++) {
242
- cp = **cps;
243
-
244
- if (cp < 0x80) {
245
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
246
- continue;
247
- }
248
-
249
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_euc_kr,
250
- LXB_ENCODING_MULTI_HASH_EUC_KR_SIZE, cp);
251
- if (hash == NULL) {
252
- LXB_ENCODING_ENCODE_ERROR(ctx);
253
- continue;
254
- }
255
-
256
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
257
- return LXB_STATUS_SMALL_BUFFER;
258
- }
259
-
260
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
261
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 190 + 0x41;
262
- }
263
-
264
- return LXB_STATUS_OK;
265
- }
266
-
267
- lxb_status_t
268
- lxb_encoding_encode_gbk(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
269
- const lxb_codepoint_t *end)
270
- {
271
- lxb_codepoint_t cp;
272
- const lexbor_shs_hash_t *hash;
273
-
274
- for (; *cps < end; (*cps)++) {
275
- cp = **cps;
276
-
277
- if (cp < 0x80) {
278
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
279
- continue;
280
- }
281
-
282
- if (cp == 0xE5E5) {
283
- LXB_ENCODING_ENCODE_ERROR(ctx);
284
- continue;
285
- }
286
-
287
- if (cp == 0x20AC) {
288
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x80);
289
- continue;
290
- }
291
-
292
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
293
- LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
294
- if (hash == NULL) {
295
- LXB_ENCODING_ENCODE_ERROR(ctx);
296
- continue;
297
- }
298
-
299
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
300
- return LXB_STATUS_SMALL_BUFFER;
301
- }
302
-
303
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (uintptr_t) hash->value / 190 + 0x81;
304
-
305
- if (((lxb_char_t) (uintptr_t) hash->value % 190) < 0x3F) {
306
- ctx->buffer_out[ ctx->buffer_used++ ] = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x40;
307
- }
308
- else {
309
- ctx->buffer_out[ ctx->buffer_used++ ] = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x41;
310
- }
311
- }
312
-
313
- return LXB_STATUS_OK;
314
- }
315
-
316
- lxb_status_t
317
- lxb_encoding_encode_ibm866(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
318
- const lxb_codepoint_t *end)
319
- {
320
-
321
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_ibm866,
322
- LXB_ENCODING_SINGLE_HASH_IBM866_SIZE);
323
- }
324
-
325
- lxb_status_t
326
- lxb_encoding_encode_iso_2022_jp(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
327
- const lxb_codepoint_t *end)
328
- {
329
- int8_t size;
330
- unsigned state;
331
- lxb_codepoint_t cp;
332
- const lexbor_shs_hash_t *hash;
333
-
334
- size = 0;
335
- state = ctx->state;
336
-
337
- for (; *cps < end; (*cps)++) {
338
- cp = **cps;
339
-
340
- begin:
341
-
342
- switch (ctx->state) {
343
- case LXB_ENCODING_ENCODE_2022_JP_ASCII:
344
- if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
345
- goto failed;
346
- }
347
-
348
- if (cp < 0x80) {
349
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
350
- continue;
351
- }
352
-
353
- if (cp == 0x00A5 || cp == 0x203E) {
354
- /*
355
- * Do not switch to the ROMAN stage with prepend code point
356
- * to stream, add it immediately.
357
- */
358
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
359
- goto small_buffer;
360
- }
361
-
362
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
363
-
364
- if (cp == 0x00A5) {
365
- memcpy(&ctx->buffer_out[ctx->buffer_used],
366
- "\x1B\x28\x4A\x5C", 4);
367
- ctx->buffer_used += 4;
368
-
369
- continue;
370
- }
371
-
372
- memcpy(&ctx->buffer_out[ctx->buffer_used],
373
- "\x1B\x28\x4A\x7E", 4);
374
- ctx->buffer_used += 4;
375
-
376
- continue;
377
- }
378
-
379
- break;
380
-
381
- case LXB_ENCODING_ENCODE_2022_JP_ROMAN:
382
- if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
383
- goto failed;
384
- }
385
-
386
- if (cp < 0x80) {
387
- switch (cp) {
388
- case 0x005C:
389
- case 0x007E:
390
- break;
391
-
392
- case 0x00A5:
393
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
394
- continue;
395
-
396
- case 0x203E:
397
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
398
- continue;
399
-
400
- default:
401
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
402
- continue;
403
- }
404
-
405
- /*
406
- * Do not switch to the ANSI stage with prepend code point
407
- * to stream, add it immediately.
408
- */
409
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
410
- goto small_buffer;
411
- }
412
-
413
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
414
-
415
- memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
416
- ctx->buffer_used += 3;
417
-
418
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
419
- continue;
420
- }
421
-
422
- break;
423
-
424
- case LXB_ENCODING_ENCODE_2022_JP_JIS0208:
425
- if (cp < 0x80) {
426
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
427
- goto small_buffer;
428
- }
429
-
430
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
431
-
432
- memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
433
- ctx->buffer_used += 3;
434
-
435
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
436
- continue;
437
- }
438
-
439
- if (cp == 0x00A5 || cp == 0x203E) {
440
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
441
- goto small_buffer;
442
- }
443
-
444
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
445
-
446
- if (cp == 0x00A5) {
447
- memcpy(&ctx->buffer_out[ctx->buffer_used],
448
- "\x1B\x28\x4A\x5C", 4);
449
- ctx->buffer_used += 4;
450
-
451
- continue;
452
- }
453
-
454
- memcpy(&ctx->buffer_out[ctx->buffer_used],
455
- "\x1B\x28\x4A\x7E", 4);
456
- ctx->buffer_used += 4;
457
-
458
- continue;
459
- }
460
-
461
- break;
462
- }
463
-
464
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
465
- goto small_buffer;
466
- }
467
-
468
- if (cp == 0x2212) {
469
- cp = 0xFF0D;
470
- }
471
-
472
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
473
- cp = lxb_encoding_multi_index_iso_2022_jp_katakana[cp - 0xFF61].codepoint;
474
- }
475
-
476
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
477
- LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
478
- if (hash == NULL) {
479
- goto failed;
480
- }
481
-
482
- if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_JIS0208) {
483
- if ((ctx->buffer_used + 3) > ctx->buffer_length) {
484
- goto small_buffer;
485
- }
486
-
487
- memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x24\x42", 3);
488
- ctx->buffer_used += 3;
489
-
490
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_JIS0208;
491
- size += 3;
492
-
493
- goto begin;
494
- }
495
-
496
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 94 + 0x21;
497
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 94 + 0x21;
498
-
499
- continue;
500
-
501
- small_buffer:
502
-
503
- ctx->state = state;
504
- ctx->buffer_used -= size;
505
-
506
- return LXB_STATUS_SMALL_BUFFER;
507
-
508
- failed:
509
-
510
- ctx->buffer_used -= size;
511
- LXB_ENCODING_ENCODE_ERROR(ctx);
512
- }
513
-
514
- return LXB_STATUS_OK;
515
- }
516
-
517
- lxb_status_t
518
- lxb_encoding_encode_iso_2022_jp_eof(lxb_encoding_encode_t *ctx)
519
- {
520
- if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_ASCII) {
521
- if ((ctx->buffer_used + 3) > ctx->buffer_length) {
522
- return LXB_STATUS_SMALL_BUFFER;
523
- }
524
-
525
- memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
526
- ctx->buffer_used += 3;
527
- }
528
-
529
- return LXB_STATUS_OK;
530
- }
531
-
532
- lxb_status_t
533
- lxb_encoding_encode_iso_8859_10(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
534
- const lxb_codepoint_t *end)
535
- {
536
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_10,
537
- LXB_ENCODING_SINGLE_HASH_ISO_8859_10_SIZE);
538
- }
539
-
540
- lxb_status_t
541
- lxb_encoding_encode_iso_8859_13(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
542
- const lxb_codepoint_t *end)
543
- {
544
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_13,
545
- LXB_ENCODING_SINGLE_HASH_ISO_8859_13_SIZE);
546
- }
547
-
548
- lxb_status_t
549
- lxb_encoding_encode_iso_8859_14(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
550
- const lxb_codepoint_t *end)
551
- {
552
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_14,
553
- LXB_ENCODING_SINGLE_HASH_ISO_8859_14_SIZE);
554
- }
555
-
556
- lxb_status_t
557
- lxb_encoding_encode_iso_8859_15(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
558
- const lxb_codepoint_t *end)
559
- {
560
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_15,
561
- LXB_ENCODING_SINGLE_HASH_ISO_8859_15_SIZE);
562
- }
563
-
564
- lxb_status_t
565
- lxb_encoding_encode_iso_8859_16(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
566
- const lxb_codepoint_t *end)
567
- {
568
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_16,
569
- LXB_ENCODING_SINGLE_HASH_ISO_8859_16_SIZE);
570
- }
571
-
572
- lxb_status_t
573
- lxb_encoding_encode_iso_8859_2(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
574
- const lxb_codepoint_t *end)
575
- {
576
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_2,
577
- LXB_ENCODING_SINGLE_HASH_ISO_8859_2_SIZE);
578
- }
579
-
580
- lxb_status_t
581
- lxb_encoding_encode_iso_8859_3(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
582
- const lxb_codepoint_t *end)
583
- {
584
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_3,
585
- LXB_ENCODING_SINGLE_HASH_ISO_8859_3_SIZE);
586
- }
587
-
588
- lxb_status_t
589
- lxb_encoding_encode_iso_8859_4(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
590
- const lxb_codepoint_t *end)
591
- {
592
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_4,
593
- LXB_ENCODING_SINGLE_HASH_ISO_8859_4_SIZE);
594
- }
595
-
596
- lxb_status_t
597
- lxb_encoding_encode_iso_8859_5(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
598
- const lxb_codepoint_t *end)
599
- {
600
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_5,
601
- LXB_ENCODING_SINGLE_HASH_ISO_8859_5_SIZE);
602
- }
603
-
604
- lxb_status_t
605
- lxb_encoding_encode_iso_8859_6(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
606
- const lxb_codepoint_t *end)
607
- {
608
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_6,
609
- LXB_ENCODING_SINGLE_HASH_ISO_8859_6_SIZE);
610
- }
611
-
612
- lxb_status_t
613
- lxb_encoding_encode_iso_8859_7(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
614
- const lxb_codepoint_t *end)
615
- {
616
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_7,
617
- LXB_ENCODING_SINGLE_HASH_ISO_8859_7_SIZE);
618
- }
619
-
620
- lxb_status_t
621
- lxb_encoding_encode_iso_8859_8(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
622
- const lxb_codepoint_t *end)
623
- {
624
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_8,
625
- LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
626
- }
627
-
628
- lxb_status_t
629
- lxb_encoding_encode_iso_8859_8_i(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
630
- const lxb_codepoint_t *end)
631
- {
632
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_8,
633
- LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
634
- }
635
-
636
- lxb_status_t
637
- lxb_encoding_encode_koi8_r(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
638
- const lxb_codepoint_t *end)
639
- {
640
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_koi8_r,
641
- LXB_ENCODING_SINGLE_HASH_KOI8_R_SIZE);
642
- }
643
-
644
- lxb_status_t
645
- lxb_encoding_encode_koi8_u(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
646
- const lxb_codepoint_t *end)
647
- {
648
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_koi8_u,
649
- LXB_ENCODING_SINGLE_HASH_KOI8_U_SIZE);
650
- }
651
-
652
- lxb_inline const lexbor_shs_hash_t *
653
- lxb_encoding_encode_shift_jis_index(lxb_codepoint_t cp)
654
- {
655
- const lexbor_shs_hash_t *entry;
656
-
657
- entry = &lxb_encoding_multi_hash_jis0208[ (cp % LXB_ENCODING_MULTI_HASH_JIS0208_SIZE) + 1 ];
658
-
659
- do {
660
- if (entry->key == cp) {
661
- if ((unsigned) ((uint32_t) (uintptr_t) entry->value - 8272) > (8835 - 8272)) {
662
- return entry;
663
- }
664
- }
665
-
666
- entry = &lxb_encoding_multi_hash_jis0208[entry->next];
667
- }
668
- while (entry != lxb_encoding_multi_hash_jis0208);
669
-
670
- return NULL;
671
- }
672
-
673
- lxb_status_t
674
- lxb_encoding_encode_shift_jis(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
675
- const lxb_codepoint_t *end)
676
- {
677
- uint32_t lead, trail;
678
- lxb_codepoint_t cp;
679
- const lexbor_shs_hash_t *hash;
680
-
681
- for (; *cps < end; (*cps)++) {
682
- cp = **cps;
683
-
684
- if (cp <= 0x80) {
685
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
686
- continue;
687
- }
688
-
689
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
690
- LXB_ENCODING_ENCODE_APPEND(ctx, cp - 0xFF61 + 0xA1);
691
- continue;
692
- }
693
-
694
- switch (cp) {
695
- case 0x00A5:
696
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
697
- continue;
698
-
699
- case 0x203E:
700
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
701
- continue;
702
-
703
- case 0x2212:
704
- cp = 0xFF0D;
705
- break;
706
- }
707
-
708
- hash = lxb_encoding_encode_shift_jis_index(cp);
709
- if (hash == NULL) {
710
- LXB_ENCODING_ENCODE_ERROR(ctx);
711
- continue;
712
- }
713
-
714
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
715
- return LXB_STATUS_SMALL_BUFFER;
716
- }
717
-
718
- lead = (uint32_t) (uintptr_t) hash->value / 188;
719
- trail = (uint32_t) (uintptr_t) hash->value % 188;
720
-
721
- ctx->buffer_out[ctx->buffer_used++ ] = lead + ((lead < 0x1F) ? 0x81 : 0xC1);
722
- ctx->buffer_out[ctx->buffer_used++ ] = trail + ((trail < 0x3F) ? 0x40 : 0x41);
723
- }
724
-
725
- return LXB_STATUS_OK;
726
- }
727
-
728
- lxb_inline void
729
- lxb_encoding_encode_utf_16_write(lxb_encoding_encode_t *ctx, bool is_be,
730
- lxb_codepoint_t cp)
731
- {
732
- if (is_be) {
733
- ctx->buffer_out[ctx->buffer_used++] = cp >> 8;
734
- ctx->buffer_out[ctx->buffer_used++] = cp & 0x00FF;
735
-
736
- return;
737
- }
738
-
739
- ctx->buffer_out[ctx->buffer_used++] = cp & 0x00FF;
740
- ctx->buffer_out[ctx->buffer_used++] = cp >> 8;
741
- }
742
-
743
- lxb_inline int8_t
744
- lxb_encoding_encode_utf_16(lxb_encoding_encode_t *ctx, bool is_be,
745
- const lxb_codepoint_t **cps, const lxb_codepoint_t *end)
746
- {
747
- lxb_codepoint_t cp;
748
-
749
- for (; *cps < end; (*cps)++) {
750
- cp = **cps;
751
-
752
- if (cp < 0x10000) {
753
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
754
- return LXB_STATUS_SMALL_BUFFER;
755
- }
756
-
757
- lxb_encoding_encode_utf_16_write(ctx, is_be, cp);
758
-
759
- continue;
760
- }
761
-
762
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
763
- return LXB_STATUS_SMALL_BUFFER;
764
- }
765
-
766
- cp -= 0x10000;
767
-
768
- lxb_encoding_encode_utf_16_write(ctx, is_be, (0xD800 | (cp >> 0x0A)));
769
- lxb_encoding_encode_utf_16_write(ctx, is_be, (0xDC00 | (cp & 0x03FF)));
770
- }
771
-
772
- return LXB_STATUS_OK;
773
- }
774
-
775
- lxb_status_t
776
- lxb_encoding_encode_utf_16be(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
777
- const lxb_codepoint_t *end)
778
- {
779
- return lxb_encoding_encode_utf_16(ctx, true, cps, end);
780
- }
781
-
782
- lxb_status_t
783
- lxb_encoding_encode_utf_16le(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
784
- const lxb_codepoint_t *end)
785
- {
786
- return lxb_encoding_encode_utf_16(ctx, false, cps, end);
787
- }
788
-
789
- lxb_status_t
790
- lxb_encoding_encode_utf_8(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
791
- const lxb_codepoint_t *end)
792
- {
793
- lxb_codepoint_t cp;
794
- const lxb_codepoint_t *p = *cps;
795
-
796
- for (; p < end; p++) {
797
- cp = *p;
798
-
799
- if (cp < 0x80) {
800
- if ((ctx->buffer_used + 1) > ctx->buffer_length) {
801
- *cps = p;
802
-
803
- return LXB_STATUS_SMALL_BUFFER;
804
- }
805
-
806
- /* 0xxxxxxx */
807
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
808
- }
809
- else if (cp < 0x800) {
810
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
811
- *cps = p;
812
-
813
- return LXB_STATUS_SMALL_BUFFER;
814
- }
815
-
816
- /* 110xxxxx 10xxxxxx */
817
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xC0 | (cp >> 6 ));
818
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | (cp & 0x3F));
819
- }
820
- else if (cp < 0x10000) {
821
- if ((ctx->buffer_used + 3) > ctx->buffer_length) {
822
- *cps = p;
823
-
824
- return LXB_STATUS_SMALL_BUFFER;
825
- }
826
-
827
- /* 1110xxxx 10xxxxxx 10xxxxxx */
828
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xE0 | ((cp >> 12)));
829
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
830
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ( cp & 0x3F));
831
- }
832
- else if (cp < 0x110000) {
833
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
834
- *cps = p;
835
-
836
- return LXB_STATUS_SMALL_BUFFER;
837
- }
838
-
839
- /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
840
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xF0 | ( cp >> 18));
841
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 12) & 0x3F));
842
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
843
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ( cp & 0x3F));
844
- }
845
- else {
846
- *cps = p;
847
- LXB_ENCODING_ENCODE_ERROR(ctx);
848
- }
849
- }
850
-
851
- *cps = p;
852
-
853
- return LXB_STATUS_OK;
854
- }
855
-
856
- lxb_inline uint32_t
857
- lxb_encoding_encode_gb18030_range(lxb_codepoint_t cp)
858
- {
859
- size_t mid, left, right;
860
- const lxb_encoding_range_index_t *range;
861
-
862
- if (cp == 0xE7C7) {
863
- return 7457;
864
- }
865
-
866
- left = 0;
867
- right = LXB_ENCODING_RANGE_INDEX_GB18030_SIZE;
868
- range = lxb_encoding_range_index_gb18030;
869
-
870
- /* Some compilers say about uninitialized mid */
871
- mid = 0;
872
-
873
- while (left < right) {
874
- mid = left + (right - left) / 2;
875
-
876
- if (range[mid].codepoint < cp) {
877
- left = mid + 1;
878
-
879
- if (left < right && range[left].codepoint > cp) {
880
- break;
881
- }
882
- }
883
- else if (range[mid].codepoint > cp) {
884
- right = mid - 1;
885
-
886
- if (right > 0 && range[right].codepoint <= cp) {
887
- mid = right;
888
- break;
889
- }
890
- }
891
- else {
892
- break;
893
- }
894
- }
895
-
896
- return range[mid].index + cp - range[mid].codepoint;
897
- }
898
-
899
- lxb_status_t
900
- lxb_encoding_encode_gb18030(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
901
- const lxb_codepoint_t *end)
902
- {
903
- uint32_t index;
904
- lxb_codepoint_t cp;
905
- const lexbor_shs_hash_t *hash;
906
-
907
- for (; *cps < end; (*cps)++) {
908
- cp = **cps;
909
-
910
- if (cp < 0x80) {
911
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
912
- continue;
913
- }
914
-
915
- if (cp == 0xE5E5) {
916
- LXB_ENCODING_ENCODE_ERROR(ctx);
917
- continue;
918
- }
919
-
920
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
921
- LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
922
- if (hash != NULL) {
923
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
924
- return LXB_STATUS_SMALL_BUFFER;
925
- }
926
-
927
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
928
-
929
- if (((uint32_t) (uintptr_t) hash->value % 190) < 0x3F) {
930
- ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value % 190) + 0x40;
931
- }
932
- else {
933
- ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value % 190) + 0x41;
934
- }
935
-
936
- continue;
937
- }
938
-
939
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
940
- return LXB_STATUS_SMALL_BUFFER;
941
- }
942
-
943
- index = lxb_encoding_encode_gb18030_range(cp);
944
-
945
- ctx->buffer_out[ ctx->buffer_used++ ] = (index / (10 * 126 * 10)) + 0x81;
946
- ctx->buffer_out[ ctx->buffer_used++ ] = ((index % (10 * 126 * 10)) / (10 * 126)) + 0x30;
947
-
948
- index = (index % (10 * 126 * 10)) % (10 * 126);
949
-
950
- ctx->buffer_out[ ctx->buffer_used++ ] = (index / 10) + 0x81;
951
- ctx->buffer_out[ ctx->buffer_used++ ] = (index % 10) + 0x30;
952
- }
953
-
954
- return LXB_STATUS_OK;
955
- }
956
-
957
- lxb_status_t
958
- lxb_encoding_encode_macintosh(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
959
- const lxb_codepoint_t *end)
960
- {
961
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_macintosh,
962
- LXB_ENCODING_SINGLE_HASH_MACINTOSH_SIZE);
963
- }
964
-
965
- lxb_status_t
966
- lxb_encoding_encode_replacement(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
967
- const lxb_codepoint_t *end)
968
- {
969
- *cps = end;
970
- return LXB_STATUS_ERROR;
971
- }
972
-
973
- lxb_status_t
974
- lxb_encoding_encode_windows_1250(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
975
- const lxb_codepoint_t *end)
976
- {
977
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1250,
978
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1250_SIZE);
979
- }
980
-
981
- lxb_status_t
982
- lxb_encoding_encode_windows_1251(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
983
- const lxb_codepoint_t *end)
984
- {
985
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1251,
986
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1251_SIZE);
987
- }
988
-
989
- lxb_status_t
990
- lxb_encoding_encode_windows_1252(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
991
- const lxb_codepoint_t *end)
992
- {
993
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1252,
994
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1252_SIZE);
995
- }
996
-
997
- lxb_status_t
998
- lxb_encoding_encode_windows_1253(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
999
- const lxb_codepoint_t *end)
1000
- {
1001
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1253,
1002
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1253_SIZE);
1003
- }
1004
-
1005
- lxb_status_t
1006
- lxb_encoding_encode_windows_1254(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1007
- const lxb_codepoint_t *end)
1008
- {
1009
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1254,
1010
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1254_SIZE);
1011
- }
1012
-
1013
- lxb_status_t
1014
- lxb_encoding_encode_windows_1255(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1015
- const lxb_codepoint_t *end)
1016
- {
1017
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1255,
1018
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1255_SIZE);
1019
- }
1020
-
1021
- lxb_status_t
1022
- lxb_encoding_encode_windows_1256(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1023
- const lxb_codepoint_t *end)
1024
- {
1025
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1256,
1026
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1256_SIZE);
1027
- }
1028
-
1029
- lxb_status_t
1030
- lxb_encoding_encode_windows_1257(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1031
- const lxb_codepoint_t *end)
1032
- {
1033
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1257,
1034
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1257_SIZE);
1035
- }
1036
-
1037
- lxb_status_t
1038
- lxb_encoding_encode_windows_1258(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1039
- const lxb_codepoint_t *end)
1040
- {
1041
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1258,
1042
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1258_SIZE);
1043
- }
1044
-
1045
- lxb_status_t
1046
- lxb_encoding_encode_windows_874(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1047
- const lxb_codepoint_t *end)
1048
- {
1049
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_874,
1050
- LXB_ENCODING_SINGLE_HASH_WINDOWS_874_SIZE);
1051
- }
1052
-
1053
- lxb_status_t
1054
- lxb_encoding_encode_x_mac_cyrillic(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1055
- const lxb_codepoint_t *end)
1056
- {
1057
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_x_mac_cyrillic,
1058
- LXB_ENCODING_SINGLE_HASH_X_MAC_CYRILLIC_SIZE);
1059
- }
1060
-
1061
- lxb_status_t
1062
- lxb_encoding_encode_x_user_defined(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1063
- const lxb_codepoint_t *end)
1064
- {
1065
- lxb_codepoint_t cp;
1066
-
1067
- for (; *cps < end; (*cps)++) {
1068
- cp = **cps;
1069
-
1070
- if (cp < 0x80) {
1071
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
1072
- }
1073
- else if (cp >= 0xF780 && cp <= 0xF7FF) {
1074
- LXB_ENCODING_ENCODE_APPEND(ctx, (cp - 0xF780 + 0x80));
1075
- }
1076
- else {
1077
- LXB_ENCODING_ENCODE_ERROR(ctx);
1078
- }
1079
- }
1080
-
1081
- return LXB_STATUS_OK;
1082
- }
1083
-
1084
- /*
1085
- * Single
1086
- */
1087
- int8_t
1088
- lxb_encoding_encode_default_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1089
- const lxb_char_t *end, lxb_codepoint_t cp)
1090
- {
1091
- return lxb_encoding_encode_utf_8_single(ctx, data, end, cp);
1092
- }
1093
-
1094
- int8_t
1095
- lxb_encoding_encode_auto_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1096
- const lxb_char_t *end, lxb_codepoint_t cp)
1097
- {
1098
- return LXB_ENCODING_ENCODE_ERROR;
1099
- }
1100
-
1101
- int8_t
1102
- lxb_encoding_encode_undefined_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1103
- const lxb_char_t *end, lxb_codepoint_t cp)
1104
- {
1105
- return LXB_ENCODING_ENCODE_ERROR;
1106
- }
1107
-
1108
- int8_t
1109
- lxb_encoding_encode_big5_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1110
- const lxb_char_t *end, lxb_codepoint_t cp)
1111
- {
1112
- const lexbor_shs_hash_t *hash;
1113
-
1114
- if (cp < 0x80) {
1115
- *(*data)++ = (lxb_char_t) cp;
1116
-
1117
- return 1;
1118
- }
1119
-
1120
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_big5,
1121
- LXB_ENCODING_MULTI_HASH_BIG5_SIZE, cp);
1122
- if (hash == NULL) {
1123
- return LXB_ENCODING_ENCODE_ERROR;
1124
- }
1125
-
1126
- if ((*data + 2) > end) {
1127
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1128
- }
1129
-
1130
- *(*data)++ = ((uint32_t) (uintptr_t) hash->value) / 157 + 0x81;
1131
-
1132
- if ((((uint32_t) (uintptr_t) hash->value) % 157) < 0x3F) {
1133
- *(*data)++ = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x40;
1134
- }
1135
- else {
1136
- *(*data)++ = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x62;
1137
- }
1138
-
1139
- return 2;
1140
- }
1141
-
1142
- int8_t
1143
- lxb_encoding_encode_euc_jp_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1144
- const lxb_char_t *end, lxb_codepoint_t cp)
1145
- {
1146
- const lexbor_shs_hash_t *hash;
1147
-
1148
- if (cp < 0x80) {
1149
- *(*data)++ = (lxb_char_t) cp;
1150
-
1151
- return 1;
1152
- }
1153
-
1154
- if (cp == 0x00A5) {
1155
- *(*data)++ = 0x5C;
1156
-
1157
- return 1;
1158
- }
1159
-
1160
- if (cp == 0x203E) {
1161
- *(*data)++ = 0x7E;
1162
-
1163
- return 1;
1164
- }
1165
-
1166
- if ((*data + 2) > end) {
1167
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1168
- }
1169
-
1170
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
1171
- *(*data)++ = 0x8E;
1172
- *(*data)++ = cp - 0xFF61 + 0xA1;
1173
-
1174
- return 2;
1175
- }
1176
-
1177
- if (cp == 0x2212) {
1178
- cp = 0xFF0D;
1179
- }
1180
-
1181
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
1182
- LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
1183
- if (hash == NULL) {
1184
- return LXB_ENCODING_ENCODE_ERROR;
1185
- }
1186
-
1187
- *(*data)++ = (uint32_t) (uintptr_t) hash->value / 94 + 0xA1;
1188
- *(*data)++ = (uint32_t) (uintptr_t) hash->value % 94 + 0xA1;
1189
-
1190
- return 2;
1191
- }
1192
-
1193
- int8_t
1194
- lxb_encoding_encode_euc_kr_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1195
- const lxb_char_t *end, lxb_codepoint_t cp)
1196
- {
1197
- const lexbor_shs_hash_t *hash;
1198
-
1199
- if (cp < 0x80) {
1200
- *(*data)++ = (lxb_char_t) cp;
1201
-
1202
- return 1;
1203
- }
1204
-
1205
- if ((*data + 2) > end) {
1206
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1207
- }
1208
-
1209
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_euc_kr,
1210
- LXB_ENCODING_MULTI_HASH_EUC_KR_SIZE, cp);
1211
- if (hash == NULL) {
1212
- return LXB_ENCODING_ENCODE_ERROR;
1213
- }
1214
-
1215
- *(*data)++ = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
1216
- *(*data)++ = (uint32_t) (uintptr_t) hash->value % 190 + 0x41;
1217
-
1218
- return 2;
1219
- }
1220
-
1221
- int8_t
1222
- lxb_encoding_encode_gbk_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1223
- const lxb_char_t *end, lxb_codepoint_t cp)
1224
- {
1225
- const lexbor_shs_hash_t *hash;
1226
-
1227
- if (cp < 0x80) {
1228
- *(*data)++ = (lxb_char_t) cp;
1229
-
1230
- return 1;
1231
- }
1232
-
1233
- if (cp == 0xE5E5) {
1234
- return LXB_ENCODING_ENCODE_ERROR;
1235
- }
1236
-
1237
- if (cp == 0x20AC) {
1238
- *(*data)++ = 0x80;
1239
-
1240
- return 1;
1241
- }
1242
-
1243
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
1244
- LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
1245
- if (hash != NULL) {
1246
- if ((*data + 2) > end) {
1247
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1248
- }
1249
-
1250
- *(*data)++ = (lxb_char_t) (uintptr_t) hash->value / 190 + 0x81;
1251
-
1252
- if (((lxb_char_t) (uintptr_t) hash->value % 190) < 0x3F) {
1253
- *(*data)++ = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x40;
1254
- }
1255
- else {
1256
- *(*data)++ = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x41;
1257
- }
1258
-
1259
- return 2;
1260
- }
1261
-
1262
- return LXB_ENCODING_ENCODE_ERROR;
1263
- }
1264
-
1265
- int8_t
1266
- lxb_encoding_encode_ibm866_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1267
- const lxb_char_t *end, lxb_codepoint_t cp)
1268
- {
1269
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_ibm866,
1270
- LXB_ENCODING_SINGLE_HASH_IBM866_SIZE);
1271
- }
1272
-
1273
- int8_t
1274
- lxb_encoding_encode_iso_2022_jp_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1275
- const lxb_char_t *end, lxb_codepoint_t cp)
1276
- {
1277
- int8_t size;
1278
- unsigned state;
1279
- const lexbor_shs_hash_t *hash;
1280
-
1281
- size = 0;
1282
- state = ctx->state;
1283
-
1284
- begin:
1285
-
1286
- switch (ctx->state) {
1287
- case LXB_ENCODING_ENCODE_2022_JP_ASCII:
1288
- if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
1289
- goto failed;
1290
- }
1291
-
1292
- if (cp < 0x80) {
1293
- *(*data)++ = (lxb_char_t) cp;
1294
-
1295
- return size + 1;
1296
- }
1297
-
1298
- if (cp == 0x00A5 || cp == 0x203E) {
1299
- /*
1300
- * Do not switch to the ROMAN stage with prepend code point
1301
- * to stream, add it immediately.
1302
- */
1303
- if ((*data + 4) > end) {
1304
- goto small_buffer;
1305
- }
1306
-
1307
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
1308
-
1309
- if (cp == 0x00A5) {
1310
- memcpy(*data, "\x1B\x28\x4A\x5C", 4);
1311
- *data = *data + 4;
1312
-
1313
- return size + 4;
1314
- }
1315
-
1316
- memcpy(*data, "\x1B\x28\x4A\x7E", 4);
1317
- *data = *data + 4;
1318
-
1319
- return size + 4;
1320
- }
1321
-
1322
- break;
1323
-
1324
- case LXB_ENCODING_ENCODE_2022_JP_ROMAN:
1325
- if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
1326
- goto failed;
1327
- }
1328
-
1329
- if (cp < 0x80) {
1330
- switch (cp) {
1331
- case 0x005C:
1332
- case 0x007E:
1333
- break;
1334
-
1335
- case 0x00A5:
1336
- *(*data)++ = 0x5C;
1337
- return size + 1;
1338
-
1339
- case 0x203E:
1340
- *(*data)++ = 0x7E;
1341
- return size + 1;
1342
-
1343
- default:
1344
- *(*data)++ = (lxb_char_t) cp;
1345
- return size + 1;
1346
- }
1347
-
1348
- /*
1349
- * Do not switch to the ANSI stage with prepend code point
1350
- * to stream, add it immediately.
1351
- */
1352
- if ((*data + 4) > end) {
1353
- goto small_buffer;
1354
- }
1355
-
1356
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
1357
-
1358
- memcpy(*data, "\x1B\x28\x42", 3);
1359
- *data = *data + 3;
1360
-
1361
- *(*data)++ = (lxb_char_t) cp;
1362
-
1363
- return size + 4;
1364
- }
1365
-
1366
- break;
1367
-
1368
- case LXB_ENCODING_ENCODE_2022_JP_JIS0208:
1369
- if (cp < 0x80) {
1370
- if ((*data + 4) > end) {
1371
- goto small_buffer;
1372
- }
1373
-
1374
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
1375
-
1376
- memcpy(*data, "\x1B\x28\x42", 3);
1377
- *data = *data + 3;
1378
-
1379
- *(*data)++ = (lxb_char_t) cp;
1380
-
1381
- return size + 4;
1382
- }
1383
-
1384
- if (cp == 0x00A5 || cp == 0x203E) {
1385
- if ((*data + 4) > end) {
1386
- goto small_buffer;
1387
- }
1388
-
1389
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
1390
-
1391
- if (cp == 0x00A5) {
1392
- memcpy(*data, "\x1B\x28\x4A\x5C", 4);
1393
- *data = *data + 4;
1394
-
1395
- return size + 4;
1396
- }
1397
-
1398
- memcpy(*data, "\x1B\x28\x4A\x7E", 4);
1399
- *data = *data + 4;
1400
-
1401
- return size + 4;
1402
- }
1403
-
1404
- break;
1405
- }
1406
-
1407
- if ((*data + 2) > end) {
1408
- goto small_buffer;
1409
- }
1410
-
1411
- if (cp == 0x2212) {
1412
- cp = 0xFF0D;
1413
- }
1414
-
1415
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
1416
- cp = lxb_encoding_multi_index_iso_2022_jp_katakana[cp - 0xFF61].codepoint;
1417
- }
1418
-
1419
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
1420
- LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
1421
- if (hash == NULL) {
1422
- goto failed;
1423
- }
1424
-
1425
- if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_JIS0208) {
1426
- if ((*data + 3) > end) {
1427
- goto small_buffer;
1428
- }
1429
-
1430
- memcpy(*data, "\x1B\x24\x42", 3);
1431
- *data = *data + 3;
1432
-
1433
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_JIS0208;
1434
- size += 3;
1435
-
1436
- goto begin;
1437
- }
1438
-
1439
- *(*data)++ = (uint32_t) (uintptr_t) hash->value / 94 + 0x21;
1440
- *(*data)++ = (uint32_t) (uintptr_t) hash->value % 94 + 0x21;
1441
-
1442
- return size + 2;
1443
-
1444
- small_buffer:
1445
-
1446
- ctx->state = state;
1447
- *data = *data - size;
1448
-
1449
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1450
-
1451
- failed:
1452
-
1453
- *data = *data - size;
1454
-
1455
- return LXB_ENCODING_ENCODE_ERROR;
1456
- }
1457
-
1458
- int8_t
1459
- lxb_encoding_encode_iso_2022_jp_eof_single(lxb_encoding_encode_t *ctx,
1460
- lxb_char_t **data, const lxb_char_t *end)
1461
- {
1462
- if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_ASCII) {
1463
- if ((*data + 3) > end) {
1464
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1465
- }
1466
-
1467
- memcpy(*data, "\x1B\x28\x42", 3);
1468
- *data = *data + 3;
1469
-
1470
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
1471
-
1472
- return 3;
1473
- }
1474
-
1475
- return 0;
1476
- }
1477
-
1478
- int8_t
1479
- lxb_encoding_encode_iso_8859_10_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1480
- const lxb_char_t *end, lxb_codepoint_t cp)
1481
- {
1482
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_10,
1483
- LXB_ENCODING_SINGLE_HASH_ISO_8859_10_SIZE);
1484
- }
1485
-
1486
- int8_t
1487
- lxb_encoding_encode_iso_8859_13_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1488
- const lxb_char_t *end, lxb_codepoint_t cp)
1489
- {
1490
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_13,
1491
- LXB_ENCODING_SINGLE_HASH_ISO_8859_13_SIZE);
1492
- }
1493
-
1494
- int8_t
1495
- lxb_encoding_encode_iso_8859_14_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1496
- const lxb_char_t *end, lxb_codepoint_t cp)
1497
- {
1498
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_14,
1499
- LXB_ENCODING_SINGLE_HASH_ISO_8859_14_SIZE);
1500
- }
1501
-
1502
- int8_t
1503
- lxb_encoding_encode_iso_8859_15_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1504
- const lxb_char_t *end, lxb_codepoint_t cp)
1505
- {
1506
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_15,
1507
- LXB_ENCODING_SINGLE_HASH_ISO_8859_15_SIZE);
1508
- }
1509
-
1510
- int8_t
1511
- lxb_encoding_encode_iso_8859_16_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1512
- const lxb_char_t *end, lxb_codepoint_t cp)
1513
- {
1514
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_16,
1515
- LXB_ENCODING_SINGLE_HASH_ISO_8859_16_SIZE);
1516
- }
1517
-
1518
- int8_t
1519
- lxb_encoding_encode_iso_8859_2_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1520
- const lxb_char_t *end, lxb_codepoint_t cp)
1521
- {
1522
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_2,
1523
- LXB_ENCODING_SINGLE_HASH_ISO_8859_2_SIZE);
1524
- }
1525
-
1526
- int8_t
1527
- lxb_encoding_encode_iso_8859_3_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1528
- const lxb_char_t *end, lxb_codepoint_t cp)
1529
- {
1530
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_3,
1531
- LXB_ENCODING_SINGLE_HASH_ISO_8859_3_SIZE);
1532
- }
1533
-
1534
- int8_t
1535
- lxb_encoding_encode_iso_8859_4_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1536
- const lxb_char_t *end, lxb_codepoint_t cp)
1537
- {
1538
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_4,
1539
- LXB_ENCODING_SINGLE_HASH_ISO_8859_4_SIZE);
1540
- }
1541
-
1542
- int8_t
1543
- lxb_encoding_encode_iso_8859_5_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1544
- const lxb_char_t *end, lxb_codepoint_t cp)
1545
- {
1546
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_5,
1547
- LXB_ENCODING_SINGLE_HASH_ISO_8859_5_SIZE);
1548
- }
1549
-
1550
- int8_t
1551
- lxb_encoding_encode_iso_8859_6_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1552
- const lxb_char_t *end, lxb_codepoint_t cp)
1553
- {
1554
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_6,
1555
- LXB_ENCODING_SINGLE_HASH_ISO_8859_6_SIZE);
1556
- }
1557
-
1558
- int8_t
1559
- lxb_encoding_encode_iso_8859_7_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1560
- const lxb_char_t *end, lxb_codepoint_t cp)
1561
- {
1562
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_7,
1563
- LXB_ENCODING_SINGLE_HASH_ISO_8859_7_SIZE);
1564
- }
1565
-
1566
- int8_t
1567
- lxb_encoding_encode_iso_8859_8_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1568
- const lxb_char_t *end, lxb_codepoint_t cp)
1569
- {
1570
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_8,
1571
- LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
1572
- }
1573
-
1574
- int8_t
1575
- lxb_encoding_encode_iso_8859_8_i_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1576
- const lxb_char_t *end, lxb_codepoint_t cp)
1577
- {
1578
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_8,
1579
- LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
1580
- }
1581
-
1582
- int8_t
1583
- lxb_encoding_encode_koi8_r_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1584
- const lxb_char_t *end, lxb_codepoint_t cp)
1585
- {
1586
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_koi8_r,
1587
- LXB_ENCODING_SINGLE_HASH_KOI8_R_SIZE);
1588
- }
1589
-
1590
- int8_t
1591
- lxb_encoding_encode_koi8_u_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1592
- const lxb_char_t *end, lxb_codepoint_t cp)
1593
- {
1594
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_koi8_u,
1595
- LXB_ENCODING_SINGLE_HASH_KOI8_U_SIZE);
1596
- }
1597
-
1598
- int8_t
1599
- lxb_encoding_encode_shift_jis_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1600
- const lxb_char_t *end, lxb_codepoint_t cp)
1601
- {
1602
- uint32_t lead, trail;
1603
- const lexbor_shs_hash_t *hash;
1604
-
1605
- if (cp <= 0x80) {
1606
- *(*data)++ = (lxb_char_t) cp;
1607
-
1608
- return 1;
1609
- }
1610
-
1611
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
1612
- *(*data)++ = cp - 0xFF61 + 0xA1;
1613
-
1614
- return 1;
1615
- }
1616
-
1617
- switch (cp) {
1618
- case 0x00A5:
1619
- *(*data)++ = 0x5C;
1620
- return 1;
1621
-
1622
- case 0x203E:
1623
- *(*data)++ = 0x7E;
1624
- return 1;
1625
-
1626
- case 0x2212:
1627
- cp = 0xFF0D;
1628
- break;
1629
- }
1630
-
1631
- hash = lxb_encoding_encode_shift_jis_index(cp);
1632
- if (hash == NULL) {
1633
- return LXB_ENCODING_ENCODE_ERROR;
1634
- }
1635
-
1636
- if ((*data + 2) > end) {
1637
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1638
- }
1639
-
1640
- lead = (uint32_t) (uintptr_t) hash->value / 188;
1641
- trail = (uint32_t) (uintptr_t) hash->value % 188;
1642
-
1643
- *(*data)++ = lead + ((lead < 0x1F) ? 0x81 : 0xC1);
1644
- *(*data)++ = trail + ((trail < 0x3F) ? 0x40 : 0x41);
1645
-
1646
- return 2;
1647
- }
1648
-
1649
- lxb_inline void
1650
- lxb_encoding_encode_utf_16_write_single(bool is_be, lxb_char_t **data,
1651
- lxb_codepoint_t cp)
1652
- {
1653
- if (is_be) {
1654
- *(*data)++ = cp >> 8;
1655
- *(*data)++ = cp & 0x00FF;
1656
-
1657
- return;
1658
- }
1659
-
1660
- *(*data)++ = cp & 0x00FF;
1661
- *(*data)++ = cp >> 8;
1662
- }
1663
-
1664
- lxb_inline int8_t
1665
- lxb_encoding_encode_utf_16_single(lxb_encoding_encode_t *ctx, bool is_be,
1666
- lxb_char_t **data, const lxb_char_t *end, lxb_codepoint_t cp)
1667
- {
1668
- if ((*data + 2) > end) {
1669
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1670
- }
1671
-
1672
- if (cp < 0x10000) {
1673
- lxb_encoding_encode_utf_16_write_single(is_be, data, cp);
1674
-
1675
- return 2;
1676
- }
1677
-
1678
- if ((*data + 4) > end) {
1679
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1680
- }
1681
-
1682
- cp -= 0x10000;
1683
-
1684
- lxb_encoding_encode_utf_16_write_single(is_be, data, (0xD800 | (cp >> 0x0A)));
1685
- lxb_encoding_encode_utf_16_write_single(is_be, data, (0xDC00 | (cp & 0x03FF)));
1686
-
1687
- return 4;
1688
- }
1689
-
1690
- int8_t
1691
- lxb_encoding_encode_utf_16be_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1692
- const lxb_char_t *end, lxb_codepoint_t cp)
1693
- {
1694
- return lxb_encoding_encode_utf_16_single(ctx, true, data, end, cp);
1695
- }
1696
-
1697
- int8_t
1698
- lxb_encoding_encode_utf_16le_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1699
- const lxb_char_t *end, lxb_codepoint_t cp)
1700
- {
1701
- return lxb_encoding_encode_utf_16_single(ctx, false, data, end, cp);
1702
- }
1703
-
1704
- int8_t
1705
- lxb_encoding_encode_utf_8_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1706
- const lxb_char_t *end, lxb_codepoint_t cp)
1707
- {
1708
- if (cp < 0x80) {
1709
- /* 0xxxxxxx */
1710
- *(*data)++ = (lxb_char_t) cp;
1711
-
1712
- return 1;
1713
- }
1714
-
1715
- if (cp < 0x800) {
1716
- if ((*data + 2) > end) {
1717
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1718
- }
1719
-
1720
- /* 110xxxxx 10xxxxxx */
1721
- *(*data)++ = (lxb_char_t) (0xC0 | (cp >> 6 ));
1722
- *(*data)++ = (lxb_char_t) (0x80 | (cp & 0x3F));
1723
-
1724
- return 2;
1725
- }
1726
-
1727
- if (cp < 0x10000) {
1728
- if ((*data + 3) > end) {
1729
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1730
- }
1731
-
1732
- /* 1110xxxx 10xxxxxx 10xxxxxx */
1733
- *(*data)++ = (lxb_char_t) (0xE0 | ((cp >> 12)));
1734
- *(*data)++ = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
1735
- *(*data)++ = (lxb_char_t) (0x80 | ( cp & 0x3F));
1736
-
1737
- return 3;
1738
- }
1739
-
1740
- if (cp < 0x110000) {
1741
- if ((*data + 4) > end) {
1742
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1743
- }
1744
-
1745
- /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
1746
- *(*data)++ = (lxb_char_t) (0xF0 | ( cp >> 18));
1747
- *(*data)++ = (lxb_char_t) (0x80 | ((cp >> 12) & 0x3F));
1748
- *(*data)++ = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
1749
- *(*data)++ = (lxb_char_t) (0x80 | ( cp & 0x3F));
1750
-
1751
- return 4;
1752
- }
1753
-
1754
- return LXB_ENCODING_ENCODE_ERROR;
1755
- }
1756
-
1757
- int8_t
1758
- lxb_encoding_encode_gb18030_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1759
- const lxb_char_t *end, lxb_codepoint_t cp)
1760
- {
1761
- uint32_t index;
1762
- const lexbor_shs_hash_t *hash;
1763
-
1764
- if (cp < 0x80) {
1765
- *(*data)++ = (lxb_char_t) cp;
1766
-
1767
- return 1;
1768
- }
1769
-
1770
- if (cp == 0xE5E5) {
1771
- return LXB_ENCODING_ENCODE_ERROR;
1772
- }
1773
-
1774
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
1775
- LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
1776
- if (hash != NULL) {
1777
- if ((*data + 2) > end) {
1778
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1779
- }
1780
-
1781
- *(*data)++ = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
1782
-
1783
- if (((uint32_t) (uintptr_t) hash->value % 190) < 0x3F) {
1784
- *(*data)++ = ((uint32_t) (uintptr_t) hash->value % 190) + 0x40;
1785
- }
1786
- else {
1787
- *(*data)++ = ((uint32_t) (uintptr_t) hash->value % 190) + 0x41;
1788
- }
1789
-
1790
- return 2;
1791
- }
1792
-
1793
- if ((*data + 4) > end) {
1794
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1795
- }
1796
-
1797
- index = lxb_encoding_encode_gb18030_range(cp);
1798
-
1799
- *(*data)++ = (index / (10 * 126 * 10)) + 0x81;
1800
- *(*data)++ = ((index % (10 * 126 * 10)) / (10 * 126)) + 0x30;
1801
-
1802
- index = (index % (10 * 126 * 10)) % (10 * 126);
1803
-
1804
- *(*data)++ = (index / 10) + 0x81;
1805
- *(*data)++ = (index % 10) + 0x30;
1806
-
1807
- return 4;
1808
- }
1809
-
1810
- int8_t
1811
- lxb_encoding_encode_macintosh_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1812
- const lxb_char_t *end, lxb_codepoint_t cp)
1813
- {
1814
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_macintosh,
1815
- LXB_ENCODING_SINGLE_HASH_MACINTOSH_SIZE);
1816
- }
1817
-
1818
- int8_t
1819
- lxb_encoding_encode_replacement_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1820
- const lxb_char_t *end, lxb_codepoint_t cp)
1821
- {
1822
- (*data)++;
1823
- return LXB_ENCODING_ENCODE_ERROR;
1824
- }
1825
-
1826
- int8_t
1827
- lxb_encoding_encode_windows_1250_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1828
- const lxb_char_t *end, lxb_codepoint_t cp)
1829
- {
1830
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1250,
1831
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1250_SIZE);
1832
- }
1833
-
1834
- int8_t
1835
- lxb_encoding_encode_windows_1251_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1836
- const lxb_char_t *end, lxb_codepoint_t cp)
1837
- {
1838
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1251,
1839
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1251_SIZE);
1840
- }
1841
-
1842
- int8_t
1843
- lxb_encoding_encode_windows_1252_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1844
- const lxb_char_t *end, lxb_codepoint_t cp)
1845
- {
1846
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1252,
1847
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1252_SIZE);
1848
- }
1849
-
1850
- int8_t
1851
- lxb_encoding_encode_windows_1253_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1852
- const lxb_char_t *end, lxb_codepoint_t cp)
1853
- {
1854
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1253,
1855
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1253_SIZE);
1856
- }
1857
-
1858
- int8_t
1859
- lxb_encoding_encode_windows_1254_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1860
- const lxb_char_t *end, lxb_codepoint_t cp)
1861
- {
1862
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1254,
1863
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1254_SIZE);
1864
- }
1865
-
1866
- int8_t
1867
- lxb_encoding_encode_windows_1255_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1868
- const lxb_char_t *end, lxb_codepoint_t cp)
1869
- {
1870
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1255,
1871
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1255_SIZE);
1872
- }
1873
-
1874
- int8_t
1875
- lxb_encoding_encode_windows_1256_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1876
- const lxb_char_t *end, lxb_codepoint_t cp)
1877
- {
1878
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1256,
1879
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1256_SIZE);
1880
- }
1881
-
1882
- int8_t
1883
- lxb_encoding_encode_windows_1257_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1884
- const lxb_char_t *end, lxb_codepoint_t cp)
1885
- {
1886
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1257,
1887
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1257_SIZE);
1888
- }
1889
-
1890
- int8_t
1891
- lxb_encoding_encode_windows_1258_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1892
- const lxb_char_t *end, lxb_codepoint_t cp)
1893
- {
1894
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1258,
1895
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1258_SIZE);
1896
- }
1897
-
1898
- int8_t
1899
- lxb_encoding_encode_windows_874_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1900
- const lxb_char_t *end, lxb_codepoint_t cp)
1901
- {
1902
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_874,
1903
- LXB_ENCODING_SINGLE_HASH_WINDOWS_874_SIZE);
1904
- }
1905
-
1906
- int8_t
1907
- lxb_encoding_encode_x_mac_cyrillic_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1908
- const lxb_char_t *end, lxb_codepoint_t cp)
1909
- {
1910
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_x_mac_cyrillic,
1911
- LXB_ENCODING_SINGLE_HASH_X_MAC_CYRILLIC_SIZE);
1912
- }
1913
-
1914
- int8_t
1915
- lxb_encoding_encode_x_user_defined_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1916
- const lxb_char_t *end, lxb_codepoint_t cp)
1917
- {
1918
- if (cp < 0x80) {
1919
- *(*data)++ = (lxb_char_t) cp;
1920
-
1921
- return 1;
1922
- }
1923
-
1924
- if (cp >= 0xF780 && cp <= 0xF7FF) {
1925
- *(*data)++ = (lxb_char_t) (cp - 0xF780 + 0x80);
1926
-
1927
- return 1;
1928
- }
1929
-
1930
- return LXB_ENCODING_ENCODE_ERROR;
1931
- }