nokolexbor 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/ext/nokolexbor/nl_attribute.c +46 -0
  3. data/ext/nokolexbor/nl_cdata.c +8 -0
  4. data/ext/nokolexbor/nl_comment.c +6 -0
  5. data/ext/nokolexbor/nl_document.c +53 -7
  6. data/ext/nokolexbor/nl_document_fragment.c +9 -0
  7. data/ext/nokolexbor/nl_error.c +21 -19
  8. data/ext/nokolexbor/nl_node.c +255 -49
  9. data/ext/nokolexbor/nl_node_set.c +56 -1
  10. data/ext/nokolexbor/nl_processing_instruction.c +6 -0
  11. data/ext/nokolexbor/nl_text.c +6 -0
  12. data/ext/nokolexbor/nokolexbor.h +1 -0
  13. data/lib/nokolexbor/document.rb +52 -5
  14. data/lib/nokolexbor/document_fragment.rb +11 -0
  15. data/lib/nokolexbor/node.rb +367 -18
  16. data/lib/nokolexbor/node_set.rb +56 -0
  17. data/lib/nokolexbor/version.rb +1 -1
  18. metadata +2 -24
  19. data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
  20. data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
  21. data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
  22. data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
  23. data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
  24. data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
  25. data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
  26. data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
  27. data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
  28. data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
  29. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
  30. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
  31. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
  32. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
  33. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
  34. data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
  35. data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
  36. data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
  37. data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
  38. data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
  39. data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
  40. data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
@@ -1,1931 +0,0 @@
1
- /*
2
- * Copyright (C) 2019 Alexander Borisov
3
- *
4
- * Author: Alexander Borisov <borisov@lexbor.com>
5
- */
6
-
7
- #include "lexbor/encoding/encode.h"
8
- #include "lexbor/encoding/single.h"
9
- #include "lexbor/encoding/multi.h"
10
- #include "lexbor/encoding/range.h"
11
-
12
-
13
/*
 * Append a single byte to the output buffer of `ctx`.
 *
 * If the buffer is already full (buffer_used == buffer_length) the macro
 * executes `return LXB_STATUS_SMALL_BUFFER;` in the *enclosing* function,
 * so it may only be used inside functions returning a status value.
 *
 * `cp` is parenthesized before the cast so that the whole argument
 * expression (not just its first operand) is converted to lxb_char_t.
 */
#define LXB_ENCODING_ENCODE_APPEND(ctx, cp)                                    \
    do {                                                                       \
        if ((ctx)->buffer_used == (ctx)->buffer_length) {                      \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
                                                                               \
        (ctx)->buffer_out[(ctx)->buffer_used++] = (lxb_char_t) (cp);           \
    }                                                                          \
    while (0)
22
-
23
/*
 * Append a single byte to the output buffer of `ctx`, for encoders that walk
 * the input through a local cursor `p`.
 *
 * Relies on two names from the enclosing scope: `p` (current input position)
 * and `cps` (the caller's in/out cursor). When the buffer is full, `*cps` is
 * set to `p` and the enclosing function returns LXB_STATUS_SMALL_BUFFER.
 *
 * `cp` is parenthesized before the cast so that the whole argument
 * expression is converted to lxb_char_t.
 */
#define LXB_ENCODING_ENCODE_APPEND_P(ctx, cp)                                  \
    do {                                                                       \
        if ((ctx)->buffer_used == (ctx)->buffer_length) {                      \
            *cps = p;                                                          \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
                                                                               \
        (ctx)->buffer_out[(ctx)->buffer_used++] = (lxb_char_t) (cp);           \
    }                                                                          \
    while (0)
33
-
34
/*
 * Handle a code point that cannot be encoded.
 *
 * - If no replacement sequence is configured (replace_to == NULL), the
 *   enclosing function returns LXB_STATUS_ERROR.
 * - If the replacement does not fit in the remaining output space, the
 *   enclosing function returns LXB_STATUS_SMALL_BUFFER.
 * - Otherwise replace_len bytes of replace_to are copied to the output and
 *   buffer_used is advanced.
 *
 * `ctx` is parenthesized on every use, matching the APPEND macros, so any
 * pointer expression may be passed.
 */
#define LXB_ENCODING_ENCODE_ERROR(ctx)                                         \
    do {                                                                       \
        if ((ctx)->replace_to == NULL) {                                       \
            return LXB_STATUS_ERROR;                                           \
        }                                                                      \
                                                                               \
        if (((ctx)->buffer_used + (ctx)->replace_len)                          \
            > (ctx)->buffer_length)                                            \
        {                                                                      \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
                                                                               \
        memcpy(&(ctx)->buffer_out[(ctx)->buffer_used], (ctx)->replace_to,      \
               (ctx)->replace_len);                                            \
                                                                               \
        (ctx)->buffer_used += (ctx)->replace_len;                              \
    }                                                                          \
    while (0)
50
-
51
/*
 * Handle a code point that cannot be encoded, for encoders that walk the
 * input through a local cursor `p`.
 *
 * Relies on `p` and `cps` from the enclosing scope. On both failure paths
 * `*cps` is set to `p` before returning from the enclosing function:
 * - replace_to == NULL              -> LXB_STATUS_ERROR
 * - replacement does not fit        -> LXB_STATUS_SMALL_BUFFER
 * Otherwise the replacement bytes are copied to the output buffer.
 *
 * `ctx` is parenthesized on every use, matching the APPEND macros.
 */
#define LXB_ENCODING_ENCODE_ERROR_P(ctx)                                       \
    do {                                                                       \
        if ((ctx)->replace_to == NULL) {                                       \
            *cps = p;                                                          \
            return LXB_STATUS_ERROR;                                           \
        }                                                                      \
                                                                               \
        if (((ctx)->buffer_used + (ctx)->replace_len)                          \
            > (ctx)->buffer_length)                                            \
        {                                                                      \
            *cps = p;                                                          \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
                                                                               \
        memcpy(&(ctx)->buffer_out[(ctx)->buffer_used], (ctx)->replace_to,      \
               (ctx)->replace_len);                                            \
                                                                               \
        (ctx)->buffer_used += (ctx)->replace_len;                              \
    }                                                                          \
    while (0)
69
-
70
/*
 * Complete function body for a table-driven single-byte encoder.
 *
 * Expects `ctx`, `cps` and `end` to be the parameters of the enclosing
 * function. Code points below 0x80 are written as-is; everything else is
 * looked up in `table` (of `table_size` entries) and the stored value is
 * written as one byte. Code points missing from the table go through
 * LXB_ENCODING_ENCODE_ERROR_P.
 *
 * The input is walked with a local cursor `p`. The helper macros store `p`
 * into `*cps` on their early-return paths; on success `*cps` is now updated
 * as well, so the caller's cursor ends at `end` exactly like it does in the
 * other encoders of this file (previously it was left at its initial value).
 */
#define LXB_ENCODING_ENCODE_SINGLE_BYTE(table, table_size)                     \
    do {                                                                       \
        lxb_codepoint_t cp;                                                    \
        const lxb_codepoint_t *p = *cps;                                       \
        const lexbor_shs_hash_t *hash;                                         \
                                                                               \
        for (; p < end; p++) {                                                 \
            cp = *p;                                                           \
                                                                               \
            if (cp < 0x80) {                                                   \
                LXB_ENCODING_ENCODE_APPEND_P(ctx, cp);                         \
                continue;                                                      \
            }                                                                  \
                                                                               \
            hash = lexbor_shs_hash_get_static(table, table_size, cp);          \
            if (hash == NULL) {                                                \
                LXB_ENCODING_ENCODE_ERROR_P(ctx);                              \
                continue;                                                      \
            }                                                                  \
                                                                               \
            LXB_ENCODING_ENCODE_APPEND_P(ctx, (uintptr_t) hash->value);        \
        }                                                                      \
                                                                               \
        /* Report the consumed input back to the caller. */                    \
        *cps = p;                                                              \
                                                                               \
        return LXB_STATUS_OK;                                                  \
    }                                                                          \
    while (0)
96
-
97
/*
 * Function body for the "one code point at a time" single-byte encoders.
 *
 * Expects `cp` and `data` from the enclosing function. Writes exactly one
 * byte through `*data` (advancing it) and returns 1, or returns
 * LXB_ENCODING_ENCODE_ERROR when `cp` is >= 0x80 and absent from `table`.
 *
 * NOTE(review): LXB_ENCODING_ENCODE_ERROR is used here without an argument
 * list, so the function-like macro of the same name is not expanded; it
 * presumably resolves to a constant declared in a header - confirm.
 *
 * The expansion is a bare statement list (it declares `hash`) and ends
 * without a semicolon; the user supplies the final one.
 */
#define LXB_ENCODING_ENCODE_BYTE_SINGLE(table, table_size)                     \
    const lexbor_shs_hash_t *hash;                                             \
                                                                               \
    if (cp >= 0x80) {                                                          \
        hash = lexbor_shs_hash_get_static(table, table_size, cp);              \
        if (hash == NULL) {                                                    \
            return LXB_ENCODING_ENCODE_ERROR;                                  \
        }                                                                      \
                                                                               \
        **data = (lxb_char_t) (uintptr_t) hash->value;                         \
        (*data)++;                                                             \
        return 1;                                                              \
    }                                                                          \
                                                                               \
    **data = (lxb_char_t) cp;                                                  \
    (*data)++;                                                                 \
    return 1
112
-
113
-
114
- lxb_status_t
115
- lxb_encoding_encode_default(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
116
- const lxb_codepoint_t *end)
117
- {
118
- return lxb_encoding_encode_utf_8(ctx, cps, end);
119
- }
120
-
121
- lxb_status_t
122
- lxb_encoding_encode_auto(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
123
- const lxb_codepoint_t *end)
124
- {
125
- *cps = end;
126
- return LXB_STATUS_ERROR;
127
- }
128
-
129
- lxb_status_t
130
- lxb_encoding_encode_undefined(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
131
- const lxb_codepoint_t *end)
132
- {
133
- *cps = end;
134
- return LXB_STATUS_ERROR;
135
- }
136
-
137
- lxb_status_t
138
- lxb_encoding_encode_big5(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
139
- const lxb_codepoint_t *end)
140
- {
141
- lxb_codepoint_t cp;
142
- const lexbor_shs_hash_t *hash;
143
-
144
- for (; *cps < end; (*cps)++) {
145
- cp = **cps;
146
-
147
- if (cp < 0x80) {
148
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
149
- continue;
150
- }
151
-
152
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_big5,
153
- LXB_ENCODING_MULTI_HASH_BIG5_SIZE, cp);
154
- if (hash == NULL) {
155
- LXB_ENCODING_ENCODE_ERROR(ctx);
156
- continue;
157
- }
158
-
159
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
160
- return LXB_STATUS_SMALL_BUFFER;
161
- }
162
-
163
- ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value) / 157 + 0x81;
164
-
165
- if ((((uint32_t) (uintptr_t) hash->value) % 157) < 0x3F) {
166
- ctx->buffer_out[ ctx->buffer_used++ ] = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x40;
167
- }
168
- else {
169
- ctx->buffer_out[ ctx->buffer_used++ ] = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x62;
170
- }
171
- }
172
-
173
- return LXB_STATUS_OK;
174
- }
175
-
176
- lxb_status_t
177
- lxb_encoding_encode_euc_jp(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
178
- const lxb_codepoint_t *end)
179
- {
180
- lxb_codepoint_t cp;
181
- const lexbor_shs_hash_t *hash;
182
-
183
- for (; *cps < end; (*cps)++) {
184
- cp = **cps;
185
-
186
- if (cp < 0x80) {
187
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
188
- continue;
189
- }
190
-
191
- if (cp == 0x00A5) {
192
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
193
- continue;
194
- }
195
-
196
- if (cp == 0x203E) {
197
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
198
- continue;
199
- }
200
-
201
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
202
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
203
- return LXB_STATUS_SMALL_BUFFER;
204
- }
205
-
206
- ctx->buffer_out[ ctx->buffer_used++ ] = 0x8E;
207
- ctx->buffer_out[ ctx->buffer_used++ ] = cp - 0xFF61 + 0xA1;
208
-
209
- continue;
210
- }
211
-
212
- if (cp == 0x2212) {
213
- cp = 0xFF0D;
214
- }
215
-
216
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
217
- LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
218
- if (hash == NULL) {
219
- LXB_ENCODING_ENCODE_ERROR(ctx);
220
- continue;
221
- }
222
-
223
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
224
- return LXB_STATUS_SMALL_BUFFER;
225
- }
226
-
227
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 94 + 0xA1;
228
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 94 + 0xA1;
229
- }
230
-
231
- return LXB_STATUS_OK;
232
- }
233
-
234
- lxb_status_t
235
- lxb_encoding_encode_euc_kr(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
236
- const lxb_codepoint_t *end)
237
- {
238
- lxb_codepoint_t cp;
239
- const lexbor_shs_hash_t *hash;
240
-
241
- for (; *cps < end; (*cps)++) {
242
- cp = **cps;
243
-
244
- if (cp < 0x80) {
245
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
246
- continue;
247
- }
248
-
249
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_euc_kr,
250
- LXB_ENCODING_MULTI_HASH_EUC_KR_SIZE, cp);
251
- if (hash == NULL) {
252
- LXB_ENCODING_ENCODE_ERROR(ctx);
253
- continue;
254
- }
255
-
256
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
257
- return LXB_STATUS_SMALL_BUFFER;
258
- }
259
-
260
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
261
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 190 + 0x41;
262
- }
263
-
264
- return LXB_STATUS_OK;
265
- }
266
-
267
- lxb_status_t
268
- lxb_encoding_encode_gbk(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
269
- const lxb_codepoint_t *end)
270
- {
271
- lxb_codepoint_t cp;
272
- const lexbor_shs_hash_t *hash;
273
-
274
- for (; *cps < end; (*cps)++) {
275
- cp = **cps;
276
-
277
- if (cp < 0x80) {
278
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
279
- continue;
280
- }
281
-
282
- if (cp == 0xE5E5) {
283
- LXB_ENCODING_ENCODE_ERROR(ctx);
284
- continue;
285
- }
286
-
287
- if (cp == 0x20AC) {
288
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x80);
289
- continue;
290
- }
291
-
292
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
293
- LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
294
- if (hash == NULL) {
295
- LXB_ENCODING_ENCODE_ERROR(ctx);
296
- continue;
297
- }
298
-
299
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
300
- return LXB_STATUS_SMALL_BUFFER;
301
- }
302
-
303
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (uintptr_t) hash->value / 190 + 0x81;
304
-
305
- if (((lxb_char_t) (uintptr_t) hash->value % 190) < 0x3F) {
306
- ctx->buffer_out[ ctx->buffer_used++ ] = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x40;
307
- }
308
- else {
309
- ctx->buffer_out[ ctx->buffer_used++ ] = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x41;
310
- }
311
- }
312
-
313
- return LXB_STATUS_OK;
314
- }
315
-
316
- lxb_status_t
317
- lxb_encoding_encode_ibm866(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
318
- const lxb_codepoint_t *end)
319
- {
320
-
321
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_ibm866,
322
- LXB_ENCODING_SINGLE_HASH_IBM866_SIZE);
323
- }
324
-
325
- lxb_status_t
326
- lxb_encoding_encode_iso_2022_jp(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
327
- const lxb_codepoint_t *end)
328
- {
329
- int8_t size;
330
- unsigned state;
331
- lxb_codepoint_t cp;
332
- const lexbor_shs_hash_t *hash;
333
-
334
- size = 0;
335
- state = ctx->state;
336
-
337
- for (; *cps < end; (*cps)++) {
338
- cp = **cps;
339
-
340
- begin:
341
-
342
- switch (ctx->state) {
343
- case LXB_ENCODING_ENCODE_2022_JP_ASCII:
344
- if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
345
- goto failed;
346
- }
347
-
348
- if (cp < 0x80) {
349
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
350
- continue;
351
- }
352
-
353
- if (cp == 0x00A5 || cp == 0x203E) {
354
- /*
355
- * Do not switch to the ROMAN stage with prepend code point
356
- * to stream, add it immediately.
357
- */
358
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
359
- goto small_buffer;
360
- }
361
-
362
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
363
-
364
- if (cp == 0x00A5) {
365
- memcpy(&ctx->buffer_out[ctx->buffer_used],
366
- "\x1B\x28\x4A\x5C", 4);
367
- ctx->buffer_used += 4;
368
-
369
- continue;
370
- }
371
-
372
- memcpy(&ctx->buffer_out[ctx->buffer_used],
373
- "\x1B\x28\x4A\x7E", 4);
374
- ctx->buffer_used += 4;
375
-
376
- continue;
377
- }
378
-
379
- break;
380
-
381
- case LXB_ENCODING_ENCODE_2022_JP_ROMAN:
382
- if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
383
- goto failed;
384
- }
385
-
386
- if (cp < 0x80) {
387
- switch (cp) {
388
- case 0x005C:
389
- case 0x007E:
390
- break;
391
-
392
- case 0x00A5:
393
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
394
- continue;
395
-
396
- case 0x203E:
397
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
398
- continue;
399
-
400
- default:
401
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
402
- continue;
403
- }
404
-
405
- /*
406
- * Do not switch to the ANSI stage with prepend code point
407
- * to stream, add it immediately.
408
- */
409
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
410
- goto small_buffer;
411
- }
412
-
413
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
414
-
415
- memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
416
- ctx->buffer_used += 3;
417
-
418
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
419
- continue;
420
- }
421
-
422
- break;
423
-
424
- case LXB_ENCODING_ENCODE_2022_JP_JIS0208:
425
- if (cp < 0x80) {
426
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
427
- goto small_buffer;
428
- }
429
-
430
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
431
-
432
- memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
433
- ctx->buffer_used += 3;
434
-
435
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
436
- continue;
437
- }
438
-
439
- if (cp == 0x00A5 || cp == 0x203E) {
440
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
441
- goto small_buffer;
442
- }
443
-
444
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
445
-
446
- if (cp == 0x00A5) {
447
- memcpy(&ctx->buffer_out[ctx->buffer_used],
448
- "\x1B\x28\x4A\x5C", 4);
449
- ctx->buffer_used += 4;
450
-
451
- continue;
452
- }
453
-
454
- memcpy(&ctx->buffer_out[ctx->buffer_used],
455
- "\x1B\x28\x4A\x7E", 4);
456
- ctx->buffer_used += 4;
457
-
458
- continue;
459
- }
460
-
461
- break;
462
- }
463
-
464
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
465
- goto small_buffer;
466
- }
467
-
468
- if (cp == 0x2212) {
469
- cp = 0xFF0D;
470
- }
471
-
472
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
473
- cp = lxb_encoding_multi_index_iso_2022_jp_katakana[cp - 0xFF61].codepoint;
474
- }
475
-
476
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
477
- LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
478
- if (hash == NULL) {
479
- goto failed;
480
- }
481
-
482
- if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_JIS0208) {
483
- if ((ctx->buffer_used + 3) > ctx->buffer_length) {
484
- goto small_buffer;
485
- }
486
-
487
- memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x24\x42", 3);
488
- ctx->buffer_used += 3;
489
-
490
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_JIS0208;
491
- size += 3;
492
-
493
- goto begin;
494
- }
495
-
496
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 94 + 0x21;
497
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value % 94 + 0x21;
498
-
499
- continue;
500
-
501
- small_buffer:
502
-
503
- ctx->state = state;
504
- ctx->buffer_used -= size;
505
-
506
- return LXB_STATUS_SMALL_BUFFER;
507
-
508
- failed:
509
-
510
- ctx->buffer_used -= size;
511
- LXB_ENCODING_ENCODE_ERROR(ctx);
512
- }
513
-
514
- return LXB_STATUS_OK;
515
- }
516
-
517
- lxb_status_t
518
- lxb_encoding_encode_iso_2022_jp_eof(lxb_encoding_encode_t *ctx)
519
- {
520
- if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_ASCII) {
521
- if ((ctx->buffer_used + 3) > ctx->buffer_length) {
522
- return LXB_STATUS_SMALL_BUFFER;
523
- }
524
-
525
- memcpy(&ctx->buffer_out[ctx->buffer_used], "\x1B\x28\x42", 3);
526
- ctx->buffer_used += 3;
527
- }
528
-
529
- return LXB_STATUS_OK;
530
- }
531
-
532
- lxb_status_t
533
- lxb_encoding_encode_iso_8859_10(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
534
- const lxb_codepoint_t *end)
535
- {
536
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_10,
537
- LXB_ENCODING_SINGLE_HASH_ISO_8859_10_SIZE);
538
- }
539
-
540
- lxb_status_t
541
- lxb_encoding_encode_iso_8859_13(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
542
- const lxb_codepoint_t *end)
543
- {
544
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_13,
545
- LXB_ENCODING_SINGLE_HASH_ISO_8859_13_SIZE);
546
- }
547
-
548
- lxb_status_t
549
- lxb_encoding_encode_iso_8859_14(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
550
- const lxb_codepoint_t *end)
551
- {
552
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_14,
553
- LXB_ENCODING_SINGLE_HASH_ISO_8859_14_SIZE);
554
- }
555
-
556
- lxb_status_t
557
- lxb_encoding_encode_iso_8859_15(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
558
- const lxb_codepoint_t *end)
559
- {
560
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_15,
561
- LXB_ENCODING_SINGLE_HASH_ISO_8859_15_SIZE);
562
- }
563
-
564
- lxb_status_t
565
- lxb_encoding_encode_iso_8859_16(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
566
- const lxb_codepoint_t *end)
567
- {
568
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_16,
569
- LXB_ENCODING_SINGLE_HASH_ISO_8859_16_SIZE);
570
- }
571
-
572
- lxb_status_t
573
- lxb_encoding_encode_iso_8859_2(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
574
- const lxb_codepoint_t *end)
575
- {
576
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_2,
577
- LXB_ENCODING_SINGLE_HASH_ISO_8859_2_SIZE);
578
- }
579
-
580
- lxb_status_t
581
- lxb_encoding_encode_iso_8859_3(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
582
- const lxb_codepoint_t *end)
583
- {
584
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_3,
585
- LXB_ENCODING_SINGLE_HASH_ISO_8859_3_SIZE);
586
- }
587
-
588
- lxb_status_t
589
- lxb_encoding_encode_iso_8859_4(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
590
- const lxb_codepoint_t *end)
591
- {
592
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_4,
593
- LXB_ENCODING_SINGLE_HASH_ISO_8859_4_SIZE);
594
- }
595
-
596
- lxb_status_t
597
- lxb_encoding_encode_iso_8859_5(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
598
- const lxb_codepoint_t *end)
599
- {
600
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_5,
601
- LXB_ENCODING_SINGLE_HASH_ISO_8859_5_SIZE);
602
- }
603
-
604
- lxb_status_t
605
- lxb_encoding_encode_iso_8859_6(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
606
- const lxb_codepoint_t *end)
607
- {
608
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_6,
609
- LXB_ENCODING_SINGLE_HASH_ISO_8859_6_SIZE);
610
- }
611
-
612
- lxb_status_t
613
- lxb_encoding_encode_iso_8859_7(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
614
- const lxb_codepoint_t *end)
615
- {
616
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_7,
617
- LXB_ENCODING_SINGLE_HASH_ISO_8859_7_SIZE);
618
- }
619
-
620
- lxb_status_t
621
- lxb_encoding_encode_iso_8859_8(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
622
- const lxb_codepoint_t *end)
623
- {
624
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_8,
625
- LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
626
- }
627
-
628
- lxb_status_t
629
- lxb_encoding_encode_iso_8859_8_i(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
630
- const lxb_codepoint_t *end)
631
- {
632
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_iso_8859_8,
633
- LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
634
- }
635
-
636
- lxb_status_t
637
- lxb_encoding_encode_koi8_r(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
638
- const lxb_codepoint_t *end)
639
- {
640
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_koi8_r,
641
- LXB_ENCODING_SINGLE_HASH_KOI8_R_SIZE);
642
- }
643
-
644
- lxb_status_t
645
- lxb_encoding_encode_koi8_u(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
646
- const lxb_codepoint_t *end)
647
- {
648
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_koi8_u,
649
- LXB_ENCODING_SINGLE_HASH_KOI8_U_SIZE);
650
- }
651
-
652
- lxb_inline const lexbor_shs_hash_t *
653
- lxb_encoding_encode_shift_jis_index(lxb_codepoint_t cp)
654
- {
655
- const lexbor_shs_hash_t *entry;
656
-
657
- entry = &lxb_encoding_multi_hash_jis0208[ (cp % LXB_ENCODING_MULTI_HASH_JIS0208_SIZE) + 1 ];
658
-
659
- do {
660
- if (entry->key == cp) {
661
- if ((unsigned) ((uint32_t) (uintptr_t) entry->value - 8272) > (8835 - 8272)) {
662
- return entry;
663
- }
664
- }
665
-
666
- entry = &lxb_encoding_multi_hash_jis0208[entry->next];
667
- }
668
- while (entry != lxb_encoding_multi_hash_jis0208);
669
-
670
- return NULL;
671
- }
672
-
673
- lxb_status_t
674
- lxb_encoding_encode_shift_jis(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
675
- const lxb_codepoint_t *end)
676
- {
677
- uint32_t lead, trail;
678
- lxb_codepoint_t cp;
679
- const lexbor_shs_hash_t *hash;
680
-
681
- for (; *cps < end; (*cps)++) {
682
- cp = **cps;
683
-
684
- if (cp <= 0x80) {
685
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
686
- continue;
687
- }
688
-
689
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
690
- LXB_ENCODING_ENCODE_APPEND(ctx, cp - 0xFF61 + 0xA1);
691
- continue;
692
- }
693
-
694
- switch (cp) {
695
- case 0x00A5:
696
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x5C);
697
- continue;
698
-
699
- case 0x203E:
700
- LXB_ENCODING_ENCODE_APPEND(ctx, 0x7E);
701
- continue;
702
-
703
- case 0x2212:
704
- cp = 0xFF0D;
705
- break;
706
- }
707
-
708
- hash = lxb_encoding_encode_shift_jis_index(cp);
709
- if (hash == NULL) {
710
- LXB_ENCODING_ENCODE_ERROR(ctx);
711
- continue;
712
- }
713
-
714
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
715
- return LXB_STATUS_SMALL_BUFFER;
716
- }
717
-
718
- lead = (uint32_t) (uintptr_t) hash->value / 188;
719
- trail = (uint32_t) (uintptr_t) hash->value % 188;
720
-
721
- ctx->buffer_out[ctx->buffer_used++ ] = lead + ((lead < 0x1F) ? 0x81 : 0xC1);
722
- ctx->buffer_out[ctx->buffer_used++ ] = trail + ((trail < 0x3F) ? 0x40 : 0x41);
723
- }
724
-
725
- return LXB_STATUS_OK;
726
- }
727
-
728
- lxb_inline void
729
- lxb_encoding_encode_utf_16_write(lxb_encoding_encode_t *ctx, bool is_be,
730
- lxb_codepoint_t cp)
731
- {
732
- if (is_be) {
733
- ctx->buffer_out[ctx->buffer_used++] = cp >> 8;
734
- ctx->buffer_out[ctx->buffer_used++] = cp & 0x00FF;
735
-
736
- return;
737
- }
738
-
739
- ctx->buffer_out[ctx->buffer_used++] = cp & 0x00FF;
740
- ctx->buffer_out[ctx->buffer_used++] = cp >> 8;
741
- }
742
-
743
- lxb_inline int8_t
744
- lxb_encoding_encode_utf_16(lxb_encoding_encode_t *ctx, bool is_be,
745
- const lxb_codepoint_t **cps, const lxb_codepoint_t *end)
746
- {
747
- lxb_codepoint_t cp;
748
-
749
- for (; *cps < end; (*cps)++) {
750
- cp = **cps;
751
-
752
- if (cp < 0x10000) {
753
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
754
- return LXB_STATUS_SMALL_BUFFER;
755
- }
756
-
757
- lxb_encoding_encode_utf_16_write(ctx, is_be, cp);
758
-
759
- continue;
760
- }
761
-
762
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
763
- return LXB_STATUS_SMALL_BUFFER;
764
- }
765
-
766
- cp -= 0x10000;
767
-
768
- lxb_encoding_encode_utf_16_write(ctx, is_be, (0xD800 | (cp >> 0x0A)));
769
- lxb_encoding_encode_utf_16_write(ctx, is_be, (0xDC00 | (cp & 0x03FF)));
770
- }
771
-
772
- return LXB_STATUS_OK;
773
- }
774
-
775
- lxb_status_t
776
- lxb_encoding_encode_utf_16be(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
777
- const lxb_codepoint_t *end)
778
- {
779
- return lxb_encoding_encode_utf_16(ctx, true, cps, end);
780
- }
781
-
782
- lxb_status_t
783
- lxb_encoding_encode_utf_16le(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
784
- const lxb_codepoint_t *end)
785
- {
786
- return lxb_encoding_encode_utf_16(ctx, false, cps, end);
787
- }
788
-
789
- lxb_status_t
790
- lxb_encoding_encode_utf_8(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
791
- const lxb_codepoint_t *end)
792
- {
793
- lxb_codepoint_t cp;
794
- const lxb_codepoint_t *p = *cps;
795
-
796
- for (; p < end; p++) {
797
- cp = *p;
798
-
799
- if (cp < 0x80) {
800
- if ((ctx->buffer_used + 1) > ctx->buffer_length) {
801
- *cps = p;
802
-
803
- return LXB_STATUS_SMALL_BUFFER;
804
- }
805
-
806
- /* 0xxxxxxx */
807
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) cp;
808
- }
809
- else if (cp < 0x800) {
810
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
811
- *cps = p;
812
-
813
- return LXB_STATUS_SMALL_BUFFER;
814
- }
815
-
816
- /* 110xxxxx 10xxxxxx */
817
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xC0 | (cp >> 6 ));
818
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | (cp & 0x3F));
819
- }
820
- else if (cp < 0x10000) {
821
- if ((ctx->buffer_used + 3) > ctx->buffer_length) {
822
- *cps = p;
823
-
824
- return LXB_STATUS_SMALL_BUFFER;
825
- }
826
-
827
- /* 1110xxxx 10xxxxxx 10xxxxxx */
828
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xE0 | ((cp >> 12)));
829
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
830
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ( cp & 0x3F));
831
- }
832
- else if (cp < 0x110000) {
833
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
834
- *cps = p;
835
-
836
- return LXB_STATUS_SMALL_BUFFER;
837
- }
838
-
839
- /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
840
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0xF0 | ( cp >> 18));
841
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 12) & 0x3F));
842
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
843
- ctx->buffer_out[ ctx->buffer_used++ ] = (lxb_char_t) (0x80 | ( cp & 0x3F));
844
- }
845
- else {
846
- *cps = p;
847
- LXB_ENCODING_ENCODE_ERROR(ctx);
848
- }
849
- }
850
-
851
- *cps = p;
852
-
853
- return LXB_STATUS_OK;
854
- }
855
-
856
- lxb_inline uint32_t
857
- lxb_encoding_encode_gb18030_range(lxb_codepoint_t cp)
858
- {
859
- size_t mid, left, right;
860
- const lxb_encoding_range_index_t *range;
861
-
862
- if (cp == 0xE7C7) {
863
- return 7457;
864
- }
865
-
866
- left = 0;
867
- right = LXB_ENCODING_RANGE_INDEX_GB18030_SIZE;
868
- range = lxb_encoding_range_index_gb18030;
869
-
870
- /* Some compilers say about uninitialized mid */
871
- mid = 0;
872
-
873
- while (left < right) {
874
- mid = left + (right - left) / 2;
875
-
876
- if (range[mid].codepoint < cp) {
877
- left = mid + 1;
878
-
879
- if (left < right && range[left].codepoint > cp) {
880
- break;
881
- }
882
- }
883
- else if (range[mid].codepoint > cp) {
884
- right = mid - 1;
885
-
886
- if (right > 0 && range[right].codepoint <= cp) {
887
- mid = right;
888
- break;
889
- }
890
- }
891
- else {
892
- break;
893
- }
894
- }
895
-
896
- return range[mid].index + cp - range[mid].codepoint;
897
- }
898
-
899
- lxb_status_t
900
- lxb_encoding_encode_gb18030(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
901
- const lxb_codepoint_t *end)
902
- {
903
- uint32_t index;
904
- lxb_codepoint_t cp;
905
- const lexbor_shs_hash_t *hash;
906
-
907
- for (; *cps < end; (*cps)++) {
908
- cp = **cps;
909
-
910
- if (cp < 0x80) {
911
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
912
- continue;
913
- }
914
-
915
- if (cp == 0xE5E5) {
916
- LXB_ENCODING_ENCODE_ERROR(ctx);
917
- continue;
918
- }
919
-
920
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
921
- LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
922
- if (hash != NULL) {
923
- if ((ctx->buffer_used + 2) > ctx->buffer_length) {
924
- return LXB_STATUS_SMALL_BUFFER;
925
- }
926
-
927
- ctx->buffer_out[ ctx->buffer_used++ ] = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
928
-
929
- if (((uint32_t) (uintptr_t) hash->value % 190) < 0x3F) {
930
- ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value % 190) + 0x40;
931
- }
932
- else {
933
- ctx->buffer_out[ ctx->buffer_used++ ] = ((uint32_t) (uintptr_t) hash->value % 190) + 0x41;
934
- }
935
-
936
- continue;
937
- }
938
-
939
- if ((ctx->buffer_used + 4) > ctx->buffer_length) {
940
- return LXB_STATUS_SMALL_BUFFER;
941
- }
942
-
943
- index = lxb_encoding_encode_gb18030_range(cp);
944
-
945
- ctx->buffer_out[ ctx->buffer_used++ ] = (index / (10 * 126 * 10)) + 0x81;
946
- ctx->buffer_out[ ctx->buffer_used++ ] = ((index % (10 * 126 * 10)) / (10 * 126)) + 0x30;
947
-
948
- index = (index % (10 * 126 * 10)) % (10 * 126);
949
-
950
- ctx->buffer_out[ ctx->buffer_used++ ] = (index / 10) + 0x81;
951
- ctx->buffer_out[ ctx->buffer_used++ ] = (index % 10) + 0x30;
952
- }
953
-
954
- return LXB_STATUS_OK;
955
- }
956
-
957
- lxb_status_t
958
- lxb_encoding_encode_macintosh(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
959
- const lxb_codepoint_t *end)
960
- {
961
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_macintosh,
962
- LXB_ENCODING_SINGLE_HASH_MACINTOSH_SIZE);
963
- }
964
-
965
- lxb_status_t
966
- lxb_encoding_encode_replacement(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
967
- const lxb_codepoint_t *end)
968
- {
969
- *cps = end;
970
- return LXB_STATUS_ERROR;
971
- }
972
-
973
- lxb_status_t
974
- lxb_encoding_encode_windows_1250(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
975
- const lxb_codepoint_t *end)
976
- {
977
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1250,
978
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1250_SIZE);
979
- }
980
-
981
- lxb_status_t
982
- lxb_encoding_encode_windows_1251(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
983
- const lxb_codepoint_t *end)
984
- {
985
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1251,
986
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1251_SIZE);
987
- }
988
-
989
- lxb_status_t
990
- lxb_encoding_encode_windows_1252(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
991
- const lxb_codepoint_t *end)
992
- {
993
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1252,
994
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1252_SIZE);
995
- }
996
-
997
- lxb_status_t
998
- lxb_encoding_encode_windows_1253(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
999
- const lxb_codepoint_t *end)
1000
- {
1001
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1253,
1002
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1253_SIZE);
1003
- }
1004
-
1005
- lxb_status_t
1006
- lxb_encoding_encode_windows_1254(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1007
- const lxb_codepoint_t *end)
1008
- {
1009
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1254,
1010
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1254_SIZE);
1011
- }
1012
-
1013
- lxb_status_t
1014
- lxb_encoding_encode_windows_1255(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1015
- const lxb_codepoint_t *end)
1016
- {
1017
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1255,
1018
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1255_SIZE);
1019
- }
1020
-
1021
- lxb_status_t
1022
- lxb_encoding_encode_windows_1256(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1023
- const lxb_codepoint_t *end)
1024
- {
1025
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1256,
1026
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1256_SIZE);
1027
- }
1028
-
1029
- lxb_status_t
1030
- lxb_encoding_encode_windows_1257(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1031
- const lxb_codepoint_t *end)
1032
- {
1033
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1257,
1034
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1257_SIZE);
1035
- }
1036
-
1037
- lxb_status_t
1038
- lxb_encoding_encode_windows_1258(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1039
- const lxb_codepoint_t *end)
1040
- {
1041
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_1258,
1042
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1258_SIZE);
1043
- }
1044
-
1045
- lxb_status_t
1046
- lxb_encoding_encode_windows_874(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1047
- const lxb_codepoint_t *end)
1048
- {
1049
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_windows_874,
1050
- LXB_ENCODING_SINGLE_HASH_WINDOWS_874_SIZE);
1051
- }
1052
-
1053
- lxb_status_t
1054
- lxb_encoding_encode_x_mac_cyrillic(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1055
- const lxb_codepoint_t *end)
1056
- {
1057
- LXB_ENCODING_ENCODE_SINGLE_BYTE(lxb_encoding_single_hash_x_mac_cyrillic,
1058
- LXB_ENCODING_SINGLE_HASH_X_MAC_CYRILLIC_SIZE);
1059
- }
1060
-
1061
- lxb_status_t
1062
- lxb_encoding_encode_x_user_defined(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cps,
1063
- const lxb_codepoint_t *end)
1064
- {
1065
- lxb_codepoint_t cp;
1066
-
1067
- for (; *cps < end; (*cps)++) {
1068
- cp = **cps;
1069
-
1070
- if (cp < 0x80) {
1071
- LXB_ENCODING_ENCODE_APPEND(ctx, cp);
1072
- }
1073
- else if (cp >= 0xF780 && cp <= 0xF7FF) {
1074
- LXB_ENCODING_ENCODE_APPEND(ctx, (cp - 0xF780 + 0x80));
1075
- }
1076
- else {
1077
- LXB_ENCODING_ENCODE_ERROR(ctx);
1078
- }
1079
- }
1080
-
1081
- return LXB_STATUS_OK;
1082
- }
1083
-
1084
- /*
1085
- * Single
1086
- */
1087
- int8_t
1088
- lxb_encoding_encode_default_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1089
- const lxb_char_t *end, lxb_codepoint_t cp)
1090
- {
1091
- return lxb_encoding_encode_utf_8_single(ctx, data, end, cp);
1092
- }
1093
-
1094
- int8_t
1095
- lxb_encoding_encode_auto_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1096
- const lxb_char_t *end, lxb_codepoint_t cp)
1097
- {
1098
- return LXB_ENCODING_ENCODE_ERROR;
1099
- }
1100
-
1101
- int8_t
1102
- lxb_encoding_encode_undefined_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1103
- const lxb_char_t *end, lxb_codepoint_t cp)
1104
- {
1105
- return LXB_ENCODING_ENCODE_ERROR;
1106
- }
1107
-
1108
- int8_t
1109
- lxb_encoding_encode_big5_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1110
- const lxb_char_t *end, lxb_codepoint_t cp)
1111
- {
1112
- const lexbor_shs_hash_t *hash;
1113
-
1114
- if (cp < 0x80) {
1115
- *(*data)++ = (lxb_char_t) cp;
1116
-
1117
- return 1;
1118
- }
1119
-
1120
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_big5,
1121
- LXB_ENCODING_MULTI_HASH_BIG5_SIZE, cp);
1122
- if (hash == NULL) {
1123
- return LXB_ENCODING_ENCODE_ERROR;
1124
- }
1125
-
1126
- if ((*data + 2) > end) {
1127
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1128
- }
1129
-
1130
- *(*data)++ = ((uint32_t) (uintptr_t) hash->value) / 157 + 0x81;
1131
-
1132
- if ((((uint32_t) (uintptr_t) hash->value) % 157) < 0x3F) {
1133
- *(*data)++ = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x40;
1134
- }
1135
- else {
1136
- *(*data)++ = (((uint32_t) (uintptr_t) hash->value) % 157) + 0x62;
1137
- }
1138
-
1139
- return 2;
1140
- }
1141
-
1142
- int8_t
1143
- lxb_encoding_encode_euc_jp_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1144
- const lxb_char_t *end, lxb_codepoint_t cp)
1145
- {
1146
- const lexbor_shs_hash_t *hash;
1147
-
1148
- if (cp < 0x80) {
1149
- *(*data)++ = (lxb_char_t) cp;
1150
-
1151
- return 1;
1152
- }
1153
-
1154
- if (cp == 0x00A5) {
1155
- *(*data)++ = 0x5C;
1156
-
1157
- return 1;
1158
- }
1159
-
1160
- if (cp == 0x203E) {
1161
- *(*data)++ = 0x7E;
1162
-
1163
- return 1;
1164
- }
1165
-
1166
- if ((*data + 2) > end) {
1167
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1168
- }
1169
-
1170
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
1171
- *(*data)++ = 0x8E;
1172
- *(*data)++ = cp - 0xFF61 + 0xA1;
1173
-
1174
- return 2;
1175
- }
1176
-
1177
- if (cp == 0x2212) {
1178
- cp = 0xFF0D;
1179
- }
1180
-
1181
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
1182
- LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
1183
- if (hash == NULL) {
1184
- return LXB_ENCODING_ENCODE_ERROR;
1185
- }
1186
-
1187
- *(*data)++ = (uint32_t) (uintptr_t) hash->value / 94 + 0xA1;
1188
- *(*data)++ = (uint32_t) (uintptr_t) hash->value % 94 + 0xA1;
1189
-
1190
- return 2;
1191
- }
1192
-
1193
- int8_t
1194
- lxb_encoding_encode_euc_kr_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1195
- const lxb_char_t *end, lxb_codepoint_t cp)
1196
- {
1197
- const lexbor_shs_hash_t *hash;
1198
-
1199
- if (cp < 0x80) {
1200
- *(*data)++ = (lxb_char_t) cp;
1201
-
1202
- return 1;
1203
- }
1204
-
1205
- if ((*data + 2) > end) {
1206
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1207
- }
1208
-
1209
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_euc_kr,
1210
- LXB_ENCODING_MULTI_HASH_EUC_KR_SIZE, cp);
1211
- if (hash == NULL) {
1212
- return LXB_ENCODING_ENCODE_ERROR;
1213
- }
1214
-
1215
- *(*data)++ = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
1216
- *(*data)++ = (uint32_t) (uintptr_t) hash->value % 190 + 0x41;
1217
-
1218
- return 2;
1219
- }
1220
-
1221
- int8_t
1222
- lxb_encoding_encode_gbk_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1223
- const lxb_char_t *end, lxb_codepoint_t cp)
1224
- {
1225
- const lexbor_shs_hash_t *hash;
1226
-
1227
- if (cp < 0x80) {
1228
- *(*data)++ = (lxb_char_t) cp;
1229
-
1230
- return 1;
1231
- }
1232
-
1233
- if (cp == 0xE5E5) {
1234
- return LXB_ENCODING_ENCODE_ERROR;
1235
- }
1236
-
1237
- if (cp == 0x20AC) {
1238
- *(*data)++ = 0x80;
1239
-
1240
- return 1;
1241
- }
1242
-
1243
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
1244
- LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
1245
- if (hash != NULL) {
1246
- if ((*data + 2) > end) {
1247
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1248
- }
1249
-
1250
- *(*data)++ = (lxb_char_t) (uintptr_t) hash->value / 190 + 0x81;
1251
-
1252
- if (((lxb_char_t) (uintptr_t) hash->value % 190) < 0x3F) {
1253
- *(*data)++ = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x40;
1254
- }
1255
- else {
1256
- *(*data)++ = ((lxb_char_t) (uintptr_t) hash->value % 190) + 0x41;
1257
- }
1258
-
1259
- return 2;
1260
- }
1261
-
1262
- return LXB_ENCODING_ENCODE_ERROR;
1263
- }
1264
-
1265
- int8_t
1266
- lxb_encoding_encode_ibm866_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1267
- const lxb_char_t *end, lxb_codepoint_t cp)
1268
- {
1269
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_ibm866,
1270
- LXB_ENCODING_SINGLE_HASH_IBM866_SIZE);
1271
- }
1272
-
1273
- int8_t
1274
- lxb_encoding_encode_iso_2022_jp_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1275
- const lxb_char_t *end, lxb_codepoint_t cp)
1276
- {
1277
- int8_t size;
1278
- unsigned state;
1279
- const lexbor_shs_hash_t *hash;
1280
-
1281
- size = 0;
1282
- state = ctx->state;
1283
-
1284
- begin:
1285
-
1286
- switch (ctx->state) {
1287
- case LXB_ENCODING_ENCODE_2022_JP_ASCII:
1288
- if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
1289
- goto failed;
1290
- }
1291
-
1292
- if (cp < 0x80) {
1293
- *(*data)++ = (lxb_char_t) cp;
1294
-
1295
- return size + 1;
1296
- }
1297
-
1298
- if (cp == 0x00A5 || cp == 0x203E) {
1299
- /*
1300
- * Do not switch to the ROMAN stage with prepend code point
1301
- * to stream, add it immediately.
1302
- */
1303
- if ((*data + 4) > end) {
1304
- goto small_buffer;
1305
- }
1306
-
1307
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
1308
-
1309
- if (cp == 0x00A5) {
1310
- memcpy(*data, "\x1B\x28\x4A\x5C", 4);
1311
- *data = *data + 4;
1312
-
1313
- return size + 4;
1314
- }
1315
-
1316
- memcpy(*data, "\x1B\x28\x4A\x7E", 4);
1317
- *data = *data + 4;
1318
-
1319
- return size + 4;
1320
- }
1321
-
1322
- break;
1323
-
1324
- case LXB_ENCODING_ENCODE_2022_JP_ROMAN:
1325
- if (cp == 0x000E || cp == 0x000F || cp == 0x001B) {
1326
- goto failed;
1327
- }
1328
-
1329
- if (cp < 0x80) {
1330
- switch (cp) {
1331
- case 0x005C:
1332
- case 0x007E:
1333
- break;
1334
-
1335
- case 0x00A5:
1336
- *(*data)++ = 0x5C;
1337
- return size + 1;
1338
-
1339
- case 0x203E:
1340
- *(*data)++ = 0x7E;
1341
- return size + 1;
1342
-
1343
- default:
1344
- *(*data)++ = (lxb_char_t) cp;
1345
- return size + 1;
1346
- }
1347
-
1348
- /*
1349
- * Do not switch to the ANSI stage with prepend code point
1350
- * to stream, add it immediately.
1351
- */
1352
- if ((*data + 4) > end) {
1353
- goto small_buffer;
1354
- }
1355
-
1356
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
1357
-
1358
- memcpy(*data, "\x1B\x28\x42", 3);
1359
- *data = *data + 3;
1360
-
1361
- *(*data)++ = (lxb_char_t) cp;
1362
-
1363
- return size + 4;
1364
- }
1365
-
1366
- break;
1367
-
1368
- case LXB_ENCODING_ENCODE_2022_JP_JIS0208:
1369
- if (cp < 0x80) {
1370
- if ((*data + 4) > end) {
1371
- goto small_buffer;
1372
- }
1373
-
1374
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
1375
-
1376
- memcpy(*data, "\x1B\x28\x42", 3);
1377
- *data = *data + 3;
1378
-
1379
- *(*data)++ = (lxb_char_t) cp;
1380
-
1381
- return size + 4;
1382
- }
1383
-
1384
- if (cp == 0x00A5 || cp == 0x203E) {
1385
- if ((*data + 4) > end) {
1386
- goto small_buffer;
1387
- }
1388
-
1389
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ROMAN;
1390
-
1391
- if (cp == 0x00A5) {
1392
- memcpy(*data, "\x1B\x28\x4A\x5C", 4);
1393
- *data = *data + 4;
1394
-
1395
- return size + 4;
1396
- }
1397
-
1398
- memcpy(*data, "\x1B\x28\x4A\x7E", 4);
1399
- *data = *data + 4;
1400
-
1401
- return size + 4;
1402
- }
1403
-
1404
- break;
1405
- }
1406
-
1407
- if ((*data + 2) > end) {
1408
- goto small_buffer;
1409
- }
1410
-
1411
- if (cp == 0x2212) {
1412
- cp = 0xFF0D;
1413
- }
1414
-
1415
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
1416
- cp = lxb_encoding_multi_index_iso_2022_jp_katakana[cp - 0xFF61].codepoint;
1417
- }
1418
-
1419
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_jis0208,
1420
- LXB_ENCODING_MULTI_HASH_JIS0208_SIZE, cp);
1421
- if (hash == NULL) {
1422
- goto failed;
1423
- }
1424
-
1425
- if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_JIS0208) {
1426
- if ((*data + 3) > end) {
1427
- goto small_buffer;
1428
- }
1429
-
1430
- memcpy(*data, "\x1B\x24\x42", 3);
1431
- *data = *data + 3;
1432
-
1433
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_JIS0208;
1434
- size += 3;
1435
-
1436
- goto begin;
1437
- }
1438
-
1439
- *(*data)++ = (uint32_t) (uintptr_t) hash->value / 94 + 0x21;
1440
- *(*data)++ = (uint32_t) (uintptr_t) hash->value % 94 + 0x21;
1441
-
1442
- return size + 2;
1443
-
1444
- small_buffer:
1445
-
1446
- ctx->state = state;
1447
- *data = *data - size;
1448
-
1449
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1450
-
1451
- failed:
1452
-
1453
- *data = *data - size;
1454
-
1455
- return LXB_ENCODING_ENCODE_ERROR;
1456
- }
1457
-
1458
- int8_t
1459
- lxb_encoding_encode_iso_2022_jp_eof_single(lxb_encoding_encode_t *ctx,
1460
- lxb_char_t **data, const lxb_char_t *end)
1461
- {
1462
- if (ctx->state != LXB_ENCODING_ENCODE_2022_JP_ASCII) {
1463
- if ((*data + 3) > end) {
1464
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1465
- }
1466
-
1467
- memcpy(*data, "\x1B\x28\x42", 3);
1468
- *data = *data + 3;
1469
-
1470
- ctx->state = LXB_ENCODING_ENCODE_2022_JP_ASCII;
1471
-
1472
- return 3;
1473
- }
1474
-
1475
- return 0;
1476
- }
1477
-
1478
- int8_t
1479
- lxb_encoding_encode_iso_8859_10_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1480
- const lxb_char_t *end, lxb_codepoint_t cp)
1481
- {
1482
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_10,
1483
- LXB_ENCODING_SINGLE_HASH_ISO_8859_10_SIZE);
1484
- }
1485
-
1486
- int8_t
1487
- lxb_encoding_encode_iso_8859_13_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1488
- const lxb_char_t *end, lxb_codepoint_t cp)
1489
- {
1490
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_13,
1491
- LXB_ENCODING_SINGLE_HASH_ISO_8859_13_SIZE);
1492
- }
1493
-
1494
- int8_t
1495
- lxb_encoding_encode_iso_8859_14_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1496
- const lxb_char_t *end, lxb_codepoint_t cp)
1497
- {
1498
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_14,
1499
- LXB_ENCODING_SINGLE_HASH_ISO_8859_14_SIZE);
1500
- }
1501
-
1502
- int8_t
1503
- lxb_encoding_encode_iso_8859_15_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1504
- const lxb_char_t *end, lxb_codepoint_t cp)
1505
- {
1506
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_15,
1507
- LXB_ENCODING_SINGLE_HASH_ISO_8859_15_SIZE);
1508
- }
1509
-
1510
- int8_t
1511
- lxb_encoding_encode_iso_8859_16_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1512
- const lxb_char_t *end, lxb_codepoint_t cp)
1513
- {
1514
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_16,
1515
- LXB_ENCODING_SINGLE_HASH_ISO_8859_16_SIZE);
1516
- }
1517
-
1518
- int8_t
1519
- lxb_encoding_encode_iso_8859_2_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1520
- const lxb_char_t *end, lxb_codepoint_t cp)
1521
- {
1522
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_2,
1523
- LXB_ENCODING_SINGLE_HASH_ISO_8859_2_SIZE);
1524
- }
1525
-
1526
- int8_t
1527
- lxb_encoding_encode_iso_8859_3_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1528
- const lxb_char_t *end, lxb_codepoint_t cp)
1529
- {
1530
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_3,
1531
- LXB_ENCODING_SINGLE_HASH_ISO_8859_3_SIZE);
1532
- }
1533
-
1534
- int8_t
1535
- lxb_encoding_encode_iso_8859_4_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1536
- const lxb_char_t *end, lxb_codepoint_t cp)
1537
- {
1538
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_4,
1539
- LXB_ENCODING_SINGLE_HASH_ISO_8859_4_SIZE);
1540
- }
1541
-
1542
- int8_t
1543
- lxb_encoding_encode_iso_8859_5_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1544
- const lxb_char_t *end, lxb_codepoint_t cp)
1545
- {
1546
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_5,
1547
- LXB_ENCODING_SINGLE_HASH_ISO_8859_5_SIZE);
1548
- }
1549
-
1550
- int8_t
1551
- lxb_encoding_encode_iso_8859_6_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1552
- const lxb_char_t *end, lxb_codepoint_t cp)
1553
- {
1554
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_6,
1555
- LXB_ENCODING_SINGLE_HASH_ISO_8859_6_SIZE);
1556
- }
1557
-
1558
- int8_t
1559
- lxb_encoding_encode_iso_8859_7_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1560
- const lxb_char_t *end, lxb_codepoint_t cp)
1561
- {
1562
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_7,
1563
- LXB_ENCODING_SINGLE_HASH_ISO_8859_7_SIZE);
1564
- }
1565
-
1566
- int8_t
1567
- lxb_encoding_encode_iso_8859_8_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1568
- const lxb_char_t *end, lxb_codepoint_t cp)
1569
- {
1570
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_8,
1571
- LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
1572
- }
1573
-
1574
- int8_t
1575
- lxb_encoding_encode_iso_8859_8_i_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1576
- const lxb_char_t *end, lxb_codepoint_t cp)
1577
- {
1578
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_iso_8859_8,
1579
- LXB_ENCODING_SINGLE_HASH_ISO_8859_8_SIZE);
1580
- }
1581
-
1582
- int8_t
1583
- lxb_encoding_encode_koi8_r_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1584
- const lxb_char_t *end, lxb_codepoint_t cp)
1585
- {
1586
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_koi8_r,
1587
- LXB_ENCODING_SINGLE_HASH_KOI8_R_SIZE);
1588
- }
1589
-
1590
- int8_t
1591
- lxb_encoding_encode_koi8_u_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1592
- const lxb_char_t *end, lxb_codepoint_t cp)
1593
- {
1594
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_koi8_u,
1595
- LXB_ENCODING_SINGLE_HASH_KOI8_U_SIZE);
1596
- }
1597
-
1598
- int8_t
1599
- lxb_encoding_encode_shift_jis_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1600
- const lxb_char_t *end, lxb_codepoint_t cp)
1601
- {
1602
- uint32_t lead, trail;
1603
- const lexbor_shs_hash_t *hash;
1604
-
1605
- if (cp <= 0x80) {
1606
- *(*data)++ = (lxb_char_t) cp;
1607
-
1608
- return 1;
1609
- }
1610
-
1611
- if ((unsigned) (cp - 0xFF61) <= (0xFF9F - 0xFF61)) {
1612
- *(*data)++ = cp - 0xFF61 + 0xA1;
1613
-
1614
- return 1;
1615
- }
1616
-
1617
- switch (cp) {
1618
- case 0x00A5:
1619
- *(*data)++ = 0x5C;
1620
- return 1;
1621
-
1622
- case 0x203E:
1623
- *(*data)++ = 0x7E;
1624
- return 1;
1625
-
1626
- case 0x2212:
1627
- cp = 0xFF0D;
1628
- break;
1629
- }
1630
-
1631
- hash = lxb_encoding_encode_shift_jis_index(cp);
1632
- if (hash == NULL) {
1633
- return LXB_ENCODING_ENCODE_ERROR;
1634
- }
1635
-
1636
- if ((*data + 2) > end) {
1637
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1638
- }
1639
-
1640
- lead = (uint32_t) (uintptr_t) hash->value / 188;
1641
- trail = (uint32_t) (uintptr_t) hash->value % 188;
1642
-
1643
- *(*data)++ = lead + ((lead < 0x1F) ? 0x81 : 0xC1);
1644
- *(*data)++ = trail + ((trail < 0x3F) ? 0x40 : 0x41);
1645
-
1646
- return 2;
1647
- }
1648
-
1649
- lxb_inline void
1650
- lxb_encoding_encode_utf_16_write_single(bool is_be, lxb_char_t **data,
1651
- lxb_codepoint_t cp)
1652
- {
1653
- if (is_be) {
1654
- *(*data)++ = cp >> 8;
1655
- *(*data)++ = cp & 0x00FF;
1656
-
1657
- return;
1658
- }
1659
-
1660
- *(*data)++ = cp & 0x00FF;
1661
- *(*data)++ = cp >> 8;
1662
- }
1663
-
1664
- lxb_inline int8_t
1665
- lxb_encoding_encode_utf_16_single(lxb_encoding_encode_t *ctx, bool is_be,
1666
- lxb_char_t **data, const lxb_char_t *end, lxb_codepoint_t cp)
1667
- {
1668
- if ((*data + 2) > end) {
1669
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1670
- }
1671
-
1672
- if (cp < 0x10000) {
1673
- lxb_encoding_encode_utf_16_write_single(is_be, data, cp);
1674
-
1675
- return 2;
1676
- }
1677
-
1678
- if ((*data + 4) > end) {
1679
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1680
- }
1681
-
1682
- cp -= 0x10000;
1683
-
1684
- lxb_encoding_encode_utf_16_write_single(is_be, data, (0xD800 | (cp >> 0x0A)));
1685
- lxb_encoding_encode_utf_16_write_single(is_be, data, (0xDC00 | (cp & 0x03FF)));
1686
-
1687
- return 4;
1688
- }
1689
-
1690
- int8_t
1691
- lxb_encoding_encode_utf_16be_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1692
- const lxb_char_t *end, lxb_codepoint_t cp)
1693
- {
1694
- return lxb_encoding_encode_utf_16_single(ctx, true, data, end, cp);
1695
- }
1696
-
1697
- int8_t
1698
- lxb_encoding_encode_utf_16le_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1699
- const lxb_char_t *end, lxb_codepoint_t cp)
1700
- {
1701
- return lxb_encoding_encode_utf_16_single(ctx, false, data, end, cp);
1702
- }
1703
-
1704
- int8_t
1705
- lxb_encoding_encode_utf_8_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1706
- const lxb_char_t *end, lxb_codepoint_t cp)
1707
- {
1708
- if (cp < 0x80) {
1709
- /* 0xxxxxxx */
1710
- *(*data)++ = (lxb_char_t) cp;
1711
-
1712
- return 1;
1713
- }
1714
-
1715
- if (cp < 0x800) {
1716
- if ((*data + 2) > end) {
1717
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1718
- }
1719
-
1720
- /* 110xxxxx 10xxxxxx */
1721
- *(*data)++ = (lxb_char_t) (0xC0 | (cp >> 6 ));
1722
- *(*data)++ = (lxb_char_t) (0x80 | (cp & 0x3F));
1723
-
1724
- return 2;
1725
- }
1726
-
1727
- if (cp < 0x10000) {
1728
- if ((*data + 3) > end) {
1729
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1730
- }
1731
-
1732
- /* 1110xxxx 10xxxxxx 10xxxxxx */
1733
- *(*data)++ = (lxb_char_t) (0xE0 | ((cp >> 12)));
1734
- *(*data)++ = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
1735
- *(*data)++ = (lxb_char_t) (0x80 | ( cp & 0x3F));
1736
-
1737
- return 3;
1738
- }
1739
-
1740
- if (cp < 0x110000) {
1741
- if ((*data + 4) > end) {
1742
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1743
- }
1744
-
1745
- /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
1746
- *(*data)++ = (lxb_char_t) (0xF0 | ( cp >> 18));
1747
- *(*data)++ = (lxb_char_t) (0x80 | ((cp >> 12) & 0x3F));
1748
- *(*data)++ = (lxb_char_t) (0x80 | ((cp >> 6 ) & 0x3F));
1749
- *(*data)++ = (lxb_char_t) (0x80 | ( cp & 0x3F));
1750
-
1751
- return 4;
1752
- }
1753
-
1754
- return LXB_ENCODING_ENCODE_ERROR;
1755
- }
1756
-
1757
- int8_t
1758
- lxb_encoding_encode_gb18030_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1759
- const lxb_char_t *end, lxb_codepoint_t cp)
1760
- {
1761
- uint32_t index;
1762
- const lexbor_shs_hash_t *hash;
1763
-
1764
- if (cp < 0x80) {
1765
- *(*data)++ = (lxb_char_t) cp;
1766
-
1767
- return 1;
1768
- }
1769
-
1770
- if (cp == 0xE5E5) {
1771
- return LXB_ENCODING_ENCODE_ERROR;
1772
- }
1773
-
1774
- hash = lexbor_shs_hash_get_static(lxb_encoding_multi_hash_gb18030,
1775
- LXB_ENCODING_MULTI_HASH_GB18030_SIZE, cp);
1776
- if (hash != NULL) {
1777
- if ((*data + 2) > end) {
1778
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1779
- }
1780
-
1781
- *(*data)++ = (uint32_t) (uintptr_t) hash->value / 190 + 0x81;
1782
-
1783
- if (((uint32_t) (uintptr_t) hash->value % 190) < 0x3F) {
1784
- *(*data)++ = ((uint32_t) (uintptr_t) hash->value % 190) + 0x40;
1785
- }
1786
- else {
1787
- *(*data)++ = ((uint32_t) (uintptr_t) hash->value % 190) + 0x41;
1788
- }
1789
-
1790
- return 2;
1791
- }
1792
-
1793
- if ((*data + 4) > end) {
1794
- return LXB_ENCODING_ENCODE_SMALL_BUFFER;
1795
- }
1796
-
1797
- index = lxb_encoding_encode_gb18030_range(cp);
1798
-
1799
- *(*data)++ = (index / (10 * 126 * 10)) + 0x81;
1800
- *(*data)++ = ((index % (10 * 126 * 10)) / (10 * 126)) + 0x30;
1801
-
1802
- index = (index % (10 * 126 * 10)) % (10 * 126);
1803
-
1804
- *(*data)++ = (index / 10) + 0x81;
1805
- *(*data)++ = (index % 10) + 0x30;
1806
-
1807
- return 4;
1808
- }
1809
-
1810
- int8_t
1811
- lxb_encoding_encode_macintosh_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1812
- const lxb_char_t *end, lxb_codepoint_t cp)
1813
- {
1814
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_macintosh,
1815
- LXB_ENCODING_SINGLE_HASH_MACINTOSH_SIZE);
1816
- }
1817
-
1818
- int8_t
1819
- lxb_encoding_encode_replacement_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1820
- const lxb_char_t *end, lxb_codepoint_t cp)
1821
- {
1822
- (*data)++;
1823
- return LXB_ENCODING_ENCODE_ERROR;
1824
- }
1825
-
1826
- int8_t
1827
- lxb_encoding_encode_windows_1250_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1828
- const lxb_char_t *end, lxb_codepoint_t cp)
1829
- {
1830
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1250,
1831
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1250_SIZE);
1832
- }
1833
-
1834
- int8_t
1835
- lxb_encoding_encode_windows_1251_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1836
- const lxb_char_t *end, lxb_codepoint_t cp)
1837
- {
1838
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1251,
1839
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1251_SIZE);
1840
- }
1841
-
1842
- int8_t
1843
- lxb_encoding_encode_windows_1252_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1844
- const lxb_char_t *end, lxb_codepoint_t cp)
1845
- {
1846
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1252,
1847
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1252_SIZE);
1848
- }
1849
-
1850
- int8_t
1851
- lxb_encoding_encode_windows_1253_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1852
- const lxb_char_t *end, lxb_codepoint_t cp)
1853
- {
1854
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1253,
1855
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1253_SIZE);
1856
- }
1857
-
1858
- int8_t
1859
- lxb_encoding_encode_windows_1254_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1860
- const lxb_char_t *end, lxb_codepoint_t cp)
1861
- {
1862
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1254,
1863
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1254_SIZE);
1864
- }
1865
-
1866
- int8_t
1867
- lxb_encoding_encode_windows_1255_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1868
- const lxb_char_t *end, lxb_codepoint_t cp)
1869
- {
1870
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1255,
1871
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1255_SIZE);
1872
- }
1873
-
1874
- int8_t
1875
- lxb_encoding_encode_windows_1256_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1876
- const lxb_char_t *end, lxb_codepoint_t cp)
1877
- {
1878
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1256,
1879
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1256_SIZE);
1880
- }
1881
-
1882
- int8_t
1883
- lxb_encoding_encode_windows_1257_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1884
- const lxb_char_t *end, lxb_codepoint_t cp)
1885
- {
1886
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1257,
1887
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1257_SIZE);
1888
- }
1889
-
1890
- int8_t
1891
- lxb_encoding_encode_windows_1258_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1892
- const lxb_char_t *end, lxb_codepoint_t cp)
1893
- {
1894
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_1258,
1895
- LXB_ENCODING_SINGLE_HASH_WINDOWS_1258_SIZE);
1896
- }
1897
-
1898
- int8_t
1899
- lxb_encoding_encode_windows_874_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1900
- const lxb_char_t *end, lxb_codepoint_t cp)
1901
- {
1902
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_windows_874,
1903
- LXB_ENCODING_SINGLE_HASH_WINDOWS_874_SIZE);
1904
- }
1905
-
1906
- int8_t
1907
- lxb_encoding_encode_x_mac_cyrillic_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1908
- const lxb_char_t *end, lxb_codepoint_t cp)
1909
- {
1910
- LXB_ENCODING_ENCODE_BYTE_SINGLE(lxb_encoding_single_hash_x_mac_cyrillic,
1911
- LXB_ENCODING_SINGLE_HASH_X_MAC_CYRILLIC_SIZE);
1912
- }
1913
-
1914
- int8_t
1915
- lxb_encoding_encode_x_user_defined_single(lxb_encoding_encode_t *ctx, lxb_char_t **data,
1916
- const lxb_char_t *end, lxb_codepoint_t cp)
1917
- {
1918
- if (cp < 0x80) {
1919
- *(*data)++ = (lxb_char_t) cp;
1920
-
1921
- return 1;
1922
- }
1923
-
1924
- if (cp >= 0xF780 && cp <= 0xF7FF) {
1925
- *(*data)++ = (lxb_char_t) (cp - 0xF780 + 0x80);
1926
-
1927
- return 1;
1928
- }
1929
-
1930
- return LXB_ENCODING_ENCODE_ERROR;
1931
- }