nokolexbor 0.3.4 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/ext/nokolexbor/extconf.rb +9 -5
  3. data/ext/nokolexbor/nl_attribute.c +46 -0
  4. data/ext/nokolexbor/nl_cdata.c +8 -0
  5. data/ext/nokolexbor/nl_comment.c +6 -0
  6. data/ext/nokolexbor/nl_document.c +53 -7
  7. data/ext/nokolexbor/nl_document_fragment.c +9 -0
  8. data/ext/nokolexbor/nl_error.c +21 -19
  9. data/ext/nokolexbor/nl_node.c +255 -50
  10. data/ext/nokolexbor/nl_node_set.c +56 -1
  11. data/ext/nokolexbor/nl_processing_instruction.c +6 -0
  12. data/ext/nokolexbor/nl_text.c +6 -0
  13. data/ext/nokolexbor/nokolexbor.h +1 -0
  14. data/lib/nokolexbor/document.rb +52 -5
  15. data/lib/nokolexbor/document_fragment.rb +11 -0
  16. data/lib/nokolexbor/node.rb +367 -18
  17. data/lib/nokolexbor/node_set.rb +56 -0
  18. data/lib/nokolexbor/version.rb +1 -1
  19. metadata +2 -24
  20. data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
  21. data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
  22. data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
  23. data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
  24. data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
  25. data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
  26. data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
  27. data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
  28. data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
  29. data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
  30. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
  31. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
  32. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
  33. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
  34. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
  35. data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
  36. data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
  37. data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
  38. data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
  39. data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
  40. data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
  41. data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
@@ -1,475 +0,0 @@
1
- /*
2
- * Copyright (C) 2019 Alexander Borisov
3
- *
4
- * Author: Alexander Borisov <borisov@lexbor.com>
5
- */
6
-
7
- #ifndef LEXBOR_ENCODING_ENCODING_H
8
- #define LEXBOR_ENCODING_ENCODING_H
9
-
10
- #ifdef __cplusplus
11
- extern "C" {
12
- #endif
13
-
14
-
15
- #include "lexbor/encoding/base.h"
16
- #include "lexbor/encoding/res.h"
17
- #include "lexbor/encoding/encode.h"
18
- #include "lexbor/encoding/decode.h"
19
-
20
- #include "lexbor/core/shs.h"
21
-
22
-
23
- /*
24
- * Before searching will be removed any leading and trailing
25
- * ASCII whitespace in name.
26
- */
27
- LXB_API const lxb_encoding_data_t *
28
- lxb_encoding_data_by_pre_name(const lxb_char_t *name, size_t length);
29
-
30
-
31
- /*
32
- * Inline functions
33
- */
34
-
35
- /*
36
- * Encode
37
- */
38
- lxb_inline lxb_status_t
39
- lxb_encoding_encode_init(lxb_encoding_encode_t *encode,
40
- const lxb_encoding_data_t *encoding_data,
41
- lxb_char_t *buffer_out, size_t buffer_length)
42
- {
43
- if (encoding_data == NULL) {
44
- return LXB_STATUS_ERROR_WRONG_ARGS;
45
- }
46
-
47
- memset(encode, 0, sizeof(lxb_encoding_encode_t));
48
-
49
- encode->buffer_out = buffer_out;
50
- encode->buffer_length = buffer_length;
51
- encode->encoding_data = encoding_data;
52
-
53
- return LXB_STATUS_OK;
54
- }
55
-
56
- lxb_inline lxb_status_t
57
- lxb_encoding_encode_finish(lxb_encoding_encode_t *encode)
58
- {
59
- if (encode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP) {
60
- return lxb_encoding_encode_iso_2022_jp_eof(encode);
61
- }
62
-
63
- return LXB_STATUS_OK;
64
- }
65
-
66
- lxb_inline lxb_char_t *
67
- lxb_encoding_encode_buf(lxb_encoding_encode_t *encode)
68
- {
69
- return encode->buffer_out;
70
- }
71
-
72
- lxb_inline void
73
- lxb_encoding_encode_buf_set(lxb_encoding_encode_t *encode,
74
- lxb_char_t *buffer_out, size_t buffer_length)
75
- {
76
- encode->buffer_out = buffer_out;
77
- encode->buffer_length = buffer_length;
78
- encode->buffer_used = 0;
79
- }
80
-
81
- lxb_inline void
82
- lxb_encoding_encode_buf_used_set(lxb_encoding_encode_t *encode,
83
- size_t buffer_used)
84
- {
85
- encode->buffer_used = buffer_used;
86
- }
87
-
88
- lxb_inline size_t
89
- lxb_encoding_encode_buf_used(lxb_encoding_encode_t *encode)
90
- {
91
- return encode->buffer_used;
92
- }
93
-
94
- lxb_inline lxb_status_t
95
- lxb_encoding_encode_replace_set(lxb_encoding_encode_t *encode,
96
- const lxb_char_t *replace, size_t length)
97
- {
98
- if (encode->buffer_out == NULL || encode->buffer_length < length) {
99
- return LXB_STATUS_SMALL_BUFFER;
100
- }
101
-
102
- encode->replace_to = replace;
103
- encode->replace_len = length;
104
-
105
- return LXB_STATUS_OK;
106
- }
107
-
108
- lxb_inline lxb_status_t
109
- lxb_encoding_encode_buf_add_to(lxb_encoding_encode_t *encode,
110
- lxb_char_t *data, size_t length)
111
- {
112
- if ((encode->buffer_used + length) > encode->buffer_length) {
113
- return LXB_STATUS_SMALL_BUFFER;
114
- }
115
-
116
- memcpy(&encode->buffer_out[encode->buffer_used], data, length);
117
-
118
- encode->buffer_used += length;
119
-
120
- return LXB_STATUS_OK;
121
- }
122
-
123
- /*
124
- * Decode
125
- */
126
- lxb_inline lxb_status_t
127
- lxb_encoding_decode_buf_add_to(lxb_encoding_decode_t *decode,
128
- const lxb_codepoint_t *data, size_t length)
129
- {
130
- if ((decode->buffer_used + length) > decode->buffer_length) {
131
- return LXB_STATUS_SMALL_BUFFER;
132
- }
133
-
134
- memcpy(&decode->buffer_out[decode->buffer_used], data,
135
- sizeof(lxb_codepoint_t) * length);
136
-
137
- decode->buffer_used += length;
138
-
139
- return LXB_STATUS_OK;
140
- }
141
-
142
- lxb_inline lxb_status_t
143
- lxb_encoding_decode_init(lxb_encoding_decode_t *decode,
144
- const lxb_encoding_data_t *encoding_data,
145
- lxb_codepoint_t *buffer_out, size_t buffer_length)
146
- {
147
- if (encoding_data == NULL) {
148
- return LXB_STATUS_ERROR_WRONG_ARGS;
149
- }
150
-
151
- memset(decode, 0, sizeof(lxb_encoding_decode_t));
152
-
153
- decode->buffer_out = buffer_out;
154
- decode->buffer_length = buffer_length;
155
- decode->encoding_data = encoding_data;
156
-
157
- return LXB_STATUS_OK;
158
- }
159
-
160
- lxb_inline lxb_status_t
161
- lxb_encoding_decode_finish(lxb_encoding_decode_t *decode)
162
- {
163
- lxb_status_t status;
164
-
165
- if (decode->status != LXB_STATUS_OK) {
166
-
167
- if (decode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP
168
- && decode->u.iso_2022_jp.state == LXB_ENCODING_DECODE_2022_JP_ASCII)
169
- {
170
- return LXB_STATUS_OK;
171
- }
172
-
173
- if (decode->replace_to == NULL) {
174
- return LXB_STATUS_ERROR;
175
- }
176
-
177
- status = lxb_encoding_decode_buf_add_to(decode, decode->replace_to,
178
- decode->replace_len);
179
- if (status == LXB_STATUS_SMALL_BUFFER) {
180
- return status;
181
- }
182
- }
183
-
184
- return LXB_STATUS_OK;
185
- }
186
-
187
- lxb_inline lxb_codepoint_t *
188
- lxb_encoding_decode_buf(lxb_encoding_decode_t *decode)
189
- {
190
- return decode->buffer_out;
191
- }
192
-
193
- lxb_inline void
194
- lxb_encoding_decode_buf_set(lxb_encoding_decode_t *decode,
195
- lxb_codepoint_t *buffer_out, size_t buffer_length)
196
- {
197
- decode->buffer_out = buffer_out;
198
- decode->buffer_length = buffer_length;
199
- decode->buffer_used = 0;
200
- }
201
-
202
- lxb_inline void
203
- lxb_encoding_decode_buf_used_set(lxb_encoding_decode_t *decode,
204
- size_t buffer_used)
205
- {
206
- decode->buffer_used = buffer_used;
207
- }
208
-
209
- lxb_inline size_t
210
- lxb_encoding_decode_buf_used(lxb_encoding_decode_t *decode)
211
- {
212
- return decode->buffer_used;
213
- }
214
-
215
- lxb_inline lxb_status_t
216
- lxb_encoding_decode_replace_set(lxb_encoding_decode_t *decode,
217
- const lxb_codepoint_t *replace, size_t length)
218
- {
219
- if (decode->buffer_out == NULL || decode->buffer_length < length) {
220
- return LXB_STATUS_SMALL_BUFFER;
221
- }
222
-
223
- decode->replace_to = replace;
224
- decode->replace_len = length;
225
-
226
- return LXB_STATUS_OK;
227
- }
228
-
229
- /*
230
- * Single encode.
231
- */
232
- lxb_inline lxb_status_t
233
- lxb_encoding_encode_init_single(lxb_encoding_encode_t *encode,
234
- const lxb_encoding_data_t *encoding_data)
235
- {
236
- if (encoding_data == NULL) {
237
- return LXB_STATUS_ERROR_WRONG_ARGS;
238
- }
239
-
240
- memset(encode, 0, sizeof(lxb_encoding_encode_t));
241
-
242
- encode->encoding_data = encoding_data;
243
-
244
- return LXB_STATUS_OK;
245
- }
246
-
247
- lxb_inline int8_t
248
- lxb_encoding_encode_finish_single(lxb_encoding_encode_t *encode,
249
- lxb_char_t **data, const lxb_char_t *end)
250
- {
251
- if (encode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP) {
252
- return lxb_encoding_encode_iso_2022_jp_eof_single(encode, data, end);
253
- }
254
-
255
- return 0;
256
- }
257
-
258
- /*
259
- * Single decode.
260
- */
261
- lxb_inline lxb_status_t
262
- lxb_encoding_decode_init_single(lxb_encoding_decode_t *decode,
263
- const lxb_encoding_data_t *encoding_data)
264
- {
265
- if (encoding_data == NULL) {
266
- return LXB_STATUS_ERROR_WRONG_ARGS;
267
- }
268
-
269
- memset(decode, 0, sizeof(lxb_encoding_decode_t));
270
-
271
- decode->encoding_data = encoding_data;
272
-
273
- return LXB_STATUS_OK;
274
- }
275
-
276
- lxb_inline lxb_status_t
277
- lxb_encoding_decode_finish_single(lxb_encoding_decode_t *decode)
278
- {
279
- if (decode->status != LXB_STATUS_OK) {
280
-
281
- if (decode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP
282
- && decode->u.iso_2022_jp.state == LXB_ENCODING_DECODE_2022_JP_ASCII)
283
- {
284
- return LXB_STATUS_OK;
285
- }
286
-
287
- return LXB_STATUS_ERROR;
288
- }
289
-
290
- return LXB_STATUS_OK;
291
- }
292
-
293
- /*
294
- * Encoding data.
295
- */
296
- lxb_inline const lxb_encoding_data_t *
297
- lxb_encoding_data_by_name(const lxb_char_t *name, size_t length)
298
- {
299
- const lexbor_shs_entry_t *entry;
300
-
301
- if (length == 0) {
302
- return NULL;
303
- }
304
-
305
- entry = lexbor_shs_entry_get_lower_static(lxb_encoding_res_shs_entities,
306
- name, length);
307
- if (entry == NULL) {
308
- return NULL;
309
- }
310
-
311
- return (const lxb_encoding_data_t *) entry->value;
312
- }
313
-
314
- lxb_inline const lxb_encoding_data_t *
315
- lxb_encoding_data(lxb_encoding_t encoding)
316
- {
317
- if (encoding >= LXB_ENCODING_LAST_ENTRY) {
318
- return NULL;
319
- }
320
-
321
- return &lxb_encoding_res_map[encoding];
322
- }
323
-
324
- lxb_inline lxb_encoding_encode_f
325
- lxb_encoding_encode_function(lxb_encoding_t encoding)
326
- {
327
- if (encoding >= LXB_ENCODING_LAST_ENTRY) {
328
- return NULL;
329
- }
330
-
331
- return lxb_encoding_res_map[encoding].encode;
332
- }
333
-
334
- lxb_inline lxb_encoding_decode_f
335
- lxb_encoding_decode_function(lxb_encoding_t encoding)
336
- {
337
- if (encoding >= LXB_ENCODING_LAST_ENTRY) {
338
- return NULL;
339
- }
340
-
341
- return lxb_encoding_res_map[encoding].decode;
342
- }
343
-
344
- lxb_inline lxb_status_t
345
- lxb_encoding_data_call_encode(lxb_encoding_data_t *encoding_data, lxb_encoding_encode_t *ctx,
346
- const lxb_codepoint_t **cp, const lxb_codepoint_t *end)
347
- {
348
- return encoding_data->encode(ctx, cp, end);
349
- }
350
-
351
- lxb_inline lxb_status_t
352
- lxb_encoding_data_call_decode(lxb_encoding_data_t *encoding_data, lxb_encoding_decode_t *ctx,
353
- const lxb_char_t **data, const lxb_char_t *end)
354
- {
355
- return encoding_data->decode(ctx, data, end);
356
- }
357
-
358
- lxb_inline lxb_encoding_t
359
- lxb_encoding_data_encoding(lxb_encoding_data_t *data)
360
- {
361
- return data->encoding;
362
- }
363
-
364
- /*
365
- * No inline functions for ABI.
366
- */
367
- LXB_API lxb_status_t
368
- lxb_encoding_encode_init_noi(lxb_encoding_encode_t *encode,
369
- const lxb_encoding_data_t *encoding_data,
370
- lxb_char_t *buffer_out, size_t buffer_length);
371
-
372
- LXB_API lxb_status_t
373
- lxb_encoding_encode_finish_noi(lxb_encoding_encode_t *encode);
374
-
375
- LXB_API lxb_char_t *
376
- lxb_encoding_encode_buf_noi(lxb_encoding_encode_t *encode);
377
-
378
- LXB_API void
379
- lxb_encoding_encode_buf_set_noi(lxb_encoding_encode_t *encode,
380
- lxb_char_t *buffer_out, size_t buffer_length);
381
-
382
- LXB_API void
383
- lxb_encoding_encode_buf_used_set_noi(lxb_encoding_encode_t *encode,
384
- size_t buffer_used);
385
-
386
- LXB_API size_t
387
- lxb_encoding_encode_buf_used_noi(lxb_encoding_encode_t *encode);
388
-
389
- LXB_API lxb_status_t
390
- lxb_encoding_encode_replace_set_noi(lxb_encoding_encode_t *encode,
391
- const lxb_char_t *replace, size_t buffer_length);
392
-
393
- LXB_API lxb_status_t
394
- lxb_encoding_encode_buf_add_to_noi(lxb_encoding_encode_t *encode,
395
- lxb_char_t *data, size_t length);
396
-
397
- LXB_API lxb_status_t
398
- lxb_encoding_decode_init_noi(lxb_encoding_decode_t *decode,
399
- const lxb_encoding_data_t *encoding_data,
400
- lxb_codepoint_t *buffer_out, size_t buffer_length);
401
-
402
- LXB_API lxb_status_t
403
- lxb_encoding_decode_finish_noi(lxb_encoding_decode_t *decode);
404
-
405
- LXB_API lxb_codepoint_t *
406
- lxb_encoding_decode_buf_noi(lxb_encoding_decode_t *decode);
407
-
408
- LXB_API void
409
- lxb_encoding_decode_buf_set_noi(lxb_encoding_decode_t *decode,
410
- lxb_codepoint_t *buffer_out, size_t buffer_length);
411
-
412
- LXB_API void
413
- lxb_encoding_decode_buf_used_set_noi(lxb_encoding_decode_t *decode,
414
- size_t buffer_used);
415
-
416
- LXB_API size_t
417
- lxb_encoding_decode_buf_used_noi(lxb_encoding_decode_t *decode);
418
-
419
- LXB_API lxb_status_t
420
- lxb_encoding_decode_replace_set_noi(lxb_encoding_decode_t *decode,
421
- const lxb_codepoint_t *replace, size_t length);
422
-
423
- LXB_API lxb_status_t
424
- lxb_encoding_decode_buf_add_to_noi(lxb_encoding_decode_t *decode,
425
- const lxb_codepoint_t *data, size_t length);
426
-
427
- LXB_API lxb_status_t
428
- lxb_encoding_encode_init_single_noi(lxb_encoding_encode_t *encode,
429
- const lxb_encoding_data_t *encoding_data);
430
-
431
- LXB_API int8_t
432
- lxb_encoding_encode_finish_single_noi(lxb_encoding_encode_t *encode,
433
- lxb_char_t **data, const lxb_char_t *end);
434
-
435
- LXB_API lxb_status_t
436
- lxb_encoding_decode_init_single_noi(lxb_encoding_decode_t *decode,
437
- const lxb_encoding_data_t *encoding_data);
438
-
439
- LXB_API lxb_status_t
440
- lxb_encoding_decode_finish_single_noi(lxb_encoding_decode_t *decode);
441
-
442
- LXB_API const lxb_encoding_data_t *
443
- lxb_encoding_data_by_name_noi(const lxb_char_t *name, size_t length);
444
-
445
- LXB_API const lxb_encoding_data_t *
446
- lxb_encoding_data_noi(lxb_encoding_t encoding);
447
-
448
- LXB_API lxb_encoding_encode_f
449
- lxb_encoding_encode_function_noi(lxb_encoding_t encoding);
450
-
451
- LXB_API lxb_encoding_decode_f
452
- lxb_encoding_decode_function_noi(lxb_encoding_t encoding);
453
-
454
- LXB_API lxb_status_t
455
- lxb_encoding_data_call_encode_noi(lxb_encoding_data_t *encoding_data, lxb_encoding_encode_t *ctx,
456
- const lxb_codepoint_t **cp, const lxb_codepoint_t *end);
457
- LXB_API lxb_status_t
458
- lxb_encoding_data_call_decode_noi(lxb_encoding_data_t *encoding_data, lxb_encoding_decode_t *ctx,
459
- const lxb_char_t **data, const lxb_char_t *end);
460
-
461
- LXB_API lxb_encoding_t
462
- lxb_encoding_data_encoding_noi(lxb_encoding_data_t *data);
463
-
464
- LXB_API size_t
465
- lxb_encoding_encode_t_sizeof(void);
466
-
467
- LXB_API size_t
468
- lxb_encoding_decode_t_sizeof(void);
469
-
470
-
471
- #ifdef __cplusplus
472
- } /* extern "C" */
473
- #endif
474
-
475
- #endif /* LEXBOR_ENCODING_ENCODING_H */