nokolexbor 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/ext/nokolexbor/nl_attribute.c +201 -0
  3. data/ext/nokolexbor/nl_cdata.c +8 -0
  4. data/ext/nokolexbor/nl_comment.c +6 -0
  5. data/ext/nokolexbor/nl_document.c +53 -7
  6. data/ext/nokolexbor/nl_document_fragment.c +9 -0
  7. data/ext/nokolexbor/nl_error.c +21 -19
  8. data/ext/nokolexbor/nl_node.c +317 -48
  9. data/ext/nokolexbor/nl_node_set.c +56 -1
  10. data/ext/nokolexbor/nl_processing_instruction.c +6 -0
  11. data/ext/nokolexbor/nl_text.c +6 -0
  12. data/ext/nokolexbor/nokolexbor.c +1 -0
  13. data/ext/nokolexbor/nokolexbor.h +2 -0
  14. data/lib/nokolexbor/document.rb +52 -5
  15. data/lib/nokolexbor/document_fragment.rb +11 -0
  16. data/lib/nokolexbor/node.rb +370 -24
  17. data/lib/nokolexbor/node_set.rb +56 -0
  18. data/lib/nokolexbor/version.rb +1 -1
  19. data/lib/nokolexbor.rb +0 -1
  20. metadata +3 -25
  21. data/lib/nokolexbor/attribute.rb +0 -18
  22. data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
  23. data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
  24. data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
  25. data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
  26. data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
  27. data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
  28. data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
  29. data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
  30. data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
  31. data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
  32. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
  33. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
  34. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
  35. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
  36. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
  37. data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
  38. data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
  39. data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
  40. data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
  41. data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
  42. data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
  43. data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
@@ -1,475 +0,0 @@
1
- /*
2
- * Copyright (C) 2019 Alexander Borisov
3
- *
4
- * Author: Alexander Borisov <borisov@lexbor.com>
5
- */
6
-
7
- #ifndef LEXBOR_ENCODING_ENCODING_H
8
- #define LEXBOR_ENCODING_ENCODING_H
9
-
10
- #ifdef __cplusplus
11
- extern "C" {
12
- #endif
13
-
14
-
15
- #include "lexbor/encoding/base.h"
16
- #include "lexbor/encoding/res.h"
17
- #include "lexbor/encoding/encode.h"
18
- #include "lexbor/encoding/decode.h"
19
-
20
- #include "lexbor/core/shs.h"
21
-
22
-
23
- /*
24
- * Before searching will be removed any leading and trailing
25
- * ASCII whitespace in name.
26
- */
27
- LXB_API const lxb_encoding_data_t *
28
- lxb_encoding_data_by_pre_name(const lxb_char_t *name, size_t length);
29
-
30
-
31
- /*
32
- * Inline functions
33
- */
34
-
35
- /*
36
- * Encode
37
- */
38
- lxb_inline lxb_status_t
39
- lxb_encoding_encode_init(lxb_encoding_encode_t *encode,
40
- const lxb_encoding_data_t *encoding_data,
41
- lxb_char_t *buffer_out, size_t buffer_length)
42
- {
43
- if (encoding_data == NULL) {
44
- return LXB_STATUS_ERROR_WRONG_ARGS;
45
- }
46
-
47
- memset(encode, 0, sizeof(lxb_encoding_encode_t));
48
-
49
- encode->buffer_out = buffer_out;
50
- encode->buffer_length = buffer_length;
51
- encode->encoding_data = encoding_data;
52
-
53
- return LXB_STATUS_OK;
54
- }
55
-
56
- lxb_inline lxb_status_t
57
- lxb_encoding_encode_finish(lxb_encoding_encode_t *encode)
58
- {
59
- if (encode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP) {
60
- return lxb_encoding_encode_iso_2022_jp_eof(encode);
61
- }
62
-
63
- return LXB_STATUS_OK;
64
- }
65
-
66
- lxb_inline lxb_char_t *
67
- lxb_encoding_encode_buf(lxb_encoding_encode_t *encode)
68
- {
69
- return encode->buffer_out;
70
- }
71
-
72
- lxb_inline void
73
- lxb_encoding_encode_buf_set(lxb_encoding_encode_t *encode,
74
- lxb_char_t *buffer_out, size_t buffer_length)
75
- {
76
- encode->buffer_out = buffer_out;
77
- encode->buffer_length = buffer_length;
78
- encode->buffer_used = 0;
79
- }
80
-
81
- lxb_inline void
82
- lxb_encoding_encode_buf_used_set(lxb_encoding_encode_t *encode,
83
- size_t buffer_used)
84
- {
85
- encode->buffer_used = buffer_used;
86
- }
87
-
88
- lxb_inline size_t
89
- lxb_encoding_encode_buf_used(lxb_encoding_encode_t *encode)
90
- {
91
- return encode->buffer_used;
92
- }
93
-
94
- lxb_inline lxb_status_t
95
- lxb_encoding_encode_replace_set(lxb_encoding_encode_t *encode,
96
- const lxb_char_t *replace, size_t length)
97
- {
98
- if (encode->buffer_out == NULL || encode->buffer_length < length) {
99
- return LXB_STATUS_SMALL_BUFFER;
100
- }
101
-
102
- encode->replace_to = replace;
103
- encode->replace_len = length;
104
-
105
- return LXB_STATUS_OK;
106
- }
107
-
108
- lxb_inline lxb_status_t
109
- lxb_encoding_encode_buf_add_to(lxb_encoding_encode_t *encode,
110
- lxb_char_t *data, size_t length)
111
- {
112
- if ((encode->buffer_used + length) > encode->buffer_length) {
113
- return LXB_STATUS_SMALL_BUFFER;
114
- }
115
-
116
- memcpy(&encode->buffer_out[encode->buffer_used], data, length);
117
-
118
- encode->buffer_used += length;
119
-
120
- return LXB_STATUS_OK;
121
- }
122
-
123
- /*
124
- * Decode
125
- */
126
- lxb_inline lxb_status_t
127
- lxb_encoding_decode_buf_add_to(lxb_encoding_decode_t *decode,
128
- const lxb_codepoint_t *data, size_t length)
129
- {
130
- if ((decode->buffer_used + length) > decode->buffer_length) {
131
- return LXB_STATUS_SMALL_BUFFER;
132
- }
133
-
134
- memcpy(&decode->buffer_out[decode->buffer_used], data,
135
- sizeof(lxb_codepoint_t) * length);
136
-
137
- decode->buffer_used += length;
138
-
139
- return LXB_STATUS_OK;
140
- }
141
-
142
- lxb_inline lxb_status_t
143
- lxb_encoding_decode_init(lxb_encoding_decode_t *decode,
144
- const lxb_encoding_data_t *encoding_data,
145
- lxb_codepoint_t *buffer_out, size_t buffer_length)
146
- {
147
- if (encoding_data == NULL) {
148
- return LXB_STATUS_ERROR_WRONG_ARGS;
149
- }
150
-
151
- memset(decode, 0, sizeof(lxb_encoding_decode_t));
152
-
153
- decode->buffer_out = buffer_out;
154
- decode->buffer_length = buffer_length;
155
- decode->encoding_data = encoding_data;
156
-
157
- return LXB_STATUS_OK;
158
- }
159
-
160
- lxb_inline lxb_status_t
161
- lxb_encoding_decode_finish(lxb_encoding_decode_t *decode)
162
- {
163
- lxb_status_t status;
164
-
165
- if (decode->status != LXB_STATUS_OK) {
166
-
167
- if (decode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP
168
- && decode->u.iso_2022_jp.state == LXB_ENCODING_DECODE_2022_JP_ASCII)
169
- {
170
- return LXB_STATUS_OK;
171
- }
172
-
173
- if (decode->replace_to == NULL) {
174
- return LXB_STATUS_ERROR;
175
- }
176
-
177
- status = lxb_encoding_decode_buf_add_to(decode, decode->replace_to,
178
- decode->replace_len);
179
- if (status == LXB_STATUS_SMALL_BUFFER) {
180
- return status;
181
- }
182
- }
183
-
184
- return LXB_STATUS_OK;
185
- }
186
-
187
- lxb_inline lxb_codepoint_t *
188
- lxb_encoding_decode_buf(lxb_encoding_decode_t *decode)
189
- {
190
- return decode->buffer_out;
191
- }
192
-
193
- lxb_inline void
194
- lxb_encoding_decode_buf_set(lxb_encoding_decode_t *decode,
195
- lxb_codepoint_t *buffer_out, size_t buffer_length)
196
- {
197
- decode->buffer_out = buffer_out;
198
- decode->buffer_length = buffer_length;
199
- decode->buffer_used = 0;
200
- }
201
-
202
- lxb_inline void
203
- lxb_encoding_decode_buf_used_set(lxb_encoding_decode_t *decode,
204
- size_t buffer_used)
205
- {
206
- decode->buffer_used = buffer_used;
207
- }
208
-
209
- lxb_inline size_t
210
- lxb_encoding_decode_buf_used(lxb_encoding_decode_t *decode)
211
- {
212
- return decode->buffer_used;
213
- }
214
-
215
- lxb_inline lxb_status_t
216
- lxb_encoding_decode_replace_set(lxb_encoding_decode_t *decode,
217
- const lxb_codepoint_t *replace, size_t length)
218
- {
219
- if (decode->buffer_out == NULL || decode->buffer_length < length) {
220
- return LXB_STATUS_SMALL_BUFFER;
221
- }
222
-
223
- decode->replace_to = replace;
224
- decode->replace_len = length;
225
-
226
- return LXB_STATUS_OK;
227
- }
228
-
229
- /*
230
- * Single encode.
231
- */
232
- lxb_inline lxb_status_t
233
- lxb_encoding_encode_init_single(lxb_encoding_encode_t *encode,
234
- const lxb_encoding_data_t *encoding_data)
235
- {
236
- if (encoding_data == NULL) {
237
- return LXB_STATUS_ERROR_WRONG_ARGS;
238
- }
239
-
240
- memset(encode, 0, sizeof(lxb_encoding_encode_t));
241
-
242
- encode->encoding_data = encoding_data;
243
-
244
- return LXB_STATUS_OK;
245
- }
246
-
247
- lxb_inline int8_t
248
- lxb_encoding_encode_finish_single(lxb_encoding_encode_t *encode,
249
- lxb_char_t **data, const lxb_char_t *end)
250
- {
251
- if (encode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP) {
252
- return lxb_encoding_encode_iso_2022_jp_eof_single(encode, data, end);
253
- }
254
-
255
- return 0;
256
- }
257
-
258
- /*
259
- * Single decode.
260
- */
261
- lxb_inline lxb_status_t
262
- lxb_encoding_decode_init_single(lxb_encoding_decode_t *decode,
263
- const lxb_encoding_data_t *encoding_data)
264
- {
265
- if (encoding_data == NULL) {
266
- return LXB_STATUS_ERROR_WRONG_ARGS;
267
- }
268
-
269
- memset(decode, 0, sizeof(lxb_encoding_decode_t));
270
-
271
- decode->encoding_data = encoding_data;
272
-
273
- return LXB_STATUS_OK;
274
- }
275
-
276
- lxb_inline lxb_status_t
277
- lxb_encoding_decode_finish_single(lxb_encoding_decode_t *decode)
278
- {
279
- if (decode->status != LXB_STATUS_OK) {
280
-
281
- if (decode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP
282
- && decode->u.iso_2022_jp.state == LXB_ENCODING_DECODE_2022_JP_ASCII)
283
- {
284
- return LXB_STATUS_OK;
285
- }
286
-
287
- return LXB_STATUS_ERROR;
288
- }
289
-
290
- return LXB_STATUS_OK;
291
- }
292
-
293
- /*
294
- * Encoding data.
295
- */
296
- lxb_inline const lxb_encoding_data_t *
297
- lxb_encoding_data_by_name(const lxb_char_t *name, size_t length)
298
- {
299
- const lexbor_shs_entry_t *entry;
300
-
301
- if (length == 0) {
302
- return NULL;
303
- }
304
-
305
- entry = lexbor_shs_entry_get_lower_static(lxb_encoding_res_shs_entities,
306
- name, length);
307
- if (entry == NULL) {
308
- return NULL;
309
- }
310
-
311
- return (const lxb_encoding_data_t *) entry->value;
312
- }
313
-
314
- lxb_inline const lxb_encoding_data_t *
315
- lxb_encoding_data(lxb_encoding_t encoding)
316
- {
317
- if (encoding >= LXB_ENCODING_LAST_ENTRY) {
318
- return NULL;
319
- }
320
-
321
- return &lxb_encoding_res_map[encoding];
322
- }
323
-
324
- lxb_inline lxb_encoding_encode_f
325
- lxb_encoding_encode_function(lxb_encoding_t encoding)
326
- {
327
- if (encoding >= LXB_ENCODING_LAST_ENTRY) {
328
- return NULL;
329
- }
330
-
331
- return lxb_encoding_res_map[encoding].encode;
332
- }
333
-
334
- lxb_inline lxb_encoding_decode_f
335
- lxb_encoding_decode_function(lxb_encoding_t encoding)
336
- {
337
- if (encoding >= LXB_ENCODING_LAST_ENTRY) {
338
- return NULL;
339
- }
340
-
341
- return lxb_encoding_res_map[encoding].decode;
342
- }
343
-
344
- lxb_inline lxb_status_t
345
- lxb_encoding_data_call_encode(lxb_encoding_data_t *encoding_data, lxb_encoding_encode_t *ctx,
346
- const lxb_codepoint_t **cp, const lxb_codepoint_t *end)
347
- {
348
- return encoding_data->encode(ctx, cp, end);
349
- }
350
-
351
- lxb_inline lxb_status_t
352
- lxb_encoding_data_call_decode(lxb_encoding_data_t *encoding_data, lxb_encoding_decode_t *ctx,
353
- const lxb_char_t **data, const lxb_char_t *end)
354
- {
355
- return encoding_data->decode(ctx, data, end);
356
- }
357
-
358
- lxb_inline lxb_encoding_t
359
- lxb_encoding_data_encoding(lxb_encoding_data_t *data)
360
- {
361
- return data->encoding;
362
- }
363
-
364
- /*
365
- * No inline functions for ABI.
366
- */
367
- LXB_API lxb_status_t
368
- lxb_encoding_encode_init_noi(lxb_encoding_encode_t *encode,
369
- const lxb_encoding_data_t *encoding_data,
370
- lxb_char_t *buffer_out, size_t buffer_length);
371
-
372
- LXB_API lxb_status_t
373
- lxb_encoding_encode_finish_noi(lxb_encoding_encode_t *encode);
374
-
375
- LXB_API lxb_char_t *
376
- lxb_encoding_encode_buf_noi(lxb_encoding_encode_t *encode);
377
-
378
- LXB_API void
379
- lxb_encoding_encode_buf_set_noi(lxb_encoding_encode_t *encode,
380
- lxb_char_t *buffer_out, size_t buffer_length);
381
-
382
- LXB_API void
383
- lxb_encoding_encode_buf_used_set_noi(lxb_encoding_encode_t *encode,
384
- size_t buffer_used);
385
-
386
- LXB_API size_t
387
- lxb_encoding_encode_buf_used_noi(lxb_encoding_encode_t *encode);
388
-
389
- LXB_API lxb_status_t
390
- lxb_encoding_encode_replace_set_noi(lxb_encoding_encode_t *encode,
391
- const lxb_char_t *replace, size_t buffer_length);
392
-
393
- LXB_API lxb_status_t
394
- lxb_encoding_encode_buf_add_to_noi(lxb_encoding_encode_t *encode,
395
- lxb_char_t *data, size_t length);
396
-
397
- LXB_API lxb_status_t
398
- lxb_encoding_decode_init_noi(lxb_encoding_decode_t *decode,
399
- const lxb_encoding_data_t *encoding_data,
400
- lxb_codepoint_t *buffer_out, size_t buffer_length);
401
-
402
- LXB_API lxb_status_t
403
- lxb_encoding_decode_finish_noi(lxb_encoding_decode_t *decode);
404
-
405
- LXB_API lxb_codepoint_t *
406
- lxb_encoding_decode_buf_noi(lxb_encoding_decode_t *decode);
407
-
408
- LXB_API void
409
- lxb_encoding_decode_buf_set_noi(lxb_encoding_decode_t *decode,
410
- lxb_codepoint_t *buffer_out, size_t buffer_length);
411
-
412
- LXB_API void
413
- lxb_encoding_decode_buf_used_set_noi(lxb_encoding_decode_t *decode,
414
- size_t buffer_used);
415
-
416
- LXB_API size_t
417
- lxb_encoding_decode_buf_used_noi(lxb_encoding_decode_t *decode);
418
-
419
- LXB_API lxb_status_t
420
- lxb_encoding_decode_replace_set_noi(lxb_encoding_decode_t *decode,
421
- const lxb_codepoint_t *replace, size_t length);
422
-
423
- LXB_API lxb_status_t
424
- lxb_encoding_decode_buf_add_to_noi(lxb_encoding_decode_t *decode,
425
- const lxb_codepoint_t *data, size_t length);
426
-
427
- LXB_API lxb_status_t
428
- lxb_encoding_encode_init_single_noi(lxb_encoding_encode_t *encode,
429
- const lxb_encoding_data_t *encoding_data);
430
-
431
- LXB_API int8_t
432
- lxb_encoding_encode_finish_single_noi(lxb_encoding_encode_t *encode,
433
- lxb_char_t **data, const lxb_char_t *end);
434
-
435
- LXB_API lxb_status_t
436
- lxb_encoding_decode_init_single_noi(lxb_encoding_decode_t *decode,
437
- const lxb_encoding_data_t *encoding_data);
438
-
439
- LXB_API lxb_status_t
440
- lxb_encoding_decode_finish_single_noi(lxb_encoding_decode_t *decode);
441
-
442
- LXB_API const lxb_encoding_data_t *
443
- lxb_encoding_data_by_name_noi(const lxb_char_t *name, size_t length);
444
-
445
- LXB_API const lxb_encoding_data_t *
446
- lxb_encoding_data_noi(lxb_encoding_t encoding);
447
-
448
- LXB_API lxb_encoding_encode_f
449
- lxb_encoding_encode_function_noi(lxb_encoding_t encoding);
450
-
451
- LXB_API lxb_encoding_decode_f
452
- lxb_encoding_decode_function_noi(lxb_encoding_t encoding);
453
-
454
- LXB_API lxb_status_t
455
- lxb_encoding_data_call_encode_noi(lxb_encoding_data_t *encoding_data, lxb_encoding_encode_t *ctx,
456
- const lxb_codepoint_t **cp, const lxb_codepoint_t *end);
457
- LXB_API lxb_status_t
458
- lxb_encoding_data_call_decode_noi(lxb_encoding_data_t *encoding_data, lxb_encoding_decode_t *ctx,
459
- const lxb_char_t **data, const lxb_char_t *end);
460
-
461
- LXB_API lxb_encoding_t
462
- lxb_encoding_data_encoding_noi(lxb_encoding_data_t *data);
463
-
464
- LXB_API size_t
465
- lxb_encoding_encode_t_sizeof(void);
466
-
467
- LXB_API size_t
468
- lxb_encoding_decode_t_sizeof(void);
469
-
470
-
471
- #ifdef __cplusplus
472
- } /* extern "C" */
473
- #endif
474
-
475
- #endif /* LEXBOR_ENCODING_ENCODING_H */