nokolexbor 0.2.5 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/libxml/SAX2.h +4 -4
- data/ext/nokolexbor/libxml/chvalid.h +21 -21
- data/ext/nokolexbor/libxml/dict.h +13 -13
- data/ext/nokolexbor/libxml/globals.h +202 -202
- data/ext/nokolexbor/libxml/hash.h +25 -25
- data/ext/nokolexbor/libxml/parser.h +5 -5
- data/ext/nokolexbor/libxml/parserInternals.h +4 -4
- data/ext/nokolexbor/libxml/pattern.h +14 -14
- data/ext/nokolexbor/libxml/threads.h +15 -15
- data/ext/nokolexbor/libxml/tree.h +5 -5
- data/ext/nokolexbor/libxml/xmlerror.h +5 -5
- data/ext/nokolexbor/libxml/xmlmemory.h +16 -16
- data/ext/nokolexbor/libxml/xmlstring.h +30 -30
- data/ext/nokolexbor/libxml/xpath.h +43 -43
- data/ext/nokolexbor/libxml/xpathInternals.h +128 -128
- data/ext/nokolexbor/memory.c +6 -6
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +191 -178
- data/ext/nokolexbor/nl_node_set.c +38 -73
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +33 -42
- data/ext/nokolexbor/nokolexbor.c +7 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/ext/nokolexbor/private/buf.h +1 -1
- data/ext/nokolexbor/private/error.h +3 -3
- data/ext/nokolexbor/xml_SAX2.c +8 -8
- data/ext/nokolexbor/xml_buf.c +19 -19
- data/ext/nokolexbor/xml_chvalid.c +25 -25
- data/ext/nokolexbor/xml_dict.c +69 -69
- data/ext/nokolexbor/xml_encoding.c +2 -2
- data/ext/nokolexbor/xml_error.c +51 -51
- data/ext/nokolexbor/xml_globals.c +329 -329
- data/ext/nokolexbor/xml_hash.c +131 -131
- data/ext/nokolexbor/xml_memory.c +25 -25
- data/ext/nokolexbor/xml_parser.c +3 -3
- data/ext/nokolexbor/xml_parserInternals.c +15 -15
- data/ext/nokolexbor/xml_pattern.c +103 -103
- data/ext/nokolexbor/xml_string.c +93 -93
- data/ext/nokolexbor/xml_threads.c +61 -61
- data/ext/nokolexbor/xml_tree.c +12 -12
- data/ext/nokolexbor/xml_xpath.c +1194 -1203
- data/lib/nokolexbor/document.rb +92 -1
- data/lib/nokolexbor/node.rb +64 -0
- data/lib/nokolexbor/node_set.rb +6 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
- metadata +7 -4
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -1,38 +1,64 @@
|
|
1
1
|
#include "nokolexbor.h"
|
2
2
|
|
3
|
-
#define SORT_NAME
|
3
|
+
#define SORT_NAME nl_css_result
|
4
4
|
#define SORT_TYPE lxb_dom_node_t *
|
5
5
|
#define SORT_CMP(x, y) (x->user >= y->user ? (x->user == y->user ? 0 : 1) : -1)
|
6
6
|
#include "timsort.h"
|
7
7
|
|
8
8
|
extern VALUE mNokolexbor;
|
9
9
|
extern VALUE cNokolexborDocument;
|
10
|
+
extern VALUE cNokolexborText;
|
11
|
+
extern VALUE cNokolexborComment;
|
10
12
|
extern VALUE cNokolexborNodeSet;
|
11
13
|
extern VALUE eLexborError;
|
12
14
|
VALUE cNokolexborNode;
|
15
|
+
VALUE cNokolexborElement;
|
16
|
+
VALUE cNokolexborCharacterData;
|
13
17
|
|
14
18
|
extern rb_data_type_t nl_document_type;
|
15
19
|
|
16
|
-
static const rb_data_type_t nl_node_type = {
|
17
|
-
"Nokolexbor::Node",
|
18
|
-
{
|
19
|
-
0,
|
20
|
-
0,
|
21
|
-
},
|
22
|
-
0,
|
23
|
-
0,
|
24
|
-
RUBY_TYPED_FREE_IMMEDIATELY,
|
25
|
-
};
|
26
|
-
|
27
20
|
VALUE
|
28
21
|
nl_rb_node_create(lxb_dom_node_t *node, VALUE rb_document)
|
29
22
|
{
|
30
|
-
if (node == NULL)
|
31
|
-
{
|
23
|
+
if (node == NULL) {
|
32
24
|
rb_raise(rb_eArgError, "Cannot create Nokolexbor::Node with null pointer");
|
33
25
|
}
|
34
26
|
|
35
|
-
VALUE
|
27
|
+
VALUE rb_class;
|
28
|
+
switch (node->type) {
|
29
|
+
case LXB_DOM_NODE_TYPE_ELEMENT:
|
30
|
+
rb_class = cNokolexborElement;
|
31
|
+
break;
|
32
|
+
// case LXB_DOM_NODE_TYPE_ATTRIBUTE:
|
33
|
+
// break;
|
34
|
+
case LXB_DOM_NODE_TYPE_TEXT:
|
35
|
+
rb_class = cNokolexborText;
|
36
|
+
break;
|
37
|
+
case LXB_DOM_NODE_TYPE_CDATA_SECTION:
|
38
|
+
rb_class = cNokolexborCharacterData;
|
39
|
+
break;
|
40
|
+
// case LXB_DOM_NODE_TYPE_ENTITY_REFERENCE:
|
41
|
+
// break;
|
42
|
+
// case LXB_DOM_NODE_TYPE_ENTITY:
|
43
|
+
// break;
|
44
|
+
// case LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION:
|
45
|
+
// break;
|
46
|
+
case LXB_DOM_NODE_TYPE_COMMENT:
|
47
|
+
rb_class = cNokolexborComment;
|
48
|
+
break;
|
49
|
+
// case LXB_DOM_NODE_TYPE_DOCUMENT:
|
50
|
+
// break;
|
51
|
+
// case LXB_DOM_NODE_TYPE_DOCUMENT_TYPE:
|
52
|
+
// break;
|
53
|
+
// case LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT:
|
54
|
+
// break;
|
55
|
+
// case LXB_DOM_NODE_TYPE_NOTATION:
|
56
|
+
// break;
|
57
|
+
default:
|
58
|
+
rb_class = cNokolexborNode;
|
59
|
+
}
|
60
|
+
|
61
|
+
VALUE ret = Data_Wrap_Struct(rb_class, NULL, NULL, node);
|
36
62
|
rb_iv_set(ret, "@document", rb_document);
|
37
63
|
return ret;
|
38
64
|
}
|
@@ -41,13 +67,10 @@ inline lxb_dom_node_t *
|
|
41
67
|
nl_rb_node_unwrap(VALUE rb_node)
|
42
68
|
{
|
43
69
|
lxb_dom_node_t *node;
|
44
|
-
if (
|
45
|
-
{
|
70
|
+
if (rb_obj_is_kind_of(rb_node, cNokolexborDocument)) {
|
46
71
|
TypedData_Get_Struct(rb_node, lxb_dom_node_t, &nl_document_type, node);
|
47
|
-
}
|
48
|
-
|
49
|
-
{
|
50
|
-
TypedData_Get_Struct(rb_node, lxb_dom_node_t, &nl_node_type, node);
|
72
|
+
} else {
|
73
|
+
Data_Get_Struct(rb_node, lxb_dom_node_t, node);
|
51
74
|
}
|
52
75
|
return node;
|
53
76
|
}
|
@@ -62,23 +85,22 @@ nl_node_new(int argc, VALUE *argv, VALUE klass)
|
|
62
85
|
|
63
86
|
rb_scan_args(argc, argv, "2*", &rb_name, &rb_document, &rest);
|
64
87
|
|
65
|
-
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument))
|
66
|
-
{
|
88
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
67
89
|
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
68
90
|
}
|
69
91
|
|
70
92
|
document = nl_rb_document_unwrap(rb_document);
|
71
93
|
|
72
|
-
|
73
|
-
|
74
|
-
|
94
|
+
const char* c_name = StringValuePtr(rb_name);
|
95
|
+
size_t name_len = RSTRING_LEN(rb_name);
|
96
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(document, (const lxb_char_t *)c_name, name_len, NULL);
|
97
|
+
if (element == NULL) {
|
75
98
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
76
99
|
}
|
77
100
|
|
78
101
|
VALUE rb_node = nl_rb_node_create(&element->node, rb_document);
|
79
102
|
|
80
|
-
if (rb_block_given_p())
|
81
|
-
{
|
103
|
+
if (rb_block_given_p()) {
|
82
104
|
rb_yield(rb_node);
|
83
105
|
}
|
84
106
|
|
@@ -92,8 +114,7 @@ nl_node_content(VALUE self)
|
|
92
114
|
|
93
115
|
size_t str_len = 0;
|
94
116
|
lxb_char_t *text = lxb_dom_node_text_content(node, &str_len);
|
95
|
-
if (text == NULL)
|
96
|
-
{
|
117
|
+
if (text == NULL) {
|
97
118
|
return rb_str_new("", 0);
|
98
119
|
}
|
99
120
|
VALUE rb_str = rb_utf8_str_new((char *)text, str_len);
|
@@ -102,13 +123,26 @@ nl_node_content(VALUE self)
|
|
102
123
|
return rb_str;
|
103
124
|
}
|
104
125
|
|
126
|
+
static VALUE
|
127
|
+
nl_node_content_set(VALUE self, VALUE content)
|
128
|
+
{
|
129
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
130
|
+
|
131
|
+
const char *c_content = StringValuePtr(content);
|
132
|
+
size_t content_len = RSTRING_LEN(content);
|
133
|
+
lxb_status_t status = lxb_dom_node_text_content_set(node, (const lxb_char_t *)c_content, content_len);
|
134
|
+
if (status != LXB_STATUS_OK) {
|
135
|
+
nl_raise_lexbor_error(status);
|
136
|
+
}
|
137
|
+
return content;
|
138
|
+
}
|
139
|
+
|
105
140
|
static VALUE
|
106
141
|
nl_node_get_attr(VALUE self, VALUE rb_attr)
|
107
142
|
{
|
108
143
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
109
144
|
|
110
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
111
|
-
{
|
145
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
112
146
|
return Qnil;
|
113
147
|
}
|
114
148
|
|
@@ -118,8 +152,7 @@ nl_node_get_attr(VALUE self, VALUE rb_attr)
|
|
118
152
|
|
119
153
|
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
120
154
|
|
121
|
-
if (!lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len))
|
122
|
-
{
|
155
|
+
if (!lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len)) {
|
123
156
|
return Qnil;
|
124
157
|
}
|
125
158
|
|
@@ -134,8 +167,7 @@ nl_node_set_attr(VALUE self, VALUE rb_attr, VALUE rb_value)
|
|
134
167
|
{
|
135
168
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
136
169
|
|
137
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
138
|
-
{
|
170
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
139
171
|
return Qnil;
|
140
172
|
}
|
141
173
|
|
@@ -159,8 +191,7 @@ nl_node_remove_attr(VALUE self, VALUE rb_attr)
|
|
159
191
|
{
|
160
192
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
161
193
|
|
162
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
163
|
-
{
|
194
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
164
195
|
return Qnil;
|
165
196
|
}
|
166
197
|
|
@@ -179,8 +210,7 @@ nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *sp
|
|
179
210
|
{
|
180
211
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
181
212
|
lxb_status_t status = lexbor_array_push_unique(array, node);
|
182
|
-
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED)
|
183
|
-
{
|
213
|
+
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
|
184
214
|
nl_raise_lexbor_error(status);
|
185
215
|
}
|
186
216
|
// Stop at first result
|
@@ -192,8 +222,7 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
|
|
192
222
|
{
|
193
223
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
194
224
|
lxb_status_t status = lexbor_array_push_unique(array, node);
|
195
|
-
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED)
|
196
|
-
{
|
225
|
+
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
|
197
226
|
nl_raise_lexbor_error(status);
|
198
227
|
}
|
199
228
|
return LXB_STATUS_OK;
|
@@ -215,32 +244,28 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
215
244
|
/* Create CSS parser. */
|
216
245
|
parser = lxb_css_parser_create();
|
217
246
|
status = lxb_css_parser_init(parser, NULL, NULL);
|
218
|
-
if (status != LXB_STATUS_OK)
|
219
|
-
{
|
247
|
+
if (status != LXB_STATUS_OK) {
|
220
248
|
goto cleanup;
|
221
249
|
}
|
222
250
|
|
223
251
|
/* Selectors. */
|
224
252
|
selectors = lxb_selectors_create();
|
225
253
|
status = lxb_selectors_init(selectors);
|
226
|
-
if (status != LXB_STATUS_OK)
|
227
|
-
{
|
254
|
+
if (status != LXB_STATUS_OK) {
|
228
255
|
goto cleanup;
|
229
256
|
}
|
230
257
|
|
231
258
|
/* Parse and get the log. */
|
232
259
|
// TODO: Cache the list for reuse, improves performance
|
233
260
|
list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
|
234
|
-
if (parser->status != LXB_STATUS_OK)
|
235
|
-
{
|
261
|
+
if (parser->status != LXB_STATUS_OK) {
|
236
262
|
status = parser->status;
|
237
263
|
goto cleanup;
|
238
264
|
}
|
239
265
|
|
240
266
|
/* Find HTML nodes by CSS Selectors. */
|
241
267
|
status = lxb_selectors_find(selectors, node, list, cb, ctx);
|
242
|
-
if (status != LXB_STATUS_OK)
|
243
|
-
{
|
268
|
+
if (status != LXB_STATUS_OK) {
|
244
269
|
goto cleanup;
|
245
270
|
}
|
246
271
|
|
@@ -263,22 +288,16 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
263
288
|
size_t count = 1;
|
264
289
|
root->user = (void *)count;
|
265
290
|
lxb_dom_node_t *node = root;
|
266
|
-
do
|
267
|
-
|
268
|
-
if (node->first_child != NULL)
|
269
|
-
{
|
291
|
+
do {
|
292
|
+
if (node->first_child != NULL) {
|
270
293
|
node = node->first_child;
|
271
294
|
node->user = (void *)++count;
|
272
|
-
}
|
273
|
-
|
274
|
-
{
|
275
|
-
while (node != root && node->next == NULL)
|
276
|
-
{
|
295
|
+
} else {
|
296
|
+
while (node != root && node->next == NULL) {
|
277
297
|
node = node->parent;
|
278
298
|
}
|
279
299
|
|
280
|
-
if (node == root)
|
281
|
-
{
|
300
|
+
if (node == root) {
|
282
301
|
break;
|
283
302
|
}
|
284
303
|
|
@@ -290,27 +309,23 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
290
309
|
}
|
291
310
|
|
292
311
|
// Sort nodes in document traversal order (the same as Nokogiri)
|
293
|
-
void
|
312
|
+
void nl_sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array)
|
294
313
|
{
|
295
314
|
// No need to sort if there's only one selector, the results are naturally in document traversal order
|
296
|
-
if (strchr(RSTRING_PTR(selector), ',') != NULL)
|
297
|
-
{
|
315
|
+
if (strchr(RSTRING_PTR(selector), ',') != NULL) {
|
298
316
|
int need_order = 0;
|
299
317
|
// Check if we have already marked orders, note that
|
300
318
|
// we need to order again if new nodes are added to the document
|
301
|
-
for (size_t i = 0; i < array->length; i++)
|
302
|
-
|
303
|
-
if (((lxb_dom_node_t *)array->list[i])->user == 0)
|
304
|
-
{
|
319
|
+
for (size_t i = 0; i < array->length; i++) {
|
320
|
+
if (((lxb_dom_node_t *)array->list[i])->user == 0) {
|
305
321
|
need_order = 1;
|
306
322
|
break;
|
307
323
|
}
|
308
324
|
}
|
309
|
-
if (need_order)
|
310
|
-
{
|
325
|
+
if (need_order) {
|
311
326
|
mark_node_orders(&doc->node);
|
312
327
|
}
|
313
|
-
|
328
|
+
nl_css_result_tim_sort((lxb_dom_node_t **)&array->list[0], array->length);
|
314
329
|
}
|
315
330
|
}
|
316
331
|
|
@@ -322,19 +337,17 @@ nl_node_at_css(VALUE self, VALUE selector)
|
|
322
337
|
|
323
338
|
lxb_status_t status = nl_node_find(self, selector, nl_node_at_css_callback, array);
|
324
339
|
|
325
|
-
if (status != LXB_STATUS_OK)
|
326
|
-
{
|
340
|
+
if (status != LXB_STATUS_OK) {
|
327
341
|
lexbor_array_destroy(array, true);
|
328
342
|
nl_raise_lexbor_error(status);
|
329
343
|
}
|
330
344
|
|
331
|
-
if (array->length == 0)
|
332
|
-
{
|
345
|
+
if (array->length == 0) {
|
333
346
|
lexbor_array_destroy(array, true);
|
334
347
|
return Qnil;
|
335
348
|
}
|
336
349
|
|
337
|
-
|
350
|
+
nl_sort_nodes_if_necessary(selector, node->owner_document, array);
|
338
351
|
|
339
352
|
VALUE ret = nl_rb_node_create(array->list[0], nl_rb_document_get(self));
|
340
353
|
|
@@ -350,34 +363,45 @@ nl_node_css(VALUE self, VALUE selector)
|
|
350
363
|
lexbor_array_t *array = lexbor_array_create();
|
351
364
|
|
352
365
|
lxb_status_t status = nl_node_find(self, selector, nl_node_css_callback, array);
|
353
|
-
if (status != LXB_STATUS_OK)
|
354
|
-
{
|
366
|
+
if (status != LXB_STATUS_OK) {
|
355
367
|
lexbor_array_destroy(array, true);
|
356
368
|
nl_raise_lexbor_error(status);
|
357
369
|
}
|
358
370
|
|
359
|
-
|
371
|
+
nl_sort_nodes_if_necessary(selector, node->owner_document, array);
|
360
372
|
|
361
373
|
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
362
374
|
}
|
363
375
|
|
364
376
|
static VALUE
|
365
|
-
nl_node_inner_html(VALUE self)
|
377
|
+
nl_node_inner_html(int argc, VALUE *argv, VALUE self)
|
366
378
|
{
|
367
379
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
368
380
|
lexbor_str_t str = {0};
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
381
|
+
VALUE options;
|
382
|
+
lxb_status_t status;
|
383
|
+
size_t indent = 0;
|
384
|
+
rb_scan_args(argc, argv, "01", &options);
|
385
|
+
|
386
|
+
if (TYPE(options) == T_HASH) {
|
387
|
+
VALUE rb_indent = rb_hash_aref(options, ID2SYM(rb_intern("indent")));
|
388
|
+
if (!NIL_P(rb_indent)) {
|
389
|
+
indent = NUM2INT(rb_indent);
|
390
|
+
}
|
391
|
+
}
|
392
|
+
if (indent > 0) {
|
393
|
+
status = lxb_html_serialize_pretty_deep_str(node, 0, 0, &str);
|
394
|
+
} else {
|
395
|
+
status = lxb_html_serialize_deep_str(node, &str);
|
396
|
+
}
|
397
|
+
if (status != LXB_STATUS_OK) {
|
398
|
+
if (str.data != NULL) {
|
374
399
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
375
400
|
}
|
376
401
|
nl_raise_lexbor_error(status);
|
377
402
|
}
|
378
403
|
|
379
|
-
if (str.data != NULL)
|
380
|
-
{
|
404
|
+
if (str.data != NULL) {
|
381
405
|
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
382
406
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
383
407
|
return ret;
|
@@ -387,22 +411,34 @@ nl_node_inner_html(VALUE self)
|
|
387
411
|
}
|
388
412
|
|
389
413
|
static VALUE
|
390
|
-
nl_node_outer_html(VALUE self)
|
414
|
+
nl_node_outer_html(int argc, VALUE *argv, VALUE self)
|
391
415
|
{
|
392
416
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
393
417
|
lexbor_str_t str = {0};
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
418
|
+
VALUE options;
|
419
|
+
lxb_status_t status;
|
420
|
+
size_t indent = 0;
|
421
|
+
rb_scan_args(argc, argv, "01", &options);
|
422
|
+
|
423
|
+
if (TYPE(options) == T_HASH) {
|
424
|
+
VALUE rb_indent = rb_hash_aref(options, ID2SYM(rb_intern("indent")));
|
425
|
+
if (!NIL_P(rb_indent)) {
|
426
|
+
indent = NUM2INT(rb_indent);
|
427
|
+
}
|
428
|
+
}
|
429
|
+
if (indent > 0) {
|
430
|
+
status = lxb_html_serialize_pretty_tree_str(node, 0, 0, &str);
|
431
|
+
} else {
|
432
|
+
status = lxb_html_serialize_tree_str(node, &str);
|
433
|
+
}
|
434
|
+
if (status != LXB_STATUS_OK) {
|
435
|
+
if (str.data != NULL) {
|
399
436
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
400
437
|
}
|
401
438
|
nl_raise_lexbor_error(status);
|
402
439
|
}
|
403
440
|
|
404
|
-
if (str.data != NULL)
|
405
|
-
{
|
441
|
+
if (str.data != NULL) {
|
406
442
|
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
407
443
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
408
444
|
return ret;
|
@@ -416,8 +452,7 @@ nl_node_has_key(VALUE self, VALUE rb_attr)
|
|
416
452
|
{
|
417
453
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
418
454
|
|
419
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
420
|
-
{
|
455
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
421
456
|
return Qfalse;
|
422
457
|
}
|
423
458
|
|
@@ -436,15 +471,13 @@ nl_node_keys(VALUE self)
|
|
436
471
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
437
472
|
VALUE ary_keys = rb_ary_new();
|
438
473
|
|
439
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
440
|
-
{
|
474
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
441
475
|
return ary_keys;
|
442
476
|
}
|
443
477
|
|
444
478
|
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
445
479
|
|
446
|
-
while (attr != NULL)
|
447
|
-
{
|
480
|
+
while (attr != NULL) {
|
448
481
|
size_t tmp_len;
|
449
482
|
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
450
483
|
rb_ary_push(ary_keys, rb_utf8_str_new((const char *)tmp, tmp_len));
|
@@ -461,23 +494,18 @@ nl_node_values(VALUE self)
|
|
461
494
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
462
495
|
VALUE ary_values = rb_ary_new();
|
463
496
|
|
464
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
465
|
-
{
|
497
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
466
498
|
return ary_values;
|
467
499
|
}
|
468
500
|
|
469
501
|
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
470
502
|
|
471
|
-
while (attr != NULL)
|
472
|
-
{
|
503
|
+
while (attr != NULL) {
|
473
504
|
size_t tmp_len;
|
474
505
|
const lxb_char_t *tmp = lxb_dom_attr_value(attr, &tmp_len);
|
475
|
-
if (tmp != NULL)
|
476
|
-
{
|
506
|
+
if (tmp != NULL) {
|
477
507
|
rb_ary_push(ary_values, rb_utf8_str_new((const char *)tmp, tmp_len));
|
478
|
-
}
|
479
|
-
else
|
480
|
-
{
|
508
|
+
} else {
|
481
509
|
rb_ary_push(ary_values, rb_str_new("", 0));
|
482
510
|
}
|
483
511
|
|
@@ -493,15 +521,13 @@ nl_node_attrs(VALUE self)
|
|
493
521
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
494
522
|
VALUE rb_hash = rb_hash_new();
|
495
523
|
|
496
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
497
|
-
{
|
524
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
498
525
|
return rb_hash;
|
499
526
|
}
|
500
527
|
|
501
528
|
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
502
529
|
|
503
|
-
while (attr != NULL)
|
504
|
-
{
|
530
|
+
while (attr != NULL) {
|
505
531
|
size_t tmp_len;
|
506
532
|
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
507
533
|
VALUE rb_key = rb_utf8_str_new((const char *)tmp, tmp_len);
|
@@ -535,11 +561,9 @@ static VALUE
|
|
535
561
|
nl_node_previous_element(VALUE self)
|
536
562
|
{
|
537
563
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
538
|
-
while (node->prev != NULL)
|
539
|
-
{
|
564
|
+
while (node->prev != NULL) {
|
540
565
|
node = node->prev;
|
541
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
542
|
-
{
|
566
|
+
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
543
567
|
return nl_rb_node_create(node, nl_rb_document_get(self));
|
544
568
|
}
|
545
569
|
}
|
@@ -557,11 +581,9 @@ static VALUE
|
|
557
581
|
nl_node_next_element(VALUE self)
|
558
582
|
{
|
559
583
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
560
|
-
while (node->next != NULL)
|
561
|
-
{
|
584
|
+
while (node->next != NULL) {
|
562
585
|
node = node->next;
|
563
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
564
|
-
{
|
586
|
+
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
565
587
|
return nl_rb_node_create(node, nl_rb_document_get(self));
|
566
588
|
}
|
567
589
|
}
|
@@ -575,8 +597,7 @@ nl_node_children(VALUE self)
|
|
575
597
|
lxb_dom_node_t *child = node->first_child;
|
576
598
|
lexbor_array_t *array = lexbor_array_create();
|
577
599
|
|
578
|
-
while (child != NULL)
|
579
|
-
{
|
600
|
+
while (child != NULL) {
|
580
601
|
lexbor_array_push(array, child);
|
581
602
|
child = child->next;
|
582
603
|
}
|
@@ -619,8 +640,7 @@ nl_node_equals(VALUE self, VALUE other)
|
|
619
640
|
const lxb_char_t *
|
620
641
|
lxb_dom_node_name_qualified(lxb_dom_node_t *node, size_t *len)
|
621
642
|
{
|
622
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
623
|
-
{
|
643
|
+
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
624
644
|
return lxb_dom_element_qualified_name(lxb_dom_interface_element(node),
|
625
645
|
len);
|
626
646
|
}
|
@@ -642,18 +662,15 @@ nl_node_parse_fragment(lxb_dom_document_t *doc, lxb_char_t *html, size_t size)
|
|
642
662
|
size_t tag_name_len;
|
643
663
|
lxb_html_document_t *html_doc = lxb_html_interface_document(doc);
|
644
664
|
const lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(html_doc), LXB_TAG__UNDEF, &tag_name_len);
|
645
|
-
if (tag_name == NULL)
|
646
|
-
{
|
665
|
+
if (tag_name == NULL) {
|
647
666
|
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
648
667
|
}
|
649
668
|
lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
|
650
|
-
if (element == NULL)
|
651
|
-
{
|
669
|
+
if (element == NULL) {
|
652
670
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
653
671
|
}
|
654
672
|
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(html_doc, element, html, size);
|
655
|
-
if (frag_root == NULL)
|
656
|
-
{
|
673
|
+
if (frag_root == NULL) {
|
657
674
|
rb_raise(rb_eArgError, "Error parsing HTML");
|
658
675
|
}
|
659
676
|
return frag_root;
|
@@ -677,39 +694,34 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
677
694
|
lxb_dom_document_t *doc = node->owner_document;
|
678
695
|
|
679
696
|
int insert_after;
|
680
|
-
if (rb_eql(rb_String(next_or_previous), rb_str_new_literal("next")))
|
681
|
-
{
|
697
|
+
if (rb_eql(rb_String(next_or_previous), rb_str_new_literal("next"))) {
|
682
698
|
insert_after = 1;
|
683
|
-
}
|
684
|
-
else if (rb_eql(rb_String(next_or_previous), rb_str_new_literal("previous")))
|
685
|
-
{
|
699
|
+
} else if (rb_eql(rb_String(next_or_previous), rb_str_new_literal("previous"))) {
|
686
700
|
insert_after = 0;
|
687
|
-
}
|
688
|
-
else
|
689
|
-
{
|
701
|
+
} else {
|
690
702
|
rb_raise(rb_eArgError, "Unsupported inserting position");
|
691
703
|
}
|
692
704
|
|
693
|
-
if (TYPE(new) == T_STRING)
|
694
|
-
{
|
705
|
+
if (TYPE(new) == T_STRING) {
|
695
706
|
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
707
|
+
lexbor_array_t *array = lexbor_array_create();
|
696
708
|
|
697
|
-
while (frag_root->first_child != NULL)
|
698
|
-
{
|
709
|
+
while (frag_root->first_child != NULL) {
|
699
710
|
lxb_dom_node_t *child = frag_root->first_child;
|
700
711
|
lxb_dom_node_remove(child);
|
701
712
|
insert_after ? lxb_dom_node_insert_after(node, child) : lxb_dom_node_insert_before(node, child);
|
713
|
+
lexbor_array_push(array, child);
|
702
714
|
}
|
703
715
|
lxb_dom_node_destroy(frag_root);
|
704
|
-
|
705
|
-
|
706
|
-
{
|
716
|
+
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
717
|
+
|
718
|
+
} else if (rb_obj_is_kind_of(new, cNokolexborNode)) {
|
707
719
|
lxb_dom_node_t *node_new = nl_rb_node_unwrap(new);
|
708
720
|
lxb_dom_node_remove(node_new);
|
709
721
|
insert_after ? lxb_dom_node_insert_after(node, node_new) : lxb_dom_node_insert_before(node, node_new);
|
710
|
-
|
711
|
-
|
712
|
-
{
|
722
|
+
return new;
|
723
|
+
|
724
|
+
} else {
|
713
725
|
rb_raise(rb_eArgError, "Unsupported node type");
|
714
726
|
}
|
715
727
|
return Qnil;
|
@@ -721,26 +733,26 @@ nl_node_add_child(VALUE self, VALUE new)
|
|
721
733
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
722
734
|
lxb_dom_document_t *doc = node->owner_document;
|
723
735
|
|
724
|
-
if (TYPE(new) == T_STRING)
|
725
|
-
{
|
736
|
+
if (TYPE(new) == T_STRING) {
|
726
737
|
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
738
|
+
lexbor_array_t *array = lexbor_array_create();
|
727
739
|
|
728
|
-
while (frag_root->first_child != NULL)
|
729
|
-
{
|
740
|
+
while (frag_root->first_child != NULL) {
|
730
741
|
lxb_dom_node_t *child = frag_root->first_child;
|
731
742
|
lxb_dom_node_remove(child);
|
732
743
|
lxb_dom_node_insert_child(node, child);
|
744
|
+
lexbor_array_push(array, child);
|
733
745
|
}
|
734
746
|
lxb_dom_node_destroy(frag_root);
|
735
|
-
|
736
|
-
|
737
|
-
{
|
747
|
+
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
748
|
+
|
749
|
+
} else if (rb_obj_is_kind_of(new, cNokolexborNode)) {
|
738
750
|
lxb_dom_node_t *node_new = nl_rb_node_unwrap(new);
|
739
751
|
lxb_dom_node_remove(node_new);
|
740
752
|
lxb_dom_node_insert_child(node, node_new);
|
741
|
-
|
742
|
-
|
743
|
-
{
|
753
|
+
return new;
|
754
|
+
|
755
|
+
} else {
|
744
756
|
rb_raise(rb_eArgError, "Unsupported node type");
|
745
757
|
}
|
746
758
|
return Qnil;
|
@@ -758,12 +770,10 @@ nl_node_first_element_child(VALUE self)
|
|
758
770
|
lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
|
759
771
|
lxb_dom_node_t *cur;
|
760
772
|
|
761
|
-
if (parent == NULL)
|
762
|
-
{
|
773
|
+
if (parent == NULL) {
|
763
774
|
return Qnil;
|
764
775
|
}
|
765
|
-
switch (parent->type)
|
766
|
-
{
|
776
|
+
switch (parent->type) {
|
767
777
|
case LXB_DOM_NODE_TYPE_ELEMENT:
|
768
778
|
case LXB_DOM_NODE_TYPE_ENTITY:
|
769
779
|
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
@@ -772,10 +782,8 @@ nl_node_first_element_child(VALUE self)
|
|
772
782
|
default:
|
773
783
|
return Qnil;
|
774
784
|
}
|
775
|
-
while (cur != NULL)
|
776
|
-
|
777
|
-
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
778
|
-
{
|
785
|
+
while (cur != NULL) {
|
786
|
+
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
779
787
|
return nl_rb_node_create(cur, nl_rb_document_get(self));
|
780
788
|
}
|
781
789
|
cur = cur->next;
|
@@ -789,12 +797,10 @@ nl_node_last_element_child(VALUE self)
|
|
789
797
|
lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
|
790
798
|
lxb_dom_node_t *cur;
|
791
799
|
|
792
|
-
if (parent == NULL)
|
793
|
-
{
|
800
|
+
if (parent == NULL) {
|
794
801
|
return Qnil;
|
795
802
|
}
|
796
|
-
switch (parent->type)
|
797
|
-
{
|
803
|
+
switch (parent->type) {
|
798
804
|
case LXB_DOM_NODE_TYPE_ELEMENT:
|
799
805
|
case LXB_DOM_NODE_TYPE_ENTITY:
|
800
806
|
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
@@ -803,10 +809,8 @@ nl_node_last_element_child(VALUE self)
|
|
803
809
|
default:
|
804
810
|
return Qnil;
|
805
811
|
}
|
806
|
-
while (cur != NULL)
|
807
|
-
|
808
|
-
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
809
|
-
{
|
812
|
+
while (cur != NULL) {
|
813
|
+
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
810
814
|
return nl_rb_node_create(cur, nl_rb_document_get(self));
|
811
815
|
}
|
812
816
|
cur = cur->prev;
|
@@ -827,16 +831,20 @@ void Init_nl_node(void)
|
|
827
831
|
cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
|
828
832
|
rb_undef_alloc_func(cNokolexborNode);
|
829
833
|
|
834
|
+
cNokolexborElement = rb_define_class_under(mNokolexbor, "Element", cNokolexborNode);
|
835
|
+
cNokolexborCharacterData = rb_define_class_under(mNokolexbor, "CharacterData", cNokolexborNode);
|
836
|
+
|
830
837
|
rb_define_singleton_method(cNokolexborNode, "new", nl_node_new, -1);
|
831
838
|
rb_define_method(cNokolexborNode, "content", nl_node_content, 0);
|
839
|
+
rb_define_method(cNokolexborNode, "content=", nl_node_content_set, 1);
|
832
840
|
rb_define_method(cNokolexborNode, "[]", nl_node_get_attr, 1);
|
833
841
|
rb_define_method(cNokolexborNode, "[]=", nl_node_set_attr, 2);
|
834
842
|
rb_define_method(cNokolexborNode, "remove_attr", nl_node_remove_attr, 1);
|
835
843
|
rb_define_method(cNokolexborNode, "==", nl_node_equals, 1);
|
836
844
|
rb_define_method(cNokolexborNode, "css_impl", nl_node_css, 1);
|
837
845
|
rb_define_method(cNokolexborNode, "at_css_impl", nl_node_at_css, 1);
|
838
|
-
rb_define_method(cNokolexborNode, "inner_html", nl_node_inner_html,
|
839
|
-
rb_define_method(cNokolexborNode, "outer_html", nl_node_outer_html,
|
846
|
+
rb_define_method(cNokolexborNode, "inner_html", nl_node_inner_html, -1);
|
847
|
+
rb_define_method(cNokolexborNode, "outer_html", nl_node_outer_html, -1);
|
840
848
|
rb_define_method(cNokolexborNode, "key?", nl_node_has_key, 1);
|
841
849
|
rb_define_method(cNokolexborNode, "keys", nl_node_keys, 0);
|
842
850
|
rb_define_method(cNokolexborNode, "values", nl_node_values, 0);
|
@@ -860,12 +868,17 @@ void Init_nl_node(void)
|
|
860
868
|
rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
|
861
869
|
|
862
870
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
871
|
+
rb_define_alias(cNokolexborNode, "get_attribute", "[]");
|
863
872
|
rb_define_alias(cNokolexborNode, "set_attr", "[]=");
|
873
|
+
rb_define_alias(cNokolexborNode, "set_attribute", "[]=");
|
874
|
+
rb_define_alias(cNokolexborNode, "has_attribute?", "key?");
|
864
875
|
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
876
|
+
rb_define_alias(cNokolexborNode, "remove_attribute", "remove_attr");
|
865
877
|
rb_define_alias(cNokolexborNode, "text", "content");
|
866
878
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
867
879
|
rb_define_alias(cNokolexborNode, "to_str", "content");
|
868
880
|
rb_define_alias(cNokolexborNode, "to_html", "outer_html");
|
881
|
+
rb_define_alias(cNokolexborNode, "serialize", "outer_html");
|
869
882
|
rb_define_alias(cNokolexborNode, "to_s", "outer_html");
|
870
883
|
rb_define_alias(cNokolexborNode, "unlink", "remove");
|
871
884
|
rb_define_alias(cNokolexborNode, "type", "node_type");
|