nokolexbor 0.2.6 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/memory.c +6 -6
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +186 -173
- data/ext/nokolexbor/nl_node_set.c +35 -70
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +17 -26
- data/ext/nokolexbor/nokolexbor.c +7 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/lib/nokolexbor/document.rb +92 -1
- data/lib/nokolexbor/node.rb +64 -0
- data/lib/nokolexbor/node_set.rb +6 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
- metadata +7 -4
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -7,32 +7,58 @@
|
|
7
7
|
|
8
8
|
extern VALUE mNokolexbor;
|
9
9
|
extern VALUE cNokolexborDocument;
|
10
|
+
extern VALUE cNokolexborText;
|
11
|
+
extern VALUE cNokolexborComment;
|
10
12
|
extern VALUE cNokolexborNodeSet;
|
11
13
|
extern VALUE eLexborError;
|
12
14
|
VALUE cNokolexborNode;
|
15
|
+
VALUE cNokolexborElement;
|
16
|
+
VALUE cNokolexborCharacterData;
|
13
17
|
|
14
18
|
extern rb_data_type_t nl_document_type;
|
15
19
|
|
16
|
-
static const rb_data_type_t nl_node_type = {
|
17
|
-
"Nokolexbor::Node",
|
18
|
-
{
|
19
|
-
0,
|
20
|
-
0,
|
21
|
-
},
|
22
|
-
0,
|
23
|
-
0,
|
24
|
-
RUBY_TYPED_FREE_IMMEDIATELY,
|
25
|
-
};
|
26
|
-
|
27
20
|
VALUE
|
28
21
|
nl_rb_node_create(lxb_dom_node_t *node, VALUE rb_document)
|
29
22
|
{
|
30
|
-
if (node == NULL)
|
31
|
-
{
|
23
|
+
if (node == NULL) {
|
32
24
|
rb_raise(rb_eArgError, "Cannot create Nokolexbor::Node with null pointer");
|
33
25
|
}
|
34
26
|
|
35
|
-
VALUE
|
27
|
+
VALUE rb_class;
|
28
|
+
switch (node->type) {
|
29
|
+
case LXB_DOM_NODE_TYPE_ELEMENT:
|
30
|
+
rb_class = cNokolexborElement;
|
31
|
+
break;
|
32
|
+
// case LXB_DOM_NODE_TYPE_ATTRIBUTE:
|
33
|
+
// break;
|
34
|
+
case LXB_DOM_NODE_TYPE_TEXT:
|
35
|
+
rb_class = cNokolexborText;
|
36
|
+
break;
|
37
|
+
case LXB_DOM_NODE_TYPE_CDATA_SECTION:
|
38
|
+
rb_class = cNokolexborCharacterData;
|
39
|
+
break;
|
40
|
+
// case LXB_DOM_NODE_TYPE_ENTITY_REFERENCE:
|
41
|
+
// break;
|
42
|
+
// case LXB_DOM_NODE_TYPE_ENTITY:
|
43
|
+
// break;
|
44
|
+
// case LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION:
|
45
|
+
// break;
|
46
|
+
case LXB_DOM_NODE_TYPE_COMMENT:
|
47
|
+
rb_class = cNokolexborComment;
|
48
|
+
break;
|
49
|
+
// case LXB_DOM_NODE_TYPE_DOCUMENT:
|
50
|
+
// break;
|
51
|
+
// case LXB_DOM_NODE_TYPE_DOCUMENT_TYPE:
|
52
|
+
// break;
|
53
|
+
// case LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT:
|
54
|
+
// break;
|
55
|
+
// case LXB_DOM_NODE_TYPE_NOTATION:
|
56
|
+
// break;
|
57
|
+
default:
|
58
|
+
rb_class = cNokolexborNode;
|
59
|
+
}
|
60
|
+
|
61
|
+
VALUE ret = Data_Wrap_Struct(rb_class, NULL, NULL, node);
|
36
62
|
rb_iv_set(ret, "@document", rb_document);
|
37
63
|
return ret;
|
38
64
|
}
|
@@ -41,13 +67,10 @@ inline lxb_dom_node_t *
|
|
41
67
|
nl_rb_node_unwrap(VALUE rb_node)
|
42
68
|
{
|
43
69
|
lxb_dom_node_t *node;
|
44
|
-
if (
|
45
|
-
{
|
70
|
+
if (rb_obj_is_kind_of(rb_node, cNokolexborDocument)) {
|
46
71
|
TypedData_Get_Struct(rb_node, lxb_dom_node_t, &nl_document_type, node);
|
47
|
-
}
|
48
|
-
|
49
|
-
{
|
50
|
-
TypedData_Get_Struct(rb_node, lxb_dom_node_t, &nl_node_type, node);
|
72
|
+
} else {
|
73
|
+
Data_Get_Struct(rb_node, lxb_dom_node_t, node);
|
51
74
|
}
|
52
75
|
return node;
|
53
76
|
}
|
@@ -62,23 +85,22 @@ nl_node_new(int argc, VALUE *argv, VALUE klass)
|
|
62
85
|
|
63
86
|
rb_scan_args(argc, argv, "2*", &rb_name, &rb_document, &rest);
|
64
87
|
|
65
|
-
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument))
|
66
|
-
{
|
88
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
67
89
|
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
68
90
|
}
|
69
91
|
|
70
92
|
document = nl_rb_document_unwrap(rb_document);
|
71
93
|
|
72
|
-
|
73
|
-
|
74
|
-
|
94
|
+
const char* c_name = StringValuePtr(rb_name);
|
95
|
+
size_t name_len = RSTRING_LEN(rb_name);
|
96
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(document, (const lxb_char_t *)c_name, name_len, NULL);
|
97
|
+
if (element == NULL) {
|
75
98
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
76
99
|
}
|
77
100
|
|
78
101
|
VALUE rb_node = nl_rb_node_create(&element->node, rb_document);
|
79
102
|
|
80
|
-
if (rb_block_given_p())
|
81
|
-
{
|
103
|
+
if (rb_block_given_p()) {
|
82
104
|
rb_yield(rb_node);
|
83
105
|
}
|
84
106
|
|
@@ -92,8 +114,7 @@ nl_node_content(VALUE self)
|
|
92
114
|
|
93
115
|
size_t str_len = 0;
|
94
116
|
lxb_char_t *text = lxb_dom_node_text_content(node, &str_len);
|
95
|
-
if (text == NULL)
|
96
|
-
{
|
117
|
+
if (text == NULL) {
|
97
118
|
return rb_str_new("", 0);
|
98
119
|
}
|
99
120
|
VALUE rb_str = rb_utf8_str_new((char *)text, str_len);
|
@@ -102,13 +123,26 @@ nl_node_content(VALUE self)
|
|
102
123
|
return rb_str;
|
103
124
|
}
|
104
125
|
|
126
|
+
static VALUE
|
127
|
+
nl_node_content_set(VALUE self, VALUE content)
|
128
|
+
{
|
129
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
130
|
+
|
131
|
+
const char *c_content = StringValuePtr(content);
|
132
|
+
size_t content_len = RSTRING_LEN(content);
|
133
|
+
lxb_status_t status = lxb_dom_node_text_content_set(node, (const lxb_char_t *)c_content, content_len);
|
134
|
+
if (status != LXB_STATUS_OK) {
|
135
|
+
nl_raise_lexbor_error(status);
|
136
|
+
}
|
137
|
+
return content;
|
138
|
+
}
|
139
|
+
|
105
140
|
static VALUE
|
106
141
|
nl_node_get_attr(VALUE self, VALUE rb_attr)
|
107
142
|
{
|
108
143
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
109
144
|
|
110
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
111
|
-
{
|
145
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
112
146
|
return Qnil;
|
113
147
|
}
|
114
148
|
|
@@ -118,8 +152,7 @@ nl_node_get_attr(VALUE self, VALUE rb_attr)
|
|
118
152
|
|
119
153
|
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
120
154
|
|
121
|
-
if (!lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len))
|
122
|
-
{
|
155
|
+
if (!lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len)) {
|
123
156
|
return Qnil;
|
124
157
|
}
|
125
158
|
|
@@ -134,8 +167,7 @@ nl_node_set_attr(VALUE self, VALUE rb_attr, VALUE rb_value)
|
|
134
167
|
{
|
135
168
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
136
169
|
|
137
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
138
|
-
{
|
170
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
139
171
|
return Qnil;
|
140
172
|
}
|
141
173
|
|
@@ -159,8 +191,7 @@ nl_node_remove_attr(VALUE self, VALUE rb_attr)
|
|
159
191
|
{
|
160
192
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
161
193
|
|
162
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
163
|
-
{
|
194
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
164
195
|
return Qnil;
|
165
196
|
}
|
166
197
|
|
@@ -179,8 +210,7 @@ nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *sp
|
|
179
210
|
{
|
180
211
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
181
212
|
lxb_status_t status = lexbor_array_push_unique(array, node);
|
182
|
-
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED)
|
183
|
-
{
|
213
|
+
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
|
184
214
|
nl_raise_lexbor_error(status);
|
185
215
|
}
|
186
216
|
// Stop at first result
|
@@ -192,8 +222,7 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
|
|
192
222
|
{
|
193
223
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
194
224
|
lxb_status_t status = lexbor_array_push_unique(array, node);
|
195
|
-
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED)
|
196
|
-
{
|
225
|
+
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
|
197
226
|
nl_raise_lexbor_error(status);
|
198
227
|
}
|
199
228
|
return LXB_STATUS_OK;
|
@@ -215,32 +244,28 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
215
244
|
/* Create CSS parser. */
|
216
245
|
parser = lxb_css_parser_create();
|
217
246
|
status = lxb_css_parser_init(parser, NULL, NULL);
|
218
|
-
if (status != LXB_STATUS_OK)
|
219
|
-
{
|
247
|
+
if (status != LXB_STATUS_OK) {
|
220
248
|
goto cleanup;
|
221
249
|
}
|
222
250
|
|
223
251
|
/* Selectors. */
|
224
252
|
selectors = lxb_selectors_create();
|
225
253
|
status = lxb_selectors_init(selectors);
|
226
|
-
if (status != LXB_STATUS_OK)
|
227
|
-
{
|
254
|
+
if (status != LXB_STATUS_OK) {
|
228
255
|
goto cleanup;
|
229
256
|
}
|
230
257
|
|
231
258
|
/* Parse and get the log. */
|
232
259
|
// TODO: Cache the list for reuse, improves performance
|
233
260
|
list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
|
234
|
-
if (parser->status != LXB_STATUS_OK)
|
235
|
-
{
|
261
|
+
if (parser->status != LXB_STATUS_OK) {
|
236
262
|
status = parser->status;
|
237
263
|
goto cleanup;
|
238
264
|
}
|
239
265
|
|
240
266
|
/* Find HTML nodes by CSS Selectors. */
|
241
267
|
status = lxb_selectors_find(selectors, node, list, cb, ctx);
|
242
|
-
if (status != LXB_STATUS_OK)
|
243
|
-
{
|
268
|
+
if (status != LXB_STATUS_OK) {
|
244
269
|
goto cleanup;
|
245
270
|
}
|
246
271
|
|
@@ -263,22 +288,16 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
263
288
|
size_t count = 1;
|
264
289
|
root->user = (void *)count;
|
265
290
|
lxb_dom_node_t *node = root;
|
266
|
-
do
|
267
|
-
|
268
|
-
if (node->first_child != NULL)
|
269
|
-
{
|
291
|
+
do {
|
292
|
+
if (node->first_child != NULL) {
|
270
293
|
node = node->first_child;
|
271
294
|
node->user = (void *)++count;
|
272
|
-
}
|
273
|
-
|
274
|
-
{
|
275
|
-
while (node != root && node->next == NULL)
|
276
|
-
{
|
295
|
+
} else {
|
296
|
+
while (node != root && node->next == NULL) {
|
277
297
|
node = node->parent;
|
278
298
|
}
|
279
299
|
|
280
|
-
if (node == root)
|
281
|
-
{
|
300
|
+
if (node == root) {
|
282
301
|
break;
|
283
302
|
}
|
284
303
|
|
@@ -293,21 +312,17 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
293
312
|
void nl_sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array)
|
294
313
|
{
|
295
314
|
// No need to sort if there's only one selector, the results are natually in document traversal order
|
296
|
-
if (strchr(RSTRING_PTR(selector), ',') != NULL)
|
297
|
-
{
|
315
|
+
if (strchr(RSTRING_PTR(selector), ',') != NULL) {
|
298
316
|
int need_order = 0;
|
299
317
|
// Check if we have already markded orders, note that
|
300
318
|
// we need to order again if new nodes are added to the document
|
301
|
-
for (size_t i = 0; i < array->length; i++)
|
302
|
-
|
303
|
-
if (((lxb_dom_node_t *)array->list[i])->user == 0)
|
304
|
-
{
|
319
|
+
for (size_t i = 0; i < array->length; i++) {
|
320
|
+
if (((lxb_dom_node_t *)array->list[i])->user == 0) {
|
305
321
|
need_order = 1;
|
306
322
|
break;
|
307
323
|
}
|
308
324
|
}
|
309
|
-
if (need_order)
|
310
|
-
{
|
325
|
+
if (need_order) {
|
311
326
|
mark_node_orders(&doc->node);
|
312
327
|
}
|
313
328
|
nl_css_result_tim_sort((lxb_dom_node_t **)&array->list[0], array->length);
|
@@ -322,14 +337,12 @@ nl_node_at_css(VALUE self, VALUE selector)
|
|
322
337
|
|
323
338
|
lxb_status_t status = nl_node_find(self, selector, nl_node_at_css_callback, array);
|
324
339
|
|
325
|
-
if (status != LXB_STATUS_OK)
|
326
|
-
{
|
340
|
+
if (status != LXB_STATUS_OK) {
|
327
341
|
lexbor_array_destroy(array, true);
|
328
342
|
nl_raise_lexbor_error(status);
|
329
343
|
}
|
330
344
|
|
331
|
-
if (array->length == 0)
|
332
|
-
{
|
345
|
+
if (array->length == 0) {
|
333
346
|
lexbor_array_destroy(array, true);
|
334
347
|
return Qnil;
|
335
348
|
}
|
@@ -350,8 +363,7 @@ nl_node_css(VALUE self, VALUE selector)
|
|
350
363
|
lexbor_array_t *array = lexbor_array_create();
|
351
364
|
|
352
365
|
lxb_status_t status = nl_node_find(self, selector, nl_node_css_callback, array);
|
353
|
-
if (status != LXB_STATUS_OK)
|
354
|
-
{
|
366
|
+
if (status != LXB_STATUS_OK) {
|
355
367
|
lexbor_array_destroy(array, true);
|
356
368
|
nl_raise_lexbor_error(status);
|
357
369
|
}
|
@@ -362,22 +374,34 @@ nl_node_css(VALUE self, VALUE selector)
|
|
362
374
|
}
|
363
375
|
|
364
376
|
static VALUE
|
365
|
-
nl_node_inner_html(VALUE self)
|
377
|
+
nl_node_inner_html(int argc, VALUE *argv, VALUE self)
|
366
378
|
{
|
367
379
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
368
380
|
lexbor_str_t str = {0};
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
381
|
+
VALUE options;
|
382
|
+
lxb_status_t status;
|
383
|
+
size_t indent = 0;
|
384
|
+
rb_scan_args(argc, argv, "01", &options);
|
385
|
+
|
386
|
+
if (TYPE(options) == T_HASH) {
|
387
|
+
VALUE rb_indent = rb_hash_aref(options, ID2SYM(rb_intern("indent")));
|
388
|
+
if (!NIL_P(rb_indent)) {
|
389
|
+
indent = NUM2INT(rb_indent);
|
390
|
+
}
|
391
|
+
}
|
392
|
+
if (indent > 0) {
|
393
|
+
status = lxb_html_serialize_pretty_deep_str(node, 0, 0, &str);
|
394
|
+
} else {
|
395
|
+
status = lxb_html_serialize_deep_str(node, &str);
|
396
|
+
}
|
397
|
+
if (status != LXB_STATUS_OK) {
|
398
|
+
if (str.data != NULL) {
|
374
399
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
375
400
|
}
|
376
401
|
nl_raise_lexbor_error(status);
|
377
402
|
}
|
378
403
|
|
379
|
-
if (str.data != NULL)
|
380
|
-
{
|
404
|
+
if (str.data != NULL) {
|
381
405
|
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
382
406
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
383
407
|
return ret;
|
@@ -387,22 +411,34 @@ nl_node_inner_html(VALUE self)
|
|
387
411
|
}
|
388
412
|
|
389
413
|
static VALUE
|
390
|
-
nl_node_outer_html(VALUE self)
|
414
|
+
nl_node_outer_html(int argc, VALUE *argv, VALUE self)
|
391
415
|
{
|
392
416
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
393
417
|
lexbor_str_t str = {0};
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
418
|
+
VALUE options;
|
419
|
+
lxb_status_t status;
|
420
|
+
size_t indent = 0;
|
421
|
+
rb_scan_args(argc, argv, "01", &options);
|
422
|
+
|
423
|
+
if (TYPE(options) == T_HASH) {
|
424
|
+
VALUE rb_indent = rb_hash_aref(options, ID2SYM(rb_intern("indent")));
|
425
|
+
if (!NIL_P(rb_indent)) {
|
426
|
+
indent = NUM2INT(rb_indent);
|
427
|
+
}
|
428
|
+
}
|
429
|
+
if (indent > 0) {
|
430
|
+
status = lxb_html_serialize_pretty_tree_str(node, 0, 0, &str);
|
431
|
+
} else {
|
432
|
+
status = lxb_html_serialize_tree_str(node, &str);
|
433
|
+
}
|
434
|
+
if (status != LXB_STATUS_OK) {
|
435
|
+
if (str.data != NULL) {
|
399
436
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
400
437
|
}
|
401
438
|
nl_raise_lexbor_error(status);
|
402
439
|
}
|
403
440
|
|
404
|
-
if (str.data != NULL)
|
405
|
-
{
|
441
|
+
if (str.data != NULL) {
|
406
442
|
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
407
443
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
408
444
|
return ret;
|
@@ -416,8 +452,7 @@ nl_node_has_key(VALUE self, VALUE rb_attr)
|
|
416
452
|
{
|
417
453
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
418
454
|
|
419
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
420
|
-
{
|
455
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
421
456
|
return Qfalse;
|
422
457
|
}
|
423
458
|
|
@@ -436,15 +471,13 @@ nl_node_keys(VALUE self)
|
|
436
471
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
437
472
|
VALUE ary_keys = rb_ary_new();
|
438
473
|
|
439
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
440
|
-
{
|
474
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
441
475
|
return ary_keys;
|
442
476
|
}
|
443
477
|
|
444
478
|
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
445
479
|
|
446
|
-
while (attr != NULL)
|
447
|
-
{
|
480
|
+
while (attr != NULL) {
|
448
481
|
size_t tmp_len;
|
449
482
|
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
450
483
|
rb_ary_push(ary_keys, rb_utf8_str_new((const char *)tmp, tmp_len));
|
@@ -461,23 +494,18 @@ nl_node_values(VALUE self)
|
|
461
494
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
462
495
|
VALUE ary_values = rb_ary_new();
|
463
496
|
|
464
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
465
|
-
{
|
497
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
466
498
|
return ary_values;
|
467
499
|
}
|
468
500
|
|
469
501
|
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
470
502
|
|
471
|
-
while (attr != NULL)
|
472
|
-
{
|
503
|
+
while (attr != NULL) {
|
473
504
|
size_t tmp_len;
|
474
505
|
const lxb_char_t *tmp = lxb_dom_attr_value(attr, &tmp_len);
|
475
|
-
if (tmp != NULL)
|
476
|
-
{
|
506
|
+
if (tmp != NULL) {
|
477
507
|
rb_ary_push(ary_values, rb_utf8_str_new((const char *)tmp, tmp_len));
|
478
|
-
}
|
479
|
-
else
|
480
|
-
{
|
508
|
+
} else {
|
481
509
|
rb_ary_push(ary_values, rb_str_new("", 0));
|
482
510
|
}
|
483
511
|
|
@@ -493,15 +521,13 @@ nl_node_attrs(VALUE self)
|
|
493
521
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
494
522
|
VALUE rb_hash = rb_hash_new();
|
495
523
|
|
496
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT)
|
497
|
-
{
|
524
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
498
525
|
return rb_hash;
|
499
526
|
}
|
500
527
|
|
501
528
|
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
502
529
|
|
503
|
-
while (attr != NULL)
|
504
|
-
{
|
530
|
+
while (attr != NULL) {
|
505
531
|
size_t tmp_len;
|
506
532
|
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
507
533
|
VALUE rb_key = rb_utf8_str_new((const char *)tmp, tmp_len);
|
@@ -535,11 +561,9 @@ static VALUE
|
|
535
561
|
nl_node_previous_element(VALUE self)
|
536
562
|
{
|
537
563
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
538
|
-
while (node->prev != NULL)
|
539
|
-
{
|
564
|
+
while (node->prev != NULL) {
|
540
565
|
node = node->prev;
|
541
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
542
|
-
{
|
566
|
+
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
543
567
|
return nl_rb_node_create(node, nl_rb_document_get(self));
|
544
568
|
}
|
545
569
|
}
|
@@ -557,11 +581,9 @@ static VALUE
|
|
557
581
|
nl_node_next_element(VALUE self)
|
558
582
|
{
|
559
583
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
560
|
-
while (node->next != NULL)
|
561
|
-
{
|
584
|
+
while (node->next != NULL) {
|
562
585
|
node = node->next;
|
563
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
564
|
-
{
|
586
|
+
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
565
587
|
return nl_rb_node_create(node, nl_rb_document_get(self));
|
566
588
|
}
|
567
589
|
}
|
@@ -575,8 +597,7 @@ nl_node_children(VALUE self)
|
|
575
597
|
lxb_dom_node_t *child = node->first_child;
|
576
598
|
lexbor_array_t *array = lexbor_array_create();
|
577
599
|
|
578
|
-
while (child != NULL)
|
579
|
-
{
|
600
|
+
while (child != NULL) {
|
580
601
|
lexbor_array_push(array, child);
|
581
602
|
child = child->next;
|
582
603
|
}
|
@@ -619,8 +640,7 @@ nl_node_equals(VALUE self, VALUE other)
|
|
619
640
|
const lxb_char_t *
|
620
641
|
lxb_dom_node_name_qualified(lxb_dom_node_t *node, size_t *len)
|
621
642
|
{
|
622
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
623
|
-
{
|
643
|
+
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
624
644
|
return lxb_dom_element_qualified_name(lxb_dom_interface_element(node),
|
625
645
|
len);
|
626
646
|
}
|
@@ -642,18 +662,15 @@ nl_node_parse_fragment(lxb_dom_document_t *doc, lxb_char_t *html, size_t size)
|
|
642
662
|
size_t tag_name_len;
|
643
663
|
lxb_html_document_t *html_doc = lxb_html_interface_document(doc);
|
644
664
|
const lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(html_doc), LXB_TAG__UNDEF, &tag_name_len);
|
645
|
-
if (tag_name == NULL)
|
646
|
-
{
|
665
|
+
if (tag_name == NULL) {
|
647
666
|
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
648
667
|
}
|
649
668
|
lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
|
650
|
-
if (element == NULL)
|
651
|
-
{
|
669
|
+
if (element == NULL) {
|
652
670
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
653
671
|
}
|
654
672
|
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(html_doc, element, html, size);
|
655
|
-
if (frag_root == NULL)
|
656
|
-
{
|
673
|
+
if (frag_root == NULL) {
|
657
674
|
rb_raise(rb_eArgError, "Error parsing HTML");
|
658
675
|
}
|
659
676
|
return frag_root;
|
@@ -677,39 +694,34 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
677
694
|
lxb_dom_document_t *doc = node->owner_document;
|
678
695
|
|
679
696
|
int insert_after;
|
680
|
-
if (rb_eql(rb_String(next_or_previous), rb_str_new_literal("next")))
|
681
|
-
{
|
697
|
+
if (rb_eql(rb_String(next_or_previous), rb_str_new_literal("next"))) {
|
682
698
|
insert_after = 1;
|
683
|
-
}
|
684
|
-
else if (rb_eql(rb_String(next_or_previous), rb_str_new_literal("previous")))
|
685
|
-
{
|
699
|
+
} else if (rb_eql(rb_String(next_or_previous), rb_str_new_literal("previous"))) {
|
686
700
|
insert_after = 0;
|
687
|
-
}
|
688
|
-
else
|
689
|
-
{
|
701
|
+
} else {
|
690
702
|
rb_raise(rb_eArgError, "Unsupported inserting position");
|
691
703
|
}
|
692
704
|
|
693
|
-
if (TYPE(new) == T_STRING)
|
694
|
-
{
|
705
|
+
if (TYPE(new) == T_STRING) {
|
695
706
|
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
707
|
+
lexbor_array_t *array = lexbor_array_create();
|
696
708
|
|
697
|
-
while (frag_root->first_child != NULL)
|
698
|
-
{
|
709
|
+
while (frag_root->first_child != NULL) {
|
699
710
|
lxb_dom_node_t *child = frag_root->first_child;
|
700
711
|
lxb_dom_node_remove(child);
|
701
712
|
insert_after ? lxb_dom_node_insert_after(node, child) : lxb_dom_node_insert_before(node, child);
|
713
|
+
lexbor_array_push(array, child);
|
702
714
|
}
|
703
715
|
lxb_dom_node_destroy(frag_root);
|
704
|
-
|
705
|
-
|
706
|
-
{
|
716
|
+
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
717
|
+
|
718
|
+
} else if (rb_obj_is_kind_of(new, cNokolexborNode)) {
|
707
719
|
lxb_dom_node_t *node_new = nl_rb_node_unwrap(new);
|
708
720
|
lxb_dom_node_remove(node_new);
|
709
721
|
insert_after ? lxb_dom_node_insert_after(node, node_new) : lxb_dom_node_insert_before(node, node_new);
|
710
|
-
|
711
|
-
|
712
|
-
{
|
722
|
+
return new;
|
723
|
+
|
724
|
+
} else {
|
713
725
|
rb_raise(rb_eArgError, "Unsupported node type");
|
714
726
|
}
|
715
727
|
return Qnil;
|
@@ -721,26 +733,26 @@ nl_node_add_child(VALUE self, VALUE new)
|
|
721
733
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
722
734
|
lxb_dom_document_t *doc = node->owner_document;
|
723
735
|
|
724
|
-
if (TYPE(new) == T_STRING)
|
725
|
-
{
|
736
|
+
if (TYPE(new) == T_STRING) {
|
726
737
|
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
738
|
+
lexbor_array_t *array = lexbor_array_create();
|
727
739
|
|
728
|
-
while (frag_root->first_child != NULL)
|
729
|
-
{
|
740
|
+
while (frag_root->first_child != NULL) {
|
730
741
|
lxb_dom_node_t *child = frag_root->first_child;
|
731
742
|
lxb_dom_node_remove(child);
|
732
743
|
lxb_dom_node_insert_child(node, child);
|
744
|
+
lexbor_array_push(array, child);
|
733
745
|
}
|
734
746
|
lxb_dom_node_destroy(frag_root);
|
735
|
-
|
736
|
-
|
737
|
-
{
|
747
|
+
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
748
|
+
|
749
|
+
} else if (rb_obj_is_kind_of(new, cNokolexborNode)) {
|
738
750
|
lxb_dom_node_t *node_new = nl_rb_node_unwrap(new);
|
739
751
|
lxb_dom_node_remove(node_new);
|
740
752
|
lxb_dom_node_insert_child(node, node_new);
|
741
|
-
|
742
|
-
|
743
|
-
{
|
753
|
+
return new;
|
754
|
+
|
755
|
+
} else {
|
744
756
|
rb_raise(rb_eArgError, "Unsupported node type");
|
745
757
|
}
|
746
758
|
return Qnil;
|
@@ -758,12 +770,10 @@ nl_node_first_element_child(VALUE self)
|
|
758
770
|
lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
|
759
771
|
lxb_dom_node_t *cur;
|
760
772
|
|
761
|
-
if (parent == NULL)
|
762
|
-
{
|
773
|
+
if (parent == NULL) {
|
763
774
|
return Qnil;
|
764
775
|
}
|
765
|
-
switch (parent->type)
|
766
|
-
{
|
776
|
+
switch (parent->type) {
|
767
777
|
case LXB_DOM_NODE_TYPE_ELEMENT:
|
768
778
|
case LXB_DOM_NODE_TYPE_ENTITY:
|
769
779
|
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
@@ -772,10 +782,8 @@ nl_node_first_element_child(VALUE self)
|
|
772
782
|
default:
|
773
783
|
return Qnil;
|
774
784
|
}
|
775
|
-
while (cur != NULL)
|
776
|
-
|
777
|
-
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
778
|
-
{
|
785
|
+
while (cur != NULL) {
|
786
|
+
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
779
787
|
return nl_rb_node_create(cur, nl_rb_document_get(self));
|
780
788
|
}
|
781
789
|
cur = cur->next;
|
@@ -789,12 +797,10 @@ nl_node_last_element_child(VALUE self)
|
|
789
797
|
lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
|
790
798
|
lxb_dom_node_t *cur;
|
791
799
|
|
792
|
-
if (parent == NULL)
|
793
|
-
{
|
800
|
+
if (parent == NULL) {
|
794
801
|
return Qnil;
|
795
802
|
}
|
796
|
-
switch (parent->type)
|
797
|
-
{
|
803
|
+
switch (parent->type) {
|
798
804
|
case LXB_DOM_NODE_TYPE_ELEMENT:
|
799
805
|
case LXB_DOM_NODE_TYPE_ENTITY:
|
800
806
|
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
@@ -803,10 +809,8 @@ nl_node_last_element_child(VALUE self)
|
|
803
809
|
default:
|
804
810
|
return Qnil;
|
805
811
|
}
|
806
|
-
while (cur != NULL)
|
807
|
-
|
808
|
-
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT)
|
809
|
-
{
|
812
|
+
while (cur != NULL) {
|
813
|
+
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
810
814
|
return nl_rb_node_create(cur, nl_rb_document_get(self));
|
811
815
|
}
|
812
816
|
cur = cur->prev;
|
@@ -827,16 +831,20 @@ void Init_nl_node(void)
|
|
827
831
|
cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
|
828
832
|
rb_undef_alloc_func(cNokolexborNode);
|
829
833
|
|
834
|
+
cNokolexborElement = rb_define_class_under(mNokolexbor, "Element", cNokolexborNode);
|
835
|
+
cNokolexborCharacterData = rb_define_class_under(mNokolexbor, "CharacterData", cNokolexborNode);
|
836
|
+
|
830
837
|
rb_define_singleton_method(cNokolexborNode, "new", nl_node_new, -1);
|
831
838
|
rb_define_method(cNokolexborNode, "content", nl_node_content, 0);
|
839
|
+
rb_define_method(cNokolexborNode, "content=", nl_node_content_set, 1);
|
832
840
|
rb_define_method(cNokolexborNode, "[]", nl_node_get_attr, 1);
|
833
841
|
rb_define_method(cNokolexborNode, "[]=", nl_node_set_attr, 2);
|
834
842
|
rb_define_method(cNokolexborNode, "remove_attr", nl_node_remove_attr, 1);
|
835
843
|
rb_define_method(cNokolexborNode, "==", nl_node_equals, 1);
|
836
844
|
rb_define_method(cNokolexborNode, "css_impl", nl_node_css, 1);
|
837
845
|
rb_define_method(cNokolexborNode, "at_css_impl", nl_node_at_css, 1);
|
838
|
-
rb_define_method(cNokolexborNode, "inner_html", nl_node_inner_html,
|
839
|
-
rb_define_method(cNokolexborNode, "outer_html", nl_node_outer_html,
|
846
|
+
rb_define_method(cNokolexborNode, "inner_html", nl_node_inner_html, -1);
|
847
|
+
rb_define_method(cNokolexborNode, "outer_html", nl_node_outer_html, -1);
|
840
848
|
rb_define_method(cNokolexborNode, "key?", nl_node_has_key, 1);
|
841
849
|
rb_define_method(cNokolexborNode, "keys", nl_node_keys, 0);
|
842
850
|
rb_define_method(cNokolexborNode, "values", nl_node_values, 0);
|
@@ -860,12 +868,17 @@ void Init_nl_node(void)
|
|
860
868
|
rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
|
861
869
|
|
862
870
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
871
|
+
rb_define_alias(cNokolexborNode, "get_attribute", "[]");
|
863
872
|
rb_define_alias(cNokolexborNode, "set_attr", "[]=");
|
873
|
+
rb_define_alias(cNokolexborNode, "set_attribute", "[]=");
|
874
|
+
rb_define_alias(cNokolexborNode, "has_attribute?", "key?");
|
864
875
|
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
876
|
+
rb_define_alias(cNokolexborNode, "remove_attribute", "remove_attr");
|
865
877
|
rb_define_alias(cNokolexborNode, "text", "content");
|
866
878
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
867
879
|
rb_define_alias(cNokolexborNode, "to_str", "content");
|
868
880
|
rb_define_alias(cNokolexborNode, "to_html", "outer_html");
|
881
|
+
rb_define_alias(cNokolexborNode, "serialize", "outer_html");
|
869
882
|
rb_define_alias(cNokolexborNode, "to_s", "outer_html");
|
870
883
|
rb_define_alias(cNokolexborNode, "unlink", "remove");
|
871
884
|
rb_define_alias(cNokolexborNode, "type", "node_type");
|