nokolexbor 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/extconf.rb +12 -6
- data/ext/nokolexbor/memory.c +7 -0
- data/ext/nokolexbor/nl_document.c +23 -5
- data/ext/nokolexbor/nl_node.c +95 -64
- data/ext/nokolexbor/nl_node_set.c +57 -22
- data/ext/nokolexbor/nl_xpath_context.c +82 -17
- data/ext/nokolexbor/nokolexbor.h +2 -0
- data/ext/nokolexbor/xml_error.c +225 -27
- data/lib/nokolexbor/node.rb +2 -2
- data/lib/nokolexbor/node_set.rb +5 -1
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor/xpath.rb +69 -0
- data/lib/nokolexbor.rb +1 -0
- data/patches/0003-lexbor-attach-template-content-to-self.patch +13 -0
- metadata +21 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4f935c5e81aed7b5d964e7332a9abeaa1f44dec7e2532f01c89dfd17df14ded
|
4
|
+
data.tar.gz: 91c58aca5a9b16a3cfa7e2436dfb86feeb777daa3615b8b3a95b234981cc67d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af6ae7b07bde270ea275a5b21221c7e90923f195a6a62ed7b85174f1d9f31f017661dc50c7d8dd1c3d39096a5c1ecbb3469f4496f2e628a3b7026f82ca3994c5
|
7
|
+
data.tar.gz: eea3a38dcbd81cdb7c54996248d6eaa2d2c51b964d79d3131ba04594dd449a141b32b225bc7a879844c92418f3c04e6b63c77c9c6bef86f8ee619fa6a47b03a3
|
data/ext/nokolexbor/extconf.rb
CHANGED
@@ -1,16 +1,22 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
require 'timeout'
|
3
3
|
|
4
|
-
# For debugging
|
5
|
-
# CONFIG["optflags"] = "-O0"
|
6
|
-
# CONFIG["debugflags"] = "-ggdb3"
|
7
|
-
|
8
4
|
cmake_flags = [ ENV["CMAKE_FLAGS"] ]
|
9
5
|
cmake_flags << "-DLEXBOR_BUILD_TESTS_CPP=OFF"
|
10
6
|
cmake_flags << "-DLEXBOR_BUILD_SHARED=OFF"
|
11
7
|
cmake_flags << "-DLEXBOR_BUILD_STATIC=ON"
|
12
|
-
|
13
|
-
|
8
|
+
|
9
|
+
if ENV['NOKOLEXBOR_DEBUG'] || ENV['NOKOLEXBOR_ASAN']
|
10
|
+
CONFIG["optflags"] = "-O0"
|
11
|
+
CONFIG["debugflags"] = "-ggdb3"
|
12
|
+
cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
|
13
|
+
end
|
14
|
+
|
15
|
+
if ENV['NOKOLEXBOR_ASAN']
|
16
|
+
$LDFLAGS << " -fsanitize=address"
|
17
|
+
$CFLAGS << " -fsanitize=address -DNOKOLEXBOR_ASAN"
|
18
|
+
cmake_flags << "-DLEXBOR_BUILD_WITH_ASAN=ON"
|
19
|
+
end
|
14
20
|
|
15
21
|
append_cflags("-DLEXBOR_STATIC")
|
16
22
|
append_cflags("-DLIBXML_STATIC")
|
data/ext/nokolexbor/memory.c
CHANGED
@@ -13,6 +13,11 @@
|
|
13
13
|
#include <ruby.h>
|
14
14
|
#include "lexbor/core/base.h"
|
15
15
|
|
16
|
+
// Disable using ruby memory functions when ASAN is enabled,
|
17
|
+
// otherwise memory leak info will be all about ruby which
|
18
|
+
// is useless.
|
19
|
+
#ifndef NOKOLEXBOR_ASAN
|
20
|
+
|
16
21
|
void *
|
17
22
|
lexbor_malloc(size_t size)
|
18
23
|
{
|
@@ -37,3 +42,5 @@ lexbor_free(void *dst)
|
|
37
42
|
ruby_xfree(dst);
|
38
43
|
return NULL;
|
39
44
|
}
|
45
|
+
|
46
|
+
#endif
|
@@ -5,7 +5,7 @@ extern VALUE cNokolexborNode;
|
|
5
5
|
VALUE cNokolexborDocument;
|
6
6
|
|
7
7
|
static void
|
8
|
-
free_nl_document(
|
8
|
+
free_nl_document(lxb_html_document_t *document)
|
9
9
|
{
|
10
10
|
lxb_html_document_destroy(document);
|
11
11
|
}
|
@@ -22,10 +22,20 @@ const rb_data_type_t nl_document_type = {
|
|
22
22
|
};
|
23
23
|
|
24
24
|
static VALUE
|
25
|
-
nl_document_parse(VALUE self, VALUE
|
25
|
+
nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
26
26
|
{
|
27
|
+
VALUE id_read = rb_intern("read");
|
28
|
+
VALUE rb_html;
|
29
|
+
if (rb_respond_to(rb_string_or_io, id_read))
|
30
|
+
{
|
31
|
+
rb_html = rb_funcall(rb_string_or_io, id_read, 0);
|
32
|
+
}
|
33
|
+
else
|
34
|
+
{
|
35
|
+
rb_html = rb_string_or_io;
|
36
|
+
}
|
27
37
|
const char *html_c = StringValuePtr(rb_html);
|
28
|
-
|
38
|
+
size_t html_len = RSTRING_LEN(rb_html);
|
29
39
|
|
30
40
|
lxb_html_document_t *document;
|
31
41
|
|
@@ -35,13 +45,13 @@ nl_document_parse(VALUE self, VALUE rb_html)
|
|
35
45
|
rb_raise(rb_eRuntimeError, "Error creating document");
|
36
46
|
}
|
37
47
|
|
38
|
-
lxb_status_t status = lxb_html_document_parse(document, html_c, html_len);
|
48
|
+
lxb_status_t status = lxb_html_document_parse(document, (const lxb_char_t *)html_c, html_len);
|
39
49
|
if (status != LXB_STATUS_OK)
|
40
50
|
{
|
41
51
|
nl_raise_lexbor_error(status);
|
42
52
|
}
|
43
53
|
|
44
|
-
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type,
|
54
|
+
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
|
45
55
|
}
|
46
56
|
|
47
57
|
static VALUE
|
@@ -50,6 +60,14 @@ nl_document_new(VALUE self)
|
|
50
60
|
return nl_document_parse(self, rb_str_new("", 0));
|
51
61
|
}
|
52
62
|
|
63
|
+
lxb_dom_document_t *
|
64
|
+
nl_rb_document_unwrap(VALUE rb_doc)
|
65
|
+
{
|
66
|
+
lxb_dom_document_t *doc;
|
67
|
+
TypedData_Get_Struct(rb_doc, lxb_dom_document_t, &nl_document_type, doc);
|
68
|
+
return doc;
|
69
|
+
}
|
70
|
+
|
53
71
|
void Init_nl_document(void)
|
54
72
|
{
|
55
73
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -43,7 +43,7 @@ nl_rb_node_unwrap(VALUE rb_node)
|
|
43
43
|
lxb_dom_node_t *node;
|
44
44
|
if (rb_obj_class(rb_node) == cNokolexborDocument)
|
45
45
|
{
|
46
|
-
TypedData_Get_Struct(rb_node,
|
46
|
+
TypedData_Get_Struct(rb_node, lxb_dom_node_t, &nl_document_type, node);
|
47
47
|
}
|
48
48
|
else
|
49
49
|
{
|
@@ -56,7 +56,6 @@ static VALUE
|
|
56
56
|
nl_node_new(int argc, VALUE *argv, VALUE klass)
|
57
57
|
{
|
58
58
|
lxb_dom_document_t *document;
|
59
|
-
lxb_dom_node_t *node;
|
60
59
|
VALUE rb_name;
|
61
60
|
VALUE rb_document;
|
62
61
|
VALUE rest;
|
@@ -68,9 +67,9 @@ nl_node_new(int argc, VALUE *argv, VALUE klass)
|
|
68
67
|
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
69
68
|
}
|
70
69
|
|
71
|
-
|
70
|
+
document = nl_rb_document_unwrap(rb_document);
|
72
71
|
|
73
|
-
lxb_dom_element_t *element = lxb_dom_document_create_element(document, StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
|
72
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(document, (const lxb_char_t *)StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
|
74
73
|
if (element == NULL)
|
75
74
|
{
|
76
75
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
@@ -97,7 +96,7 @@ nl_node_content(VALUE self)
|
|
97
96
|
{
|
98
97
|
return rb_str_new("", 0);
|
99
98
|
}
|
100
|
-
VALUE rb_str = rb_utf8_str_new(text, str_len);
|
99
|
+
VALUE rb_str = rb_utf8_str_new((char *)text, str_len);
|
101
100
|
lxb_dom_document_destroy_text(node->owner_document, text);
|
102
101
|
|
103
102
|
return rb_str;
|
@@ -115,19 +114,19 @@ nl_node_get_attr(VALUE self, VALUE rb_attr)
|
|
115
114
|
|
116
115
|
VALUE rb_attr_s = rb_String(rb_attr);
|
117
116
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
118
|
-
|
117
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
119
118
|
|
120
|
-
lxb_dom_element_t *element =
|
119
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
121
120
|
|
122
|
-
if (!lxb_dom_element_has_attribute(element, attr_c, attr_len))
|
121
|
+
if (!lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len))
|
123
122
|
{
|
124
123
|
return Qnil;
|
125
124
|
}
|
126
125
|
|
127
126
|
size_t attr_value_len;
|
128
|
-
|
127
|
+
const lxb_char_t *attr_value = lxb_dom_element_get_attribute(element, (const lxb_char_t *)attr_c, attr_len, &attr_value_len);
|
129
128
|
|
130
|
-
return rb_utf8_str_new(attr_value, attr_value_len);
|
129
|
+
return rb_utf8_str_new((const char *)attr_value, attr_value_len);
|
131
130
|
}
|
132
131
|
|
133
132
|
static VALUE
|
@@ -144,13 +143,13 @@ nl_node_set_attr(VALUE self, VALUE rb_attr, VALUE rb_value)
|
|
144
143
|
VALUE rb_value_s = rb_String(rb_value);
|
145
144
|
|
146
145
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
147
|
-
|
146
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
148
147
|
const char *value_c = RSTRING_PTR(rb_value_s);
|
149
|
-
|
148
|
+
size_t value_len = RSTRING_LEN(rb_value_s);
|
150
149
|
|
151
|
-
lxb_dom_element_t *element =
|
150
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
152
151
|
|
153
|
-
lxb_dom_element_set_attribute(element, attr_c, attr_len, value_c, value_len);
|
152
|
+
lxb_dom_element_set_attribute(element, (const lxb_char_t *)attr_c, attr_len, (const lxb_char_t *)value_c, value_len);
|
154
153
|
|
155
154
|
return rb_value;
|
156
155
|
}
|
@@ -168,14 +167,14 @@ nl_node_remove_attr(VALUE self, VALUE rb_attr)
|
|
168
167
|
VALUE rb_attr_s = rb_String(rb_attr);
|
169
168
|
|
170
169
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
171
|
-
|
170
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
172
171
|
|
173
|
-
lxb_dom_element_t *element =
|
172
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
174
173
|
|
175
|
-
return lxb_dom_element_remove_attribute(element, attr_c, attr_len) == LXB_STATUS_OK ? Qtrue : Qfalse;
|
174
|
+
return lxb_dom_element_remove_attribute(element, (const lxb_char_t *)attr_c, attr_len) == LXB_STATUS_OK ? Qtrue : Qfalse;
|
176
175
|
}
|
177
176
|
|
178
|
-
|
177
|
+
lxb_status_t
|
179
178
|
nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx)
|
180
179
|
{
|
181
180
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
@@ -188,7 +187,7 @@ nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *sp
|
|
188
187
|
return LXB_STATUS_STOP;
|
189
188
|
}
|
190
189
|
|
191
|
-
|
190
|
+
lxb_status_t
|
192
191
|
nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx)
|
193
192
|
{
|
194
193
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
@@ -200,45 +199,52 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
|
|
200
199
|
return LXB_STATUS_OK;
|
201
200
|
}
|
202
201
|
|
203
|
-
|
202
|
+
lxb_status_t
|
204
203
|
nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
205
204
|
{
|
206
205
|
const char *selector_c = StringValuePtr(selector);
|
207
|
-
|
206
|
+
size_t selector_len = RSTRING_LEN(selector);
|
208
207
|
|
209
208
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
210
209
|
|
210
|
+
lxb_status_t status;
|
211
|
+
lxb_css_parser_t *parser = NULL;
|
212
|
+
lxb_selectors_t *selectors = NULL;
|
213
|
+
lxb_css_selector_list_t *list = NULL;
|
214
|
+
|
211
215
|
/* Create CSS parser. */
|
212
|
-
|
213
|
-
|
216
|
+
parser = lxb_css_parser_create();
|
217
|
+
status = lxb_css_parser_init(parser, NULL, NULL);
|
214
218
|
if (status != LXB_STATUS_OK)
|
215
219
|
{
|
216
|
-
|
220
|
+
goto cleanup;
|
217
221
|
}
|
218
222
|
|
219
223
|
/* Selectors. */
|
220
|
-
|
224
|
+
selectors = lxb_selectors_create();
|
221
225
|
status = lxb_selectors_init(selectors);
|
222
226
|
if (status != LXB_STATUS_OK)
|
223
227
|
{
|
224
|
-
|
228
|
+
goto cleanup;
|
225
229
|
}
|
226
230
|
|
227
231
|
/* Parse and get the log. */
|
228
232
|
// TODO: Cache the list for reuse, improves performance
|
229
|
-
|
233
|
+
list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
|
230
234
|
if (parser->status != LXB_STATUS_OK)
|
231
235
|
{
|
232
|
-
|
236
|
+
status = parser->status;
|
237
|
+
goto cleanup;
|
233
238
|
}
|
234
239
|
|
235
240
|
/* Find HTML nodes by CSS Selectors. */
|
236
241
|
status = lxb_selectors_find(selectors, node, list, cb, ctx);
|
237
242
|
if (status != LXB_STATUS_OK)
|
238
243
|
{
|
239
|
-
|
244
|
+
goto cleanup;
|
240
245
|
}
|
241
246
|
|
247
|
+
cleanup:
|
242
248
|
/* Destroy Selectors object. */
|
243
249
|
(void)lxb_selectors_destroy(selectors, true);
|
244
250
|
|
@@ -247,20 +253,22 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
247
253
|
|
248
254
|
/* Destroy all object for all CSS Selector List. */
|
249
255
|
lxb_css_selector_list_destroy_memory(list);
|
256
|
+
|
257
|
+
return status;
|
250
258
|
}
|
251
259
|
|
252
260
|
static void
|
253
261
|
mark_node_orders(lxb_dom_node_t *root)
|
254
262
|
{
|
255
|
-
|
256
|
-
root->user = count;
|
263
|
+
size_t count = 1;
|
264
|
+
root->user = (void *)count;
|
257
265
|
lxb_dom_node_t *node = root;
|
258
266
|
do
|
259
267
|
{
|
260
268
|
if (node->first_child != NULL)
|
261
269
|
{
|
262
270
|
node = node->first_child;
|
263
|
-
node->user = ++count;
|
271
|
+
node->user = (void *)++count;
|
264
272
|
}
|
265
273
|
else
|
266
274
|
{
|
@@ -275,7 +283,7 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
275
283
|
}
|
276
284
|
|
277
285
|
node = node->next;
|
278
|
-
node->user = ++count;
|
286
|
+
node->user = (void *)++count;
|
279
287
|
}
|
280
288
|
|
281
289
|
} while (true);
|
@@ -290,7 +298,7 @@ void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_arr
|
|
290
298
|
int need_order = 0;
|
291
299
|
// Check if we have already marked orders, note that
|
292
300
|
// we need to order again if new nodes are added to the document
|
293
|
-
for (
|
301
|
+
for (size_t i = 0; i < array->length; i++)
|
294
302
|
{
|
295
303
|
if (((lxb_dom_node_t *)array->list[i])->user == 0)
|
296
304
|
{
|
@@ -300,37 +308,53 @@ void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_arr
|
|
300
308
|
}
|
301
309
|
if (need_order)
|
302
310
|
{
|
303
|
-
mark_node_orders(doc);
|
311
|
+
mark_node_orders(&doc->node);
|
304
312
|
}
|
305
|
-
css_result_tim_sort(&array->list[0], array->length);
|
313
|
+
css_result_tim_sort((lxb_dom_node_t **)&array->list[0], array->length);
|
306
314
|
}
|
307
315
|
}
|
308
316
|
|
309
|
-
VALUE
|
317
|
+
static VALUE
|
310
318
|
nl_node_at_css(VALUE self, VALUE selector)
|
311
319
|
{
|
312
320
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
313
321
|
lexbor_array_t *array = lexbor_array_create();
|
314
322
|
|
315
|
-
nl_node_find(self, selector, nl_node_at_css_callback, array);
|
323
|
+
lxb_status_t status = nl_node_find(self, selector, nl_node_at_css_callback, array);
|
324
|
+
|
325
|
+
if (status != LXB_STATUS_OK)
|
326
|
+
{
|
327
|
+
lexbor_array_destroy(array, true);
|
328
|
+
nl_raise_lexbor_error(status);
|
329
|
+
}
|
316
330
|
|
317
331
|
if (array->length == 0)
|
318
332
|
{
|
333
|
+
lexbor_array_destroy(array, true);
|
319
334
|
return Qnil;
|
320
335
|
}
|
321
336
|
|
322
337
|
sort_nodes_if_necessary(selector, node->owner_document, array);
|
323
338
|
|
324
|
-
|
339
|
+
VALUE ret = nl_rb_node_create(array->list[0], nl_rb_document_get(self));
|
340
|
+
|
341
|
+
lexbor_array_destroy(array, true);
|
342
|
+
|
343
|
+
return ret;
|
325
344
|
}
|
326
345
|
|
327
|
-
VALUE
|
346
|
+
static VALUE
|
328
347
|
nl_node_css(VALUE self, VALUE selector)
|
329
348
|
{
|
330
349
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
331
350
|
lexbor_array_t *array = lexbor_array_create();
|
332
351
|
|
333
|
-
nl_node_find(self, selector, nl_node_css_callback, array);
|
352
|
+
lxb_status_t status = nl_node_find(self, selector, nl_node_css_callback, array);
|
353
|
+
if (status != LXB_STATUS_OK)
|
354
|
+
{
|
355
|
+
lexbor_array_destroy(array, true);
|
356
|
+
nl_raise_lexbor_error(status);
|
357
|
+
}
|
334
358
|
|
335
359
|
sort_nodes_if_necessary(selector, node->owner_document, array);
|
336
360
|
|
@@ -354,7 +378,7 @@ nl_node_inner_html(VALUE self)
|
|
354
378
|
|
355
379
|
if (str.data != NULL)
|
356
380
|
{
|
357
|
-
VALUE ret = rb_utf8_str_new(str.data, str.length);
|
381
|
+
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
358
382
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
359
383
|
return ret;
|
360
384
|
}
|
@@ -379,7 +403,7 @@ nl_node_outer_html(VALUE self)
|
|
379
403
|
|
380
404
|
if (str.data != NULL)
|
381
405
|
{
|
382
|
-
VALUE ret = rb_utf8_str_new(str.data, str.length);
|
406
|
+
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
383
407
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
384
408
|
return ret;
|
385
409
|
}
|
@@ -399,11 +423,11 @@ nl_node_has_key(VALUE self, VALUE rb_attr)
|
|
399
423
|
|
400
424
|
VALUE rb_attr_s = rb_String(rb_attr);
|
401
425
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
402
|
-
|
426
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
403
427
|
|
404
|
-
lxb_dom_element_t *element =
|
428
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
405
429
|
|
406
|
-
return lxb_dom_element_has_attribute(element, attr_c, attr_len) ? Qtrue : Qfalse;
|
430
|
+
return lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len) ? Qtrue : Qfalse;
|
407
431
|
}
|
408
432
|
|
409
433
|
static VALUE
|
@@ -417,13 +441,13 @@ nl_node_keys(VALUE self)
|
|
417
441
|
return ary_keys;
|
418
442
|
}
|
419
443
|
|
420
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
444
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
421
445
|
|
422
446
|
while (attr != NULL)
|
423
447
|
{
|
424
448
|
size_t tmp_len;
|
425
|
-
lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
426
|
-
rb_ary_push(ary_keys, rb_utf8_str_new(tmp, tmp_len));
|
449
|
+
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
450
|
+
rb_ary_push(ary_keys, rb_utf8_str_new((const char *)tmp, tmp_len));
|
427
451
|
|
428
452
|
attr = lxb_dom_element_next_attribute(attr);
|
429
453
|
}
|
@@ -442,15 +466,19 @@ nl_node_values(VALUE self)
|
|
442
466
|
return ary_values;
|
443
467
|
}
|
444
468
|
|
445
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
469
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
446
470
|
|
447
471
|
while (attr != NULL)
|
448
472
|
{
|
449
473
|
size_t tmp_len;
|
450
|
-
lxb_char_t *tmp = lxb_dom_attr_value(attr, &tmp_len);
|
474
|
+
const lxb_char_t *tmp = lxb_dom_attr_value(attr, &tmp_len);
|
451
475
|
if (tmp != NULL)
|
452
476
|
{
|
453
|
-
rb_ary_push(ary_values, rb_utf8_str_new(tmp, tmp_len));
|
477
|
+
rb_ary_push(ary_values, rb_utf8_str_new((const char *)tmp, tmp_len));
|
478
|
+
}
|
479
|
+
else
|
480
|
+
{
|
481
|
+
rb_ary_push(ary_values, rb_str_new("", 0));
|
454
482
|
}
|
455
483
|
|
456
484
|
attr = lxb_dom_element_next_attribute(attr);
|
@@ -470,16 +498,16 @@ nl_node_attrs(VALUE self)
|
|
470
498
|
return rb_hash;
|
471
499
|
}
|
472
500
|
|
473
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
501
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
474
502
|
|
475
503
|
while (attr != NULL)
|
476
504
|
{
|
477
505
|
size_t tmp_len;
|
478
|
-
lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
479
|
-
VALUE rb_key = rb_utf8_str_new(tmp, tmp_len);
|
506
|
+
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
507
|
+
VALUE rb_key = rb_utf8_str_new((const char *)tmp, tmp_len);
|
480
508
|
|
481
509
|
tmp = lxb_dom_attr_value(attr, &tmp_len);
|
482
|
-
VALUE rb_value = tmp != NULL ? rb_utf8_str_new(tmp, tmp_len) :
|
510
|
+
VALUE rb_value = tmp != NULL ? rb_utf8_str_new((const char *)tmp, tmp_len) : rb_str_new("", 0);
|
483
511
|
|
484
512
|
rb_hash_aset(rb_hash, rb_key, rb_value);
|
485
513
|
|
@@ -604,15 +632,16 @@ nl_node_name(VALUE self)
|
|
604
632
|
{
|
605
633
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
606
634
|
size_t len;
|
607
|
-
lxb_char_t *name = lxb_dom_node_name_qualified(node, &len);
|
608
|
-
return rb_utf8_str_new(name, len);
|
635
|
+
const lxb_char_t *name = lxb_dom_node_name_qualified(node, &len);
|
636
|
+
return rb_utf8_str_new((const char *)name, len);
|
609
637
|
}
|
610
638
|
|
611
639
|
static lxb_dom_node_t *
|
612
|
-
nl_node_parse_fragment(
|
640
|
+
nl_node_parse_fragment(lxb_dom_document_t *doc, lxb_char_t *html, size_t size)
|
613
641
|
{
|
614
642
|
size_t tag_name_len;
|
615
|
-
|
643
|
+
lxb_html_document_t *html_doc = lxb_html_interface_document(doc);
|
644
|
+
const lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(html_doc), LXB_TAG__UNDEF, &tag_name_len);
|
616
645
|
if (tag_name == NULL)
|
617
646
|
{
|
618
647
|
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
@@ -622,7 +651,7 @@ nl_node_parse_fragment(lxb_html_document_t *doc, lxb_char_t *html, size_t size)
|
|
622
651
|
{
|
623
652
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
624
653
|
}
|
625
|
-
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(
|
654
|
+
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(html_doc, element, html, size);
|
626
655
|
if (frag_root == NULL)
|
627
656
|
{
|
628
657
|
rb_raise(rb_eArgError, "Error parsing HTML");
|
@@ -637,7 +666,7 @@ nl_node_fragment(VALUE self, VALUE html)
|
|
637
666
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
638
667
|
lxb_dom_document_t *doc = node->owner_document;
|
639
668
|
|
640
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(html), RSTRING_LEN(html));
|
669
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(html), RSTRING_LEN(html));
|
641
670
|
return nl_rb_node_create(frag_root, nl_rb_document_get(self));
|
642
671
|
}
|
643
672
|
|
@@ -663,7 +692,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
663
692
|
|
664
693
|
if (TYPE(new) == T_STRING)
|
665
694
|
{
|
666
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
695
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
667
696
|
|
668
697
|
while (frag_root->first_child != NULL)
|
669
698
|
{
|
@@ -694,7 +723,7 @@ nl_node_add_child(VALUE self, VALUE new)
|
|
694
723
|
|
695
724
|
if (TYPE(new) == T_STRING)
|
696
725
|
{
|
697
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
726
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
698
727
|
|
699
728
|
while (frag_root->first_child != NULL)
|
700
729
|
{
|
@@ -832,11 +861,13 @@ void Init_nl_node(void)
|
|
832
861
|
|
833
862
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
834
863
|
rb_define_alias(cNokolexborNode, "set_attr", "[]=");
|
864
|
+
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
835
865
|
rb_define_alias(cNokolexborNode, "text", "content");
|
836
866
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
837
867
|
rb_define_alias(cNokolexborNode, "to_str", "content");
|
838
868
|
rb_define_alias(cNokolexborNode, "to_html", "outer_html");
|
839
869
|
rb_define_alias(cNokolexborNode, "to_s", "outer_html");
|
870
|
+
rb_define_alias(cNokolexborNode, "unlink", "remove");
|
840
871
|
rb_define_alias(cNokolexborNode, "type", "node_type");
|
841
872
|
rb_define_alias(cNokolexborNode, "dup", "clone");
|
842
873
|
}
|