nokolexbor 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_document.c +12 -4
- data/ext/nokolexbor/nl_node.c +65 -55
- data/ext/nokolexbor/nl_node_set.c +43 -22
- data/ext/nokolexbor/nl_xpath_context.c +77 -17
- data/ext/nokolexbor/nokolexbor.h +2 -0
- data/ext/nokolexbor/xml_error.c +225 -27
- data/lib/nokolexbor/node.rb +2 -2
- data/lib/nokolexbor/node_set.rb +5 -1
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor/xpath.rb +69 -0
- data/lib/nokolexbor.rb +1 -0
- metadata +21 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e93c293e8d506960077c772b822d4f75103d6583b8864e56f343a8ad6229c7a
|
4
|
+
data.tar.gz: 1baaaed733eae123b895e21021709fed7a7e185a10e8c27cf0d43105e7e4a272
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 665232217ba5f1b0a53ad67dfcbb38244685fab9242bf718b1364b59c3b623e5150bdd471284c3424fb2c319761c30f1022d7792e1edf848e3c4c0a41de89806
|
7
|
+
data.tar.gz: 81cc64f435de161807ce0a6d3a0c8b0875a51f0f49307a763be8be4c70b3eefd0a5ea36fb364d2fec2e37b6b3307d549fc0f0bbd3afe35f47b6b797ea5998b96
|
@@ -5,7 +5,7 @@ extern VALUE cNokolexborNode;
|
|
5
5
|
VALUE cNokolexborDocument;
|
6
6
|
|
7
7
|
static void
|
8
|
-
free_nl_document(
|
8
|
+
free_nl_document(lxb_html_document_t *document)
|
9
9
|
{
|
10
10
|
lxb_html_document_destroy(document);
|
11
11
|
}
|
@@ -25,7 +25,7 @@ static VALUE
|
|
25
25
|
nl_document_parse(VALUE self, VALUE rb_html)
|
26
26
|
{
|
27
27
|
const char *html_c = StringValuePtr(rb_html);
|
28
|
-
|
28
|
+
size_t html_len = RSTRING_LEN(rb_html);
|
29
29
|
|
30
30
|
lxb_html_document_t *document;
|
31
31
|
|
@@ -35,13 +35,13 @@ nl_document_parse(VALUE self, VALUE rb_html)
|
|
35
35
|
rb_raise(rb_eRuntimeError, "Error creating document");
|
36
36
|
}
|
37
37
|
|
38
|
-
lxb_status_t status = lxb_html_document_parse(document, html_c, html_len);
|
38
|
+
lxb_status_t status = lxb_html_document_parse(document, (const lxb_char_t *)html_c, html_len);
|
39
39
|
if (status != LXB_STATUS_OK)
|
40
40
|
{
|
41
41
|
nl_raise_lexbor_error(status);
|
42
42
|
}
|
43
43
|
|
44
|
-
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type,
|
44
|
+
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
|
45
45
|
}
|
46
46
|
|
47
47
|
static VALUE
|
@@ -50,6 +50,14 @@ nl_document_new(VALUE self)
|
|
50
50
|
return nl_document_parse(self, rb_str_new("", 0));
|
51
51
|
}
|
52
52
|
|
53
|
+
lxb_dom_document_t *
|
54
|
+
nl_rb_document_unwrap(VALUE rb_doc)
|
55
|
+
{
|
56
|
+
lxb_dom_document_t *doc;
|
57
|
+
TypedData_Get_Struct(rb_doc, lxb_dom_document_t, &nl_document_type, doc);
|
58
|
+
return doc;
|
59
|
+
}
|
60
|
+
|
53
61
|
void Init_nl_document(void)
|
54
62
|
{
|
55
63
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -43,7 +43,7 @@ nl_rb_node_unwrap(VALUE rb_node)
|
|
43
43
|
lxb_dom_node_t *node;
|
44
44
|
if (rb_obj_class(rb_node) == cNokolexborDocument)
|
45
45
|
{
|
46
|
-
TypedData_Get_Struct(rb_node,
|
46
|
+
TypedData_Get_Struct(rb_node, lxb_dom_node_t, &nl_document_type, node);
|
47
47
|
}
|
48
48
|
else
|
49
49
|
{
|
@@ -56,7 +56,6 @@ static VALUE
|
|
56
56
|
nl_node_new(int argc, VALUE *argv, VALUE klass)
|
57
57
|
{
|
58
58
|
lxb_dom_document_t *document;
|
59
|
-
lxb_dom_node_t *node;
|
60
59
|
VALUE rb_name;
|
61
60
|
VALUE rb_document;
|
62
61
|
VALUE rest;
|
@@ -68,9 +67,9 @@ nl_node_new(int argc, VALUE *argv, VALUE klass)
|
|
68
67
|
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
69
68
|
}
|
70
69
|
|
71
|
-
|
70
|
+
document = nl_rb_document_unwrap(rb_document);
|
72
71
|
|
73
|
-
lxb_dom_element_t *element = lxb_dom_document_create_element(document, StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
|
72
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(document, (const lxb_char_t *)StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
|
74
73
|
if (element == NULL)
|
75
74
|
{
|
76
75
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
@@ -97,7 +96,7 @@ nl_node_content(VALUE self)
|
|
97
96
|
{
|
98
97
|
return rb_str_new("", 0);
|
99
98
|
}
|
100
|
-
VALUE rb_str = rb_utf8_str_new(text, str_len);
|
99
|
+
VALUE rb_str = rb_utf8_str_new((char *)text, str_len);
|
101
100
|
lxb_dom_document_destroy_text(node->owner_document, text);
|
102
101
|
|
103
102
|
return rb_str;
|
@@ -115,19 +114,19 @@ nl_node_get_attr(VALUE self, VALUE rb_attr)
|
|
115
114
|
|
116
115
|
VALUE rb_attr_s = rb_String(rb_attr);
|
117
116
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
118
|
-
|
117
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
119
118
|
|
120
|
-
lxb_dom_element_t *element =
|
119
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
121
120
|
|
122
|
-
if (!lxb_dom_element_has_attribute(element, attr_c, attr_len))
|
121
|
+
if (!lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len))
|
123
122
|
{
|
124
123
|
return Qnil;
|
125
124
|
}
|
126
125
|
|
127
126
|
size_t attr_value_len;
|
128
|
-
|
127
|
+
const lxb_char_t *attr_value = lxb_dom_element_get_attribute(element, (const lxb_char_t *)attr_c, attr_len, &attr_value_len);
|
129
128
|
|
130
|
-
return rb_utf8_str_new(attr_value, attr_value_len);
|
129
|
+
return rb_utf8_str_new((const char *)attr_value, attr_value_len);
|
131
130
|
}
|
132
131
|
|
133
132
|
static VALUE
|
@@ -144,13 +143,13 @@ nl_node_set_attr(VALUE self, VALUE rb_attr, VALUE rb_value)
|
|
144
143
|
VALUE rb_value_s = rb_String(rb_value);
|
145
144
|
|
146
145
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
147
|
-
|
146
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
148
147
|
const char *value_c = RSTRING_PTR(rb_value_s);
|
149
|
-
|
148
|
+
size_t value_len = RSTRING_LEN(rb_value_s);
|
150
149
|
|
151
|
-
lxb_dom_element_t *element =
|
150
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
152
151
|
|
153
|
-
lxb_dom_element_set_attribute(element, attr_c, attr_len, value_c, value_len);
|
152
|
+
lxb_dom_element_set_attribute(element, (const lxb_char_t *)attr_c, attr_len, (const lxb_char_t *)value_c, value_len);
|
154
153
|
|
155
154
|
return rb_value;
|
156
155
|
}
|
@@ -168,14 +167,14 @@ nl_node_remove_attr(VALUE self, VALUE rb_attr)
|
|
168
167
|
VALUE rb_attr_s = rb_String(rb_attr);
|
169
168
|
|
170
169
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
171
|
-
|
170
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
172
171
|
|
173
|
-
lxb_dom_element_t *element =
|
172
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
174
173
|
|
175
|
-
return lxb_dom_element_remove_attribute(element, attr_c, attr_len) == LXB_STATUS_OK ? Qtrue : Qfalse;
|
174
|
+
return lxb_dom_element_remove_attribute(element, (const lxb_char_t *)attr_c, attr_len) == LXB_STATUS_OK ? Qtrue : Qfalse;
|
176
175
|
}
|
177
176
|
|
178
|
-
|
177
|
+
lxb_status_t
|
179
178
|
nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx)
|
180
179
|
{
|
181
180
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
@@ -188,7 +187,7 @@ nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *sp
|
|
188
187
|
return LXB_STATUS_STOP;
|
189
188
|
}
|
190
189
|
|
191
|
-
|
190
|
+
lxb_status_t
|
192
191
|
nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx)
|
193
192
|
{
|
194
193
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
@@ -200,11 +199,11 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
|
|
200
199
|
return LXB_STATUS_OK;
|
201
200
|
}
|
202
201
|
|
203
|
-
|
202
|
+
void
|
204
203
|
nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
205
204
|
{
|
206
205
|
const char *selector_c = StringValuePtr(selector);
|
207
|
-
|
206
|
+
size_t selector_len = RSTRING_LEN(selector);
|
208
207
|
|
209
208
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
210
209
|
|
@@ -226,7 +225,7 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
226
225
|
|
227
226
|
/* Parse and get the log. */
|
228
227
|
// TODO: Cache the list for reuse, improves performance
|
229
|
-
lxb_css_selector_list_t *list = lxb_css_selectors_parse_relative_list(parser, selector_c, selector_len);
|
228
|
+
lxb_css_selector_list_t *list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
|
230
229
|
if (parser->status != LXB_STATUS_OK)
|
231
230
|
{
|
232
231
|
nl_raise_lexbor_error(parser->status);
|
@@ -252,15 +251,15 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
252
251
|
static void
|
253
252
|
mark_node_orders(lxb_dom_node_t *root)
|
254
253
|
{
|
255
|
-
|
256
|
-
root->user = count;
|
254
|
+
size_t count = 1;
|
255
|
+
root->user = (void *)count;
|
257
256
|
lxb_dom_node_t *node = root;
|
258
257
|
do
|
259
258
|
{
|
260
259
|
if (node->first_child != NULL)
|
261
260
|
{
|
262
261
|
node = node->first_child;
|
263
|
-
node->user = ++count;
|
262
|
+
node->user = (void *)++count;
|
264
263
|
}
|
265
264
|
else
|
266
265
|
{
|
@@ -275,7 +274,7 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
275
274
|
}
|
276
275
|
|
277
276
|
node = node->next;
|
278
|
-
node->user = ++count;
|
277
|
+
node->user = (void *)++count;
|
279
278
|
}
|
280
279
|
|
281
280
|
} while (true);
|
@@ -290,7 +289,7 @@ void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_arr
|
|
290
289
|
int need_order = 0;
|
291
290
|
// Check if we have already marked orders, note that
|
292
291
|
// we need to order again if new nodes are added to the document
|
293
|
-
for (
|
292
|
+
for (size_t i = 0; i < array->length; i++)
|
294
293
|
{
|
295
294
|
if (((lxb_dom_node_t *)array->list[i])->user == 0)
|
296
295
|
{
|
@@ -300,13 +299,13 @@ void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_arr
|
|
300
299
|
}
|
301
300
|
if (need_order)
|
302
301
|
{
|
303
|
-
mark_node_orders(doc);
|
302
|
+
mark_node_orders(&doc->node);
|
304
303
|
}
|
305
|
-
css_result_tim_sort(&array->list[0], array->length);
|
304
|
+
css_result_tim_sort((lxb_dom_node_t **)&array->list[0], array->length);
|
306
305
|
}
|
307
306
|
}
|
308
307
|
|
309
|
-
VALUE
|
308
|
+
static VALUE
|
310
309
|
nl_node_at_css(VALUE self, VALUE selector)
|
311
310
|
{
|
312
311
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
@@ -321,10 +320,14 @@ nl_node_at_css(VALUE self, VALUE selector)
|
|
321
320
|
|
322
321
|
sort_nodes_if_necessary(selector, node->owner_document, array);
|
323
322
|
|
324
|
-
|
323
|
+
VALUE ret = nl_rb_node_create(array->list[0], nl_rb_document_get(self));
|
324
|
+
|
325
|
+
lexbor_array_destroy(array, true);
|
326
|
+
|
327
|
+
return ret;
|
325
328
|
}
|
326
329
|
|
327
|
-
VALUE
|
330
|
+
static VALUE
|
328
331
|
nl_node_css(VALUE self, VALUE selector)
|
329
332
|
{
|
330
333
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
@@ -354,7 +357,7 @@ nl_node_inner_html(VALUE self)
|
|
354
357
|
|
355
358
|
if (str.data != NULL)
|
356
359
|
{
|
357
|
-
VALUE ret = rb_utf8_str_new(str.data, str.length);
|
360
|
+
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
358
361
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
359
362
|
return ret;
|
360
363
|
}
|
@@ -379,7 +382,7 @@ nl_node_outer_html(VALUE self)
|
|
379
382
|
|
380
383
|
if (str.data != NULL)
|
381
384
|
{
|
382
|
-
VALUE ret = rb_utf8_str_new(str.data, str.length);
|
385
|
+
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
383
386
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
384
387
|
return ret;
|
385
388
|
}
|
@@ -399,11 +402,11 @@ nl_node_has_key(VALUE self, VALUE rb_attr)
|
|
399
402
|
|
400
403
|
VALUE rb_attr_s = rb_String(rb_attr);
|
401
404
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
402
|
-
|
405
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
403
406
|
|
404
|
-
lxb_dom_element_t *element =
|
407
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
405
408
|
|
406
|
-
return lxb_dom_element_has_attribute(element, attr_c, attr_len) ? Qtrue : Qfalse;
|
409
|
+
return lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len) ? Qtrue : Qfalse;
|
407
410
|
}
|
408
411
|
|
409
412
|
static VALUE
|
@@ -417,13 +420,13 @@ nl_node_keys(VALUE self)
|
|
417
420
|
return ary_keys;
|
418
421
|
}
|
419
422
|
|
420
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
423
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
421
424
|
|
422
425
|
while (attr != NULL)
|
423
426
|
{
|
424
427
|
size_t tmp_len;
|
425
|
-
lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
426
|
-
rb_ary_push(ary_keys, rb_utf8_str_new(tmp, tmp_len));
|
428
|
+
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
429
|
+
rb_ary_push(ary_keys, rb_utf8_str_new((const char *)tmp, tmp_len));
|
427
430
|
|
428
431
|
attr = lxb_dom_element_next_attribute(attr);
|
429
432
|
}
|
@@ -442,15 +445,19 @@ nl_node_values(VALUE self)
|
|
442
445
|
return ary_values;
|
443
446
|
}
|
444
447
|
|
445
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
448
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
446
449
|
|
447
450
|
while (attr != NULL)
|
448
451
|
{
|
449
452
|
size_t tmp_len;
|
450
|
-
lxb_char_t *tmp = lxb_dom_attr_value(attr, &tmp_len);
|
453
|
+
const lxb_char_t *tmp = lxb_dom_attr_value(attr, &tmp_len);
|
451
454
|
if (tmp != NULL)
|
452
455
|
{
|
453
|
-
rb_ary_push(ary_values, rb_utf8_str_new(tmp, tmp_len));
|
456
|
+
rb_ary_push(ary_values, rb_utf8_str_new((const char *)tmp, tmp_len));
|
457
|
+
}
|
458
|
+
else
|
459
|
+
{
|
460
|
+
rb_ary_push(ary_values, rb_str_new("", 0));
|
454
461
|
}
|
455
462
|
|
456
463
|
attr = lxb_dom_element_next_attribute(attr);
|
@@ -470,16 +477,16 @@ nl_node_attrs(VALUE self)
|
|
470
477
|
return rb_hash;
|
471
478
|
}
|
472
479
|
|
473
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
480
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
474
481
|
|
475
482
|
while (attr != NULL)
|
476
483
|
{
|
477
484
|
size_t tmp_len;
|
478
|
-
lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
479
|
-
VALUE rb_key = rb_utf8_str_new(tmp, tmp_len);
|
485
|
+
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
486
|
+
VALUE rb_key = rb_utf8_str_new((const char *)tmp, tmp_len);
|
480
487
|
|
481
488
|
tmp = lxb_dom_attr_value(attr, &tmp_len);
|
482
|
-
VALUE rb_value = tmp != NULL ? rb_utf8_str_new(tmp, tmp_len) :
|
489
|
+
VALUE rb_value = tmp != NULL ? rb_utf8_str_new((const char *)tmp, tmp_len) : rb_str_new("", 0);
|
483
490
|
|
484
491
|
rb_hash_aset(rb_hash, rb_key, rb_value);
|
485
492
|
|
@@ -604,15 +611,16 @@ nl_node_name(VALUE self)
|
|
604
611
|
{
|
605
612
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
606
613
|
size_t len;
|
607
|
-
lxb_char_t *name = lxb_dom_node_name_qualified(node, &len);
|
608
|
-
return rb_utf8_str_new(name, len);
|
614
|
+
const lxb_char_t *name = lxb_dom_node_name_qualified(node, &len);
|
615
|
+
return rb_utf8_str_new((const char *)name, len);
|
609
616
|
}
|
610
617
|
|
611
618
|
static lxb_dom_node_t *
|
612
|
-
nl_node_parse_fragment(
|
619
|
+
nl_node_parse_fragment(lxb_dom_document_t *doc, lxb_char_t *html, size_t size)
|
613
620
|
{
|
614
621
|
size_t tag_name_len;
|
615
|
-
|
622
|
+
lxb_html_document_t *html_doc = lxb_html_interface_document(doc);
|
623
|
+
const lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(html_doc), LXB_TAG__UNDEF, &tag_name_len);
|
616
624
|
if (tag_name == NULL)
|
617
625
|
{
|
618
626
|
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
@@ -622,7 +630,7 @@ nl_node_parse_fragment(lxb_html_document_t *doc, lxb_char_t *html, size_t size)
|
|
622
630
|
{
|
623
631
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
624
632
|
}
|
625
|
-
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(
|
633
|
+
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(html_doc, element, html, size);
|
626
634
|
if (frag_root == NULL)
|
627
635
|
{
|
628
636
|
rb_raise(rb_eArgError, "Error parsing HTML");
|
@@ -637,7 +645,7 @@ nl_node_fragment(VALUE self, VALUE html)
|
|
637
645
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
638
646
|
lxb_dom_document_t *doc = node->owner_document;
|
639
647
|
|
640
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(html), RSTRING_LEN(html));
|
648
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(html), RSTRING_LEN(html));
|
641
649
|
return nl_rb_node_create(frag_root, nl_rb_document_get(self));
|
642
650
|
}
|
643
651
|
|
@@ -663,7 +671,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
663
671
|
|
664
672
|
if (TYPE(new) == T_STRING)
|
665
673
|
{
|
666
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
674
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
667
675
|
|
668
676
|
while (frag_root->first_child != NULL)
|
669
677
|
{
|
@@ -694,7 +702,7 @@ nl_node_add_child(VALUE self, VALUE new)
|
|
694
702
|
|
695
703
|
if (TYPE(new) == T_STRING)
|
696
704
|
{
|
697
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
705
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
698
706
|
|
699
707
|
while (frag_root->first_child != NULL)
|
700
708
|
{
|
@@ -832,11 +840,13 @@ void Init_nl_node(void)
|
|
832
840
|
|
833
841
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
834
842
|
rb_define_alias(cNokolexborNode, "set_attr", "[]=");
|
843
|
+
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
835
844
|
rb_define_alias(cNokolexborNode, "text", "content");
|
836
845
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
837
846
|
rb_define_alias(cNokolexborNode, "to_str", "content");
|
838
847
|
rb_define_alias(cNokolexborNode, "to_html", "outer_html");
|
839
848
|
rb_define_alias(cNokolexborNode, "to_s", "outer_html");
|
849
|
+
rb_define_alias(cNokolexborNode, "unlink", "remove");
|
840
850
|
rb_define_alias(cNokolexborNode, "type", "node_type");
|
841
851
|
rb_define_alias(cNokolexborNode, "dup", "clone");
|
842
852
|
}
|
@@ -5,15 +5,15 @@ extern VALUE cNokolexborNode;
|
|
5
5
|
VALUE cNokolexborNodeSet;
|
6
6
|
extern rb_data_type_t nl_document_type;
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
void nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx);
|
9
|
+
void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array);
|
10
|
+
lxb_status_t nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
|
11
|
+
lxb_status_t nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
|
12
12
|
|
13
13
|
lxb_status_t
|
14
14
|
lexbor_array_push_unique(lexbor_array_t *array, void *value)
|
15
15
|
{
|
16
|
-
for (
|
16
|
+
for (size_t i = 0; i < array->length; i++)
|
17
17
|
if (array->list[i] == value)
|
18
18
|
return LXB_STATUS_STOPPED;
|
19
19
|
|
@@ -91,7 +91,7 @@ nl_node_set_delete(VALUE self, VALUE rb_node)
|
|
91
91
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
92
92
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
93
93
|
|
94
|
-
|
94
|
+
size_t i;
|
95
95
|
for (i = 0; i < array->length; i++)
|
96
96
|
if (array->list[i] == node)
|
97
97
|
{
|
@@ -113,7 +113,7 @@ nl_node_set_is_include(VALUE self, VALUE rb_node)
|
|
113
113
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
114
114
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
115
115
|
|
116
|
-
for (
|
116
|
+
for (size_t i = 0; i < array->length; i++)
|
117
117
|
if (array->list[i] == node)
|
118
118
|
{
|
119
119
|
return Qtrue;
|
@@ -169,7 +169,7 @@ nl_node_set_subseq(VALUE self, long beg, long len)
|
|
169
169
|
}
|
170
170
|
}
|
171
171
|
|
172
|
-
for (
|
172
|
+
for (long j = beg; j < beg + len; ++j)
|
173
173
|
{
|
174
174
|
lxb_status_t status = lexbor_array_push(new_array, old_array->list[j]);
|
175
175
|
if (status != LXB_STATUS_OK)
|
@@ -177,7 +177,7 @@ nl_node_set_subseq(VALUE self, long beg, long len)
|
|
177
177
|
nl_raise_lexbor_error(status);
|
178
178
|
}
|
179
179
|
}
|
180
|
-
return
|
180
|
+
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
181
181
|
}
|
182
182
|
|
183
183
|
static VALUE
|
@@ -231,7 +231,7 @@ nl_node_set_to_array(VALUE self)
|
|
231
231
|
|
232
232
|
VALUE list = rb_ary_new2(array->length);
|
233
233
|
VALUE doc = nl_rb_document_get(self);
|
234
|
-
for (
|
234
|
+
for (size_t i = 0; i < array->length; i++)
|
235
235
|
{
|
236
236
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
237
237
|
VALUE rb_node = nl_rb_node_create(node, doc);
|
@@ -267,7 +267,7 @@ nl_node_set_union(VALUE self, VALUE other)
|
|
267
267
|
memcpy(new_array->list, self_array->list, sizeof(lxb_dom_node_t *) * self_array->length);
|
268
268
|
new_array->length = self_array->length;
|
269
269
|
|
270
|
-
for (
|
270
|
+
for (size_t i = 0; i < other_array->length; i++)
|
271
271
|
{
|
272
272
|
lexbor_array_push_unique(new_array, other_array->list[i]);
|
273
273
|
}
|
@@ -275,16 +275,15 @@ nl_node_set_union(VALUE self, VALUE other)
|
|
275
275
|
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
276
276
|
}
|
277
277
|
|
278
|
-
static
|
279
|
-
nl_node_set_find(VALUE self, VALUE selector,
|
278
|
+
static void
|
279
|
+
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
|
280
280
|
{
|
281
|
-
|
282
|
-
lxb_dom_document_t *doc;
|
283
|
-
TypedData_Get_Struct(rb_doc, lxb_dom_document_t, &nl_document_type, doc);
|
281
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
284
282
|
if (doc == NULL)
|
285
283
|
{
|
286
284
|
rb_raise(rb_eRuntimeError, "Error getting document");
|
287
285
|
}
|
286
|
+
// Wrap direct children with a temporary fragment so that they can be searched
|
288
287
|
lxb_dom_document_fragment_t *frag = lxb_dom_document_fragment_interface_create(doc);
|
289
288
|
if (frag == NULL)
|
290
289
|
{
|
@@ -302,7 +301,7 @@ nl_node_set_find(VALUE self, VALUE selector, nl_node_find_f finder)
|
|
302
301
|
}
|
303
302
|
}
|
304
303
|
// Backup original node data and re-group them into a fragment
|
305
|
-
for (
|
304
|
+
for (size_t i = 0; i < array->length; i++)
|
306
305
|
{
|
307
306
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
308
307
|
lxb_dom_node_t *backup_node = malloc(sizeof(lxb_dom_node_t));
|
@@ -320,29 +319,51 @@ nl_node_set_find(VALUE self, VALUE selector, nl_node_find_f finder)
|
|
320
319
|
}
|
321
320
|
VALUE rb_frag = nl_rb_node_create(&frag->node, nl_rb_document_get(self));
|
322
321
|
|
323
|
-
|
322
|
+
nl_node_find(rb_frag, selector, cb, ctx);
|
324
323
|
|
325
324
|
lxb_dom_document_fragment_interface_destroy(frag);
|
326
325
|
// Restore original node data
|
327
|
-
for (
|
326
|
+
for (size_t i = 0; i < array->length; i++)
|
328
327
|
{
|
329
328
|
memcpy(array->list[i], backup_array->list[i], sizeof(lxb_dom_node_t));
|
330
329
|
free(backup_array->list[i]);
|
331
330
|
}
|
332
331
|
lexbor_array_destroy(backup_array, true);
|
333
|
-
return ret;
|
334
332
|
}
|
335
333
|
|
336
334
|
static VALUE
|
337
335
|
nl_node_set_at_css(VALUE self, VALUE selector)
|
338
336
|
{
|
339
|
-
|
337
|
+
lexbor_array_t *array = lexbor_array_create();
|
338
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
339
|
+
|
340
|
+
nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
341
|
+
|
342
|
+
if (array->length == 0)
|
343
|
+
{
|
344
|
+
return Qnil;
|
345
|
+
}
|
346
|
+
|
347
|
+
sort_nodes_if_necessary(selector, doc, array);
|
348
|
+
|
349
|
+
VALUE ret = nl_rb_node_create(array->list[0], nl_rb_document_get(self));
|
350
|
+
|
351
|
+
lexbor_array_destroy(array, true);
|
352
|
+
|
353
|
+
return ret;
|
340
354
|
}
|
341
355
|
|
342
356
|
static VALUE
|
343
357
|
nl_node_set_css(VALUE self, VALUE selector)
|
344
358
|
{
|
345
|
-
|
359
|
+
lexbor_array_t *array = lexbor_array_create();
|
360
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
361
|
+
|
362
|
+
nl_node_set_find(self, selector, nl_node_css_callback, array);
|
363
|
+
|
364
|
+
sort_nodes_if_necessary(selector, doc, array);
|
365
|
+
|
366
|
+
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
346
367
|
}
|
347
368
|
|
348
369
|
void Init_nl_node_set(void)
|
@@ -7,9 +7,13 @@
|
|
7
7
|
#include "libxml/xpathInternals.h"
|
8
8
|
#include "libxml/parserInternals.h"
|
9
9
|
|
10
|
+
/* Convert a C string to a UTF-8 Ruby String, or yield Qnil for a NULL
 * pointer. The argument is evaluated twice, so pass side-effect-free
 * expressions only. */
#define RBSTR_OR_QNIL(_str) ((_str) ? rb_utf8_str_new_cstr(_str) : Qnil)
|
11
|
+
|
10
12
|
extern VALUE mNokolexbor;
|
11
13
|
extern VALUE cNokolexborNodeSet;
|
12
|
-
VALUE
|
14
|
+
VALUE cNokolexborXpathContext;
|
15
|
+
VALUE mNokolexborXpath;
|
16
|
+
VALUE cNokolexborXpathSyntaxError;
|
13
17
|
|
14
18
|
static void
|
15
19
|
free_xml_xpath_context(xmlXPathContextPtr ctx)
|
@@ -24,7 +28,7 @@ free_xml_xpath_context(xmlXPathContextPtr ctx)
|
|
24
28
|
* Register the namespace with +prefix+ and +uri+.
|
25
29
|
*/
|
26
30
|
static VALUE
|
27
|
-
|
31
|
+
nl_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
28
32
|
{
|
29
33
|
xmlXPathContextPtr ctx;
|
30
34
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
@@ -42,7 +46,7 @@ rb_xml_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
|
42
46
|
* Register the variable +name+ with +value+.
|
43
47
|
*/
|
44
48
|
static VALUE
|
45
|
-
|
49
|
+
nl_xpath_context_register_variable(VALUE self, VALUE name, VALUE value)
|
46
50
|
{
|
47
51
|
xmlXPathContextPtr ctx;
|
48
52
|
xmlXPathObjectPtr xmlValue;
|
@@ -69,7 +73,7 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
69
73
|
switch (c_xpath_object->type)
|
70
74
|
{
|
71
75
|
case XPATH_STRING:
|
72
|
-
rb_retval = rb_utf8_str_new_cstr(c_xpath_object->stringval);
|
76
|
+
rb_retval = rb_utf8_str_new_cstr((const char *)c_xpath_object->stringval);
|
73
77
|
xmlFree(c_xpath_object->stringval);
|
74
78
|
return rb_retval;
|
75
79
|
|
@@ -106,6 +110,60 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
106
110
|
}
|
107
111
|
}
|
108
112
|
|
113
|
+
static VALUE
|
114
|
+
nl_xpath_wrap_syntax_error(xmlErrorPtr error)
|
115
|
+
{
|
116
|
+
VALUE msg, e;
|
117
|
+
|
118
|
+
msg = (error && error->message) ? rb_utf8_str_new_cstr(error->message) : Qnil;
|
119
|
+
|
120
|
+
e = rb_class_new_instance(
|
121
|
+
1,
|
122
|
+
&msg,
|
123
|
+
cNokolexborXpathSyntaxError);
|
124
|
+
|
125
|
+
if (error)
|
126
|
+
{
|
127
|
+
rb_iv_set(e, "@domain", INT2NUM(error->domain));
|
128
|
+
rb_iv_set(e, "@code", INT2NUM(error->code));
|
129
|
+
rb_iv_set(e, "@level", INT2NUM((short)error->level));
|
130
|
+
rb_iv_set(e, "@file", RBSTR_OR_QNIL(error->file));
|
131
|
+
rb_iv_set(e, "@line", INT2NUM(error->line));
|
132
|
+
rb_iv_set(e, "@str1", RBSTR_OR_QNIL(error->str1));
|
133
|
+
rb_iv_set(e, "@str2", RBSTR_OR_QNIL(error->str2));
|
134
|
+
rb_iv_set(e, "@str3", RBSTR_OR_QNIL(error->str3));
|
135
|
+
rb_iv_set(e, "@int1", INT2NUM(error->int1));
|
136
|
+
rb_iv_set(e, "@column", INT2NUM(error->int2));
|
137
|
+
}
|
138
|
+
|
139
|
+
return e;
|
140
|
+
}
|
141
|
+
|
142
|
+
/*
 * Structured-error callback: ctx carries a Ruby Array (checked with
 * Check_Type); the error is wrapped by nl_xpath_wrap_syntax_error() and
 * appended to that array.
 */
static void nl_xpath_error_array_pusher(void *ctx, xmlErrorPtr error)
{
  VALUE rb_errors = (VALUE)ctx;

  Check_Type(rb_errors, T_ARRAY);

  VALUE rb_error = nl_xpath_wrap_syntax_error(error);
  rb_ary_push(rb_errors, rb_error);
}
|
148
|
+
|
149
|
+
static void
|
150
|
+
nl_xpath_generic_exception_pusher(void *ctx, const char *msg, ...)
|
151
|
+
{
|
152
|
+
VALUE rb_errors = (VALUE)ctx;
|
153
|
+
VALUE rb_message;
|
154
|
+
VALUE rb_exception;
|
155
|
+
|
156
|
+
Check_Type(rb_errors, T_ARRAY);
|
157
|
+
|
158
|
+
va_list args;
|
159
|
+
va_start(args, msg);
|
160
|
+
rb_message = rb_vsprintf(msg, args);
|
161
|
+
va_end(args);
|
162
|
+
|
163
|
+
rb_exception = rb_exc_new_str(cNokolexborXpathSyntaxError, rb_message);
|
164
|
+
rb_ary_push(rb_errors, rb_exception);
|
165
|
+
}
|
166
|
+
|
109
167
|
/*
|
110
168
|
* call-seq:
|
111
169
|
* evaluate(search_path, handler = nil)
|
@@ -113,7 +171,7 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
113
171
|
* Evaluate the +search_path+ returning an XML::XPath object.
|
114
172
|
*/
|
115
173
|
static VALUE
|
116
|
-
|
174
|
+
nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
117
175
|
{
|
118
176
|
VALUE search_path, xpath_handler;
|
119
177
|
VALUE retval = Qnil;
|
@@ -137,13 +195,13 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
137
195
|
// xmlXPathRegisterFuncLookup(ctx, handler_lookup, (void *)xpath_handler);
|
138
196
|
// }
|
139
197
|
|
140
|
-
|
141
|
-
|
198
|
+
xmlSetStructuredErrorFunc((void *)errors, nl_xpath_error_array_pusher);
|
199
|
+
xmlSetGenericErrorFunc((void *)errors, nl_xpath_generic_exception_pusher);
|
142
200
|
|
143
201
|
xpath = xmlXPathEvalExpression(query, ctx);
|
144
202
|
|
145
|
-
|
146
|
-
|
203
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
204
|
+
xmlSetGenericErrorFunc(NULL, NULL);
|
147
205
|
|
148
206
|
if (xpath == NULL)
|
149
207
|
{
|
@@ -156,7 +214,7 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
156
214
|
retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
|
157
215
|
}
|
158
216
|
|
159
|
-
|
217
|
+
xmlXPathFreeNodeSetList(xpath);
|
160
218
|
|
161
219
|
return retval;
|
162
220
|
}
|
@@ -168,7 +226,7 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
168
226
|
* Create a new XPathContext with +node+ as the reference point.
|
169
227
|
*/
|
170
228
|
static VALUE
|
171
|
-
|
229
|
+
nl_xpath_context_new(VALUE klass, VALUE rb_node)
|
172
230
|
{
|
173
231
|
xmlXPathContextPtr ctx;
|
174
232
|
VALUE self;
|
@@ -188,13 +246,15 @@ void Init_nl_xpath_context(void)
|
|
188
246
|
{
|
189
247
|
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
190
248
|
|
191
|
-
|
249
|
+
cNokolexborXpathContext = rb_define_class_under(mNokolexbor, "XPathContext", rb_cObject);
|
250
|
+
mNokolexborXpath = rb_define_module_under(mNokolexbor, "XPath");
|
251
|
+
cNokolexborXpathSyntaxError = rb_define_class_under(mNokolexborXpath, "SyntaxError", rb_eStandardError);
|
192
252
|
|
193
|
-
rb_undef_alloc_func(
|
253
|
+
rb_undef_alloc_func(cNokolexborXpathContext);
|
194
254
|
|
195
|
-
rb_define_singleton_method(
|
255
|
+
rb_define_singleton_method(cNokolexborXpathContext, "new", nl_xpath_context_new, 1);
|
196
256
|
|
197
|
-
rb_define_method(
|
198
|
-
rb_define_method(
|
199
|
-
rb_define_method(
|
257
|
+
rb_define_method(cNokolexborXpathContext, "evaluate", nl_xpath_context_evaluate, -1);
|
258
|
+
rb_define_method(cNokolexborXpathContext, "register_variable", nl_xpath_context_register_variable, 2);
|
259
|
+
rb_define_method(cNokolexborXpathContext, "register_ns", nl_xpath_context_register_ns, 2);
|
200
260
|
}
|
data/ext/nokolexbor/nokolexbor.h
CHANGED
@@ -28,6 +28,8 @@ lxb_inline VALUE nl_rb_document_get(VALUE rb_node_or_doc)
|
|
28
28
|
return rb_iv_get(rb_node_or_doc, "@document");
|
29
29
|
}
|
30
30
|
|
31
|
+
lxb_dom_document_t * nl_rb_document_unwrap(VALUE rb_doc);
|
32
|
+
|
31
33
|
const lxb_char_t *
|
32
34
|
lxb_dom_node_name_qualified(lxb_dom_node_t *node, size_t *len);
|
33
35
|
|
data/ext/nokolexbor/xml_error.c
CHANGED
@@ -3,12 +3,38 @@
|
|
3
3
|
#include <stdarg.h>
|
4
4
|
#include "libxml/xmlerror.h"
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
#define XML_GET_VAR_STR(msg, str) { \
|
7
|
+
int size, prev_size = -1; \
|
8
|
+
int chars; \
|
9
|
+
char *larger; \
|
10
|
+
va_list ap; \
|
11
|
+
\
|
12
|
+
str = (char *) xmlMalloc(150); \
|
13
|
+
if (str != NULL) { \
|
14
|
+
\
|
15
|
+
size = 150; \
|
16
|
+
\
|
17
|
+
while (size < 64000) { \
|
18
|
+
va_start(ap, msg); \
|
19
|
+
chars = vsnprintf(str, size, msg, ap); \
|
20
|
+
va_end(ap); \
|
21
|
+
if ((chars > -1) && (chars < size)) { \
|
22
|
+
if (prev_size == chars) { \
|
23
|
+
break; \
|
24
|
+
} else { \
|
25
|
+
prev_size = chars; \
|
26
|
+
} \
|
27
|
+
} \
|
28
|
+
if (chars > -1) \
|
29
|
+
size += chars + 1; \
|
30
|
+
else \
|
31
|
+
size += 100; \
|
32
|
+
if ((larger = (char *) xmlRealloc(str, size)) == NULL) {\
|
33
|
+
break; \
|
34
|
+
} \
|
35
|
+
str = larger; \
|
36
|
+
}} \
|
37
|
+
}
|
12
38
|
|
13
39
|
/**
|
14
40
|
* xmlGenericErrorDefaultFunc:
|
@@ -20,22 +46,57 @@ void *_xmlGenericErrorContext = NULL;
|
|
20
46
|
*/
|
21
47
|
void XMLCDECL
|
22
48
|
xmlGenericErrorDefaultFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) {
|
23
|
-
|
49
|
+
}
|
24
50
|
|
25
|
-
|
26
|
-
|
51
|
+
/**
|
52
|
+
* xmlCopyError:
|
53
|
+
* @from: a source error
|
54
|
+
* @to: a target error
|
55
|
+
*
|
56
|
+
* Save the original error to the new place.
|
57
|
+
*
|
58
|
+
* Returns 0 in case of success and -1 in case of error.
|
59
|
+
*/
|
60
|
+
int
|
61
|
+
xmlCopyError(xmlErrorPtr from, xmlErrorPtr to) {
|
62
|
+
char *message, *file, *str1, *str2, *str3;
|
27
63
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
64
|
+
if ((from == NULL) || (to == NULL))
|
65
|
+
return(-1);
|
66
|
+
|
67
|
+
message = (char *) xmlStrdup((xmlChar *) from->message);
|
68
|
+
file = (char *) xmlStrdup ((xmlChar *) from->file);
|
69
|
+
str1 = (char *) xmlStrdup ((xmlChar *) from->str1);
|
70
|
+
str2 = (char *) xmlStrdup ((xmlChar *) from->str2);
|
71
|
+
str3 = (char *) xmlStrdup ((xmlChar *) from->str3);
|
32
72
|
|
33
|
-
|
73
|
+
if (to->message != NULL)
|
74
|
+
xmlFree(to->message);
|
75
|
+
if (to->file != NULL)
|
76
|
+
xmlFree(to->file);
|
77
|
+
if (to->str1 != NULL)
|
78
|
+
xmlFree(to->str1);
|
79
|
+
if (to->str2 != NULL)
|
80
|
+
xmlFree(to->str2);
|
81
|
+
if (to->str3 != NULL)
|
82
|
+
xmlFree(to->str3);
|
83
|
+
to->domain = from->domain;
|
84
|
+
to->code = from->code;
|
85
|
+
to->level = from->level;
|
86
|
+
to->line = from->line;
|
87
|
+
to->node = from->node;
|
88
|
+
to->int1 = from->int1;
|
89
|
+
to->int2 = from->int2;
|
90
|
+
to->node = from->node;
|
91
|
+
to->ctxt = from->ctxt;
|
92
|
+
to->message = message;
|
93
|
+
to->file = file;
|
94
|
+
to->str1 = str1;
|
95
|
+
to->str2 = str2;
|
96
|
+
to->str3 = str3;
|
34
97
|
|
35
|
-
|
36
|
-
|
37
|
-
// return (&_xmlGenericError);
|
38
|
-
// }
|
98
|
+
return 0;
|
99
|
+
}
|
39
100
|
|
40
101
|
/**
|
41
102
|
* __xmlRaiseError:
|
@@ -63,16 +124,112 @@ xmlGenericErrorDefaultFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) {
|
|
63
124
|
*/
|
64
125
|
void XMLCDECL
|
65
126
|
__xmlRaiseError(xmlStructuredErrorFunc schannel,
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
127
|
+
xmlGenericErrorFunc channel, void *data, void *ctx,
|
128
|
+
void *nod, int domain, int code, xmlErrorLevel level,
|
129
|
+
const char *file, int line, const char *str1,
|
130
|
+
const char *str2, const char *str3, int int1, int col,
|
131
|
+
const char *msg, ...)
|
71
132
|
{
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
133
|
+
xmlParserCtxtPtr ctxt = NULL;
|
134
|
+
lxb_dom_node_t_ptr node = (lxb_dom_node_t_ptr)nod;
|
135
|
+
char *str = NULL;
|
136
|
+
xmlParserInputPtr input = NULL;
|
137
|
+
xmlErrorPtr to = &xmlLastError;
|
138
|
+
lxb_dom_node_t_ptr baseptr = NULL;
|
139
|
+
|
140
|
+
if (code == XML_ERR_OK)
|
141
|
+
return;
|
142
|
+
if ((xmlGetWarningsDefaultValue == 0) && (level == XML_ERR_WARNING))
|
143
|
+
return;
|
144
|
+
if ((domain == XML_FROM_PARSER) || (domain == XML_FROM_HTML) ||
|
145
|
+
(domain == XML_FROM_DTD) || (domain == XML_FROM_NAMESPACE) ||
|
146
|
+
(domain == XML_FROM_IO) || (domain == XML_FROM_VALID))
|
147
|
+
{
|
148
|
+
ctxt = (xmlParserCtxtPtr)ctx;
|
149
|
+
if ((schannel == NULL) && (ctxt != NULL) && (ctxt->sax != NULL) &&
|
150
|
+
(ctxt->sax->initialized == XML_SAX2_MAGIC) &&
|
151
|
+
(ctxt->sax->serror != NULL))
|
152
|
+
{
|
153
|
+
schannel = ctxt->sax->serror;
|
154
|
+
data = ctxt->userData;
|
155
|
+
}
|
156
|
+
}
|
157
|
+
/*
|
158
|
+
* Check if structured error handler set
|
159
|
+
*/
|
160
|
+
if (schannel == NULL)
|
161
|
+
{
|
162
|
+
schannel = xmlStructuredError;
|
163
|
+
/*
|
164
|
+
* if user has defined handler, change data ptr to user's choice
|
165
|
+
*/
|
166
|
+
if (schannel != NULL)
|
167
|
+
data = xmlStructuredErrorContext;
|
168
|
+
}
|
169
|
+
/*
|
170
|
+
* Formatting the message
|
171
|
+
*/
|
172
|
+
if (msg == NULL)
|
173
|
+
{
|
174
|
+
str = (char *)xmlStrdup(BAD_CAST "No error message provided");
|
175
|
+
}
|
176
|
+
else
|
177
|
+
{
|
178
|
+
XML_GET_VAR_STR(msg, str);
|
179
|
+
}
|
180
|
+
|
181
|
+
/*
|
182
|
+
* specific processing if a parser context is provided
|
183
|
+
*/
|
184
|
+
if (ctxt != NULL)
|
185
|
+
{
|
186
|
+
if (file == NULL)
|
187
|
+
{
|
188
|
+
input = ctxt->input;
|
189
|
+
if ((input != NULL) && (input->filename == NULL) &&
|
190
|
+
(ctxt->inputNr > 1))
|
191
|
+
{
|
192
|
+
input = ctxt->inputTab[ctxt->inputNr - 2];
|
193
|
+
}
|
194
|
+
if (input != NULL)
|
195
|
+
{
|
196
|
+
file = input->filename;
|
197
|
+
line = input->line;
|
198
|
+
col = input->col;
|
199
|
+
}
|
200
|
+
}
|
201
|
+
to = &ctxt->lastError;
|
202
|
+
}
|
203
|
+
|
204
|
+
/*
|
205
|
+
* Save the information about the error
|
206
|
+
*/
|
207
|
+
xmlResetError(to);
|
208
|
+
to->domain = domain;
|
209
|
+
to->code = code;
|
210
|
+
to->message = str;
|
211
|
+
to->level = level;
|
212
|
+
if (file != NULL)
|
213
|
+
to->file = (char *)xmlStrdup((const xmlChar *)file);
|
214
|
+
to->line = line;
|
215
|
+
if (str1 != NULL)
|
216
|
+
to->str1 = (char *)xmlStrdup((const xmlChar *)str1);
|
217
|
+
if (str2 != NULL)
|
218
|
+
to->str2 = (char *)xmlStrdup((const xmlChar *)str2);
|
219
|
+
if (str3 != NULL)
|
220
|
+
to->str3 = (char *)xmlStrdup((const xmlChar *)str3);
|
221
|
+
to->int1 = int1;
|
222
|
+
to->int2 = col;
|
223
|
+
to->node = node;
|
224
|
+
to->ctxt = ctx;
|
225
|
+
|
226
|
+
if (to != &xmlLastError)
|
227
|
+
xmlCopyError(to, &xmlLastError);
|
228
|
+
|
229
|
+
if (schannel != NULL)
|
230
|
+
{
|
231
|
+
schannel(data, to);
|
232
|
+
}
|
76
233
|
}
|
77
234
|
|
78
235
|
/**
|
@@ -131,4 +288,45 @@ __xmlSimpleError(int domain, int code, lxb_dom_node_t_ptr node,
|
|
131
288
|
code, XML_ERR_ERROR, NULL, 0, extra,
|
132
289
|
NULL, NULL, 0, 0, msg, extra);
|
133
290
|
}
|
291
|
+
}
|
292
|
+
|
293
|
+
/**
|
294
|
+
* xmlSetGenericErrorFunc:
|
295
|
+
* @ctx: the new error handling context
|
296
|
+
* @handler: the new handler function
|
297
|
+
*
|
298
|
+
* Function to reset the handler and the error context for out of
|
299
|
+
* context error messages.
|
300
|
+
* This simply means that @handler will be called for subsequent
|
301
|
+
* error messages while not parsing nor validating. And @ctx will
|
302
|
+
* be passed as first argument to @handler
|
303
|
+
* One can simply force messages to be emitted to another FILE * than
|
304
|
+
* stderr by setting @ctx to this file handle and @handler to NULL.
|
305
|
+
* For multi-threaded applications, this must be set separately for each thread.
|
306
|
+
*/
|
307
|
+
void
|
308
|
+
xmlSetGenericErrorFunc(void *ctx, xmlGenericErrorFunc handler) {
|
309
|
+
xmlGenericErrorContext = ctx;
|
310
|
+
if (handler != NULL)
|
311
|
+
xmlGenericError = handler;
|
312
|
+
else
|
313
|
+
xmlGenericError = xmlGenericErrorDefaultFunc;
|
314
|
+
}
|
315
|
+
|
316
|
+
/**
|
317
|
+
* xmlSetStructuredErrorFunc:
|
318
|
+
* @ctx: the new error handling context
|
319
|
+
* @handler: the new handler function
|
320
|
+
*
|
321
|
+
* Function to reset the handler and the error context for out of
|
322
|
+
* context structured error messages.
|
323
|
+
* This simply means that @handler will be called for subsequent
|
324
|
+
* error messages while not parsing nor validating. And @ctx will
|
325
|
+
* be passed as first argument to @handler
|
326
|
+
* For multi-threaded applications, this must be set separately for each thread.
|
327
|
+
*/
|
328
|
+
void
|
329
|
+
xmlSetStructuredErrorFunc(void *ctx, xmlStructuredErrorFunc handler) {
|
330
|
+
xmlStructuredErrorContext = ctx;
|
331
|
+
xmlStructuredError = handler;
|
134
332
|
}
|
data/lib/nokolexbor/node.rb
CHANGED
data/lib/nokolexbor/node_set.rb
CHANGED
data/lib/nokolexbor/version.rb
CHANGED
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokolexbor
|
4
|
+
module XPath
|
5
|
+
class SyntaxError < StandardError
|
6
|
+
attr_reader :domain
|
7
|
+
attr_reader :code
|
8
|
+
attr_reader :level
|
9
|
+
attr_reader :file
|
10
|
+
attr_reader :line
|
11
|
+
attr_reader :str1
|
12
|
+
attr_reader :str2
|
13
|
+
attr_reader :str3
|
14
|
+
attr_reader :int1
|
15
|
+
attr_reader :column
|
16
|
+
|
17
|
+
###
|
18
|
+
# return true if this is a non error
|
19
|
+
def none?
|
20
|
+
level == 0
|
21
|
+
end
|
22
|
+
|
23
|
+
###
|
24
|
+
# return true if this is a warning
|
25
|
+
def warning?
|
26
|
+
level == 1
|
27
|
+
end
|
28
|
+
|
29
|
+
###
|
30
|
+
# return true if this is an error
|
31
|
+
def error?
|
32
|
+
level == 2
|
33
|
+
end
|
34
|
+
|
35
|
+
###
|
36
|
+
# return true if this error is fatal
|
37
|
+
def fatal?
|
38
|
+
level == 3
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_s
|
42
|
+
message = super.chomp
|
43
|
+
[location_to_s, level_to_s, message]
|
44
|
+
.compact.join(": ")
|
45
|
+
.force_encoding(message.encoding)
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def level_to_s
|
51
|
+
case level
|
52
|
+
when 3 then "FATAL"
|
53
|
+
when 2 then "ERROR"
|
54
|
+
when 1 then "WARNING"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def nil_or_zero?(attribute)
|
59
|
+
attribute.nil? || attribute.zero?
|
60
|
+
end
|
61
|
+
|
62
|
+
def location_to_s
|
63
|
+
return nil if nil_or_zero?(line) && nil_or_zero?(column)
|
64
|
+
|
65
|
+
"#{line}:#{column}"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/lib/nokolexbor.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.0'
|
27
41
|
description: Nokolexbor is a high performance HTML5 parser, with support for both
|
28
42
|
CSS selectors and XPath. It's API is designed to be compatible with Nokogiri.
|
29
43
|
email: zyc9012@gmail.com
|
@@ -105,6 +119,7 @@ files:
|
|
105
119
|
- lib/nokolexbor/node.rb
|
106
120
|
- lib/nokolexbor/node_set.rb
|
107
121
|
- lib/nokolexbor/version.rb
|
122
|
+
- lib/nokolexbor/xpath.rb
|
108
123
|
- lib/nokolexbor/xpath_context.rb
|
109
124
|
- patches/0001-lexbor-support-text-pseudo-element.patch
|
110
125
|
- patches/0002-lexbor-match-id-class-case-sensitive.patch
|
@@ -522,7 +537,7 @@ licenses:
|
|
522
537
|
- MIT
|
523
538
|
metadata:
|
524
539
|
msys2_mingw_dependencies: cmake
|
525
|
-
post_install_message:
|
540
|
+
post_install_message:
|
526
541
|
rdoc_options: []
|
527
542
|
require_paths:
|
528
543
|
- lib
|
@@ -537,8 +552,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
537
552
|
- !ruby/object:Gem::Version
|
538
553
|
version: '0'
|
539
554
|
requirements: []
|
540
|
-
rubygems_version: 3.1
|
541
|
-
signing_key:
|
555
|
+
rubygems_version: 3.0.3.1
|
556
|
+
signing_key:
|
542
557
|
specification_version: 4
|
543
558
|
summary: High performance HTML5 parser, with support for both CSS selectors and XPath.
|
544
559
|
test_files: []
|