nokolexbor 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_document.c +12 -4
- data/ext/nokolexbor/nl_node.c +65 -55
- data/ext/nokolexbor/nl_node_set.c +43 -22
- data/ext/nokolexbor/nl_xpath_context.c +77 -17
- data/ext/nokolexbor/nokolexbor.h +2 -0
- data/ext/nokolexbor/xml_error.c +225 -27
- data/lib/nokolexbor/node.rb +2 -2
- data/lib/nokolexbor/node_set.rb +5 -1
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor/xpath.rb +69 -0
- data/lib/nokolexbor.rb +1 -0
- metadata +21 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e93c293e8d506960077c772b822d4f75103d6583b8864e56f343a8ad6229c7a
|
4
|
+
data.tar.gz: 1baaaed733eae123b895e21021709fed7a7e185a10e8c27cf0d43105e7e4a272
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 665232217ba5f1b0a53ad67dfcbb38244685fab9242bf718b1364b59c3b623e5150bdd471284c3424fb2c319761c30f1022d7792e1edf848e3c4c0a41de89806
|
7
|
+
data.tar.gz: 81cc64f435de161807ce0a6d3a0c8b0875a51f0f49307a763be8be4c70b3eefd0a5ea36fb364d2fec2e37b6b3307d549fc0f0bbd3afe35f47b6b797ea5998b96
|
data/ext/nokolexbor/nl_document.c
CHANGED
@@ -5,7 +5,7 @@ extern VALUE cNokolexborNode;
|
|
5
5
|
VALUE cNokolexborDocument;
|
6
6
|
|
7
7
|
static void
|
8
|
-
free_nl_document(
|
8
|
+
free_nl_document(lxb_html_document_t *document)
|
9
9
|
{
|
10
10
|
lxb_html_document_destroy(document);
|
11
11
|
}
|
@@ -25,7 +25,7 @@ static VALUE
|
|
25
25
|
nl_document_parse(VALUE self, VALUE rb_html)
|
26
26
|
{
|
27
27
|
const char *html_c = StringValuePtr(rb_html);
|
28
|
-
|
28
|
+
size_t html_len = RSTRING_LEN(rb_html);
|
29
29
|
|
30
30
|
lxb_html_document_t *document;
|
31
31
|
|
@@ -35,13 +35,13 @@ nl_document_parse(VALUE self, VALUE rb_html)
|
|
35
35
|
rb_raise(rb_eRuntimeError, "Error creating document");
|
36
36
|
}
|
37
37
|
|
38
|
-
lxb_status_t status = lxb_html_document_parse(document, html_c, html_len);
|
38
|
+
lxb_status_t status = lxb_html_document_parse(document, (const lxb_char_t *)html_c, html_len);
|
39
39
|
if (status != LXB_STATUS_OK)
|
40
40
|
{
|
41
41
|
nl_raise_lexbor_error(status);
|
42
42
|
}
|
43
43
|
|
44
|
-
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type,
|
44
|
+
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
|
45
45
|
}
|
46
46
|
|
47
47
|
static VALUE
|
@@ -50,6 +50,14 @@ nl_document_new(VALUE self)
|
|
50
50
|
return nl_document_parse(self, rb_str_new("", 0));
|
51
51
|
}
|
52
52
|
|
53
|
+
lxb_dom_document_t *
|
54
|
+
nl_rb_document_unwrap(VALUE rb_doc)
|
55
|
+
{
|
56
|
+
lxb_dom_document_t *doc;
|
57
|
+
TypedData_Get_Struct(rb_doc, lxb_dom_document_t, &nl_document_type, doc);
|
58
|
+
return doc;
|
59
|
+
}
|
60
|
+
|
53
61
|
void Init_nl_document(void)
|
54
62
|
{
|
55
63
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -43,7 +43,7 @@ nl_rb_node_unwrap(VALUE rb_node)
|
|
43
43
|
lxb_dom_node_t *node;
|
44
44
|
if (rb_obj_class(rb_node) == cNokolexborDocument)
|
45
45
|
{
|
46
|
-
TypedData_Get_Struct(rb_node,
|
46
|
+
TypedData_Get_Struct(rb_node, lxb_dom_node_t, &nl_document_type, node);
|
47
47
|
}
|
48
48
|
else
|
49
49
|
{
|
@@ -56,7 +56,6 @@ static VALUE
|
|
56
56
|
nl_node_new(int argc, VALUE *argv, VALUE klass)
|
57
57
|
{
|
58
58
|
lxb_dom_document_t *document;
|
59
|
-
lxb_dom_node_t *node;
|
60
59
|
VALUE rb_name;
|
61
60
|
VALUE rb_document;
|
62
61
|
VALUE rest;
|
@@ -68,9 +67,9 @@ nl_node_new(int argc, VALUE *argv, VALUE klass)
|
|
68
67
|
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
69
68
|
}
|
70
69
|
|
71
|
-
|
70
|
+
document = nl_rb_document_unwrap(rb_document);
|
72
71
|
|
73
|
-
lxb_dom_element_t *element = lxb_dom_document_create_element(document, StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
|
72
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(document, (const lxb_char_t *)StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
|
74
73
|
if (element == NULL)
|
75
74
|
{
|
76
75
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
@@ -97,7 +96,7 @@ nl_node_content(VALUE self)
|
|
97
96
|
{
|
98
97
|
return rb_str_new("", 0);
|
99
98
|
}
|
100
|
-
VALUE rb_str = rb_utf8_str_new(text, str_len);
|
99
|
+
VALUE rb_str = rb_utf8_str_new((char *)text, str_len);
|
101
100
|
lxb_dom_document_destroy_text(node->owner_document, text);
|
102
101
|
|
103
102
|
return rb_str;
|
@@ -115,19 +114,19 @@ nl_node_get_attr(VALUE self, VALUE rb_attr)
|
|
115
114
|
|
116
115
|
VALUE rb_attr_s = rb_String(rb_attr);
|
117
116
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
118
|
-
|
117
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
119
118
|
|
120
|
-
lxb_dom_element_t *element =
|
119
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
121
120
|
|
122
|
-
if (!lxb_dom_element_has_attribute(element, attr_c, attr_len))
|
121
|
+
if (!lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len))
|
123
122
|
{
|
124
123
|
return Qnil;
|
125
124
|
}
|
126
125
|
|
127
126
|
size_t attr_value_len;
|
128
|
-
|
127
|
+
const lxb_char_t *attr_value = lxb_dom_element_get_attribute(element, (const lxb_char_t *)attr_c, attr_len, &attr_value_len);
|
129
128
|
|
130
|
-
return rb_utf8_str_new(attr_value, attr_value_len);
|
129
|
+
return rb_utf8_str_new((const char *)attr_value, attr_value_len);
|
131
130
|
}
|
132
131
|
|
133
132
|
static VALUE
|
@@ -144,13 +143,13 @@ nl_node_set_attr(VALUE self, VALUE rb_attr, VALUE rb_value)
|
|
144
143
|
VALUE rb_value_s = rb_String(rb_value);
|
145
144
|
|
146
145
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
147
|
-
|
146
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
148
147
|
const char *value_c = RSTRING_PTR(rb_value_s);
|
149
|
-
|
148
|
+
size_t value_len = RSTRING_LEN(rb_value_s);
|
150
149
|
|
151
|
-
lxb_dom_element_t *element =
|
150
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
152
151
|
|
153
|
-
lxb_dom_element_set_attribute(element, attr_c, attr_len, value_c, value_len);
|
152
|
+
lxb_dom_element_set_attribute(element, (const lxb_char_t *)attr_c, attr_len, (const lxb_char_t *)value_c, value_len);
|
154
153
|
|
155
154
|
return rb_value;
|
156
155
|
}
|
@@ -168,14 +167,14 @@ nl_node_remove_attr(VALUE self, VALUE rb_attr)
|
|
168
167
|
VALUE rb_attr_s = rb_String(rb_attr);
|
169
168
|
|
170
169
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
171
|
-
|
170
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
172
171
|
|
173
|
-
lxb_dom_element_t *element =
|
172
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
174
173
|
|
175
|
-
return lxb_dom_element_remove_attribute(element, attr_c, attr_len) == LXB_STATUS_OK ? Qtrue : Qfalse;
|
174
|
+
return lxb_dom_element_remove_attribute(element, (const lxb_char_t *)attr_c, attr_len) == LXB_STATUS_OK ? Qtrue : Qfalse;
|
176
175
|
}
|
177
176
|
|
178
|
-
|
177
|
+
lxb_status_t
|
179
178
|
nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx)
|
180
179
|
{
|
181
180
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
@@ -188,7 +187,7 @@ nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *sp
|
|
188
187
|
return LXB_STATUS_STOP;
|
189
188
|
}
|
190
189
|
|
191
|
-
|
190
|
+
lxb_status_t
|
192
191
|
nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx)
|
193
192
|
{
|
194
193
|
lexbor_array_t *array = (lexbor_array_t *)ctx;
|
@@ -200,11 +199,11 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
|
|
200
199
|
return LXB_STATUS_OK;
|
201
200
|
}
|
202
201
|
|
203
|
-
|
202
|
+
void
|
204
203
|
nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
205
204
|
{
|
206
205
|
const char *selector_c = StringValuePtr(selector);
|
207
|
-
|
206
|
+
size_t selector_len = RSTRING_LEN(selector);
|
208
207
|
|
209
208
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
210
209
|
|
@@ -226,7 +225,7 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
226
225
|
|
227
226
|
/* Parse and get the log. */
|
228
227
|
// TODO: Cache the list for reuse, improves performance
|
229
|
-
lxb_css_selector_list_t *list = lxb_css_selectors_parse_relative_list(parser, selector_c, selector_len);
|
228
|
+
lxb_css_selector_list_t *list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
|
230
229
|
if (parser->status != LXB_STATUS_OK)
|
231
230
|
{
|
232
231
|
nl_raise_lexbor_error(parser->status);
|
@@ -252,15 +251,15 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
252
251
|
static void
|
253
252
|
mark_node_orders(lxb_dom_node_t *root)
|
254
253
|
{
|
255
|
-
|
256
|
-
root->user = count;
|
254
|
+
size_t count = 1;
|
255
|
+
root->user = (void *)count;
|
257
256
|
lxb_dom_node_t *node = root;
|
258
257
|
do
|
259
258
|
{
|
260
259
|
if (node->first_child != NULL)
|
261
260
|
{
|
262
261
|
node = node->first_child;
|
263
|
-
node->user = ++count;
|
262
|
+
node->user = (void *)++count;
|
264
263
|
}
|
265
264
|
else
|
266
265
|
{
|
@@ -275,7 +274,7 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
275
274
|
}
|
276
275
|
|
277
276
|
node = node->next;
|
278
|
-
node->user = ++count;
|
277
|
+
node->user = (void *)++count;
|
279
278
|
}
|
280
279
|
|
281
280
|
} while (true);
|
@@ -290,7 +289,7 @@ void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_arr
|
|
290
289
|
int need_order = 0;
|
291
290
|
// Check if we have already marked orders, note that
|
292
291
|
// we need to order again if new nodes are added to the document
|
293
|
-
for (
|
292
|
+
for (size_t i = 0; i < array->length; i++)
|
294
293
|
{
|
295
294
|
if (((lxb_dom_node_t *)array->list[i])->user == 0)
|
296
295
|
{
|
@@ -300,13 +299,13 @@ void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_arr
|
|
300
299
|
}
|
301
300
|
if (need_order)
|
302
301
|
{
|
303
|
-
mark_node_orders(doc);
|
302
|
+
mark_node_orders(&doc->node);
|
304
303
|
}
|
305
|
-
css_result_tim_sort(&array->list[0], array->length);
|
304
|
+
css_result_tim_sort((lxb_dom_node_t **)&array->list[0], array->length);
|
306
305
|
}
|
307
306
|
}
|
308
307
|
|
309
|
-
VALUE
|
308
|
+
static VALUE
|
310
309
|
nl_node_at_css(VALUE self, VALUE selector)
|
311
310
|
{
|
312
311
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
@@ -321,10 +320,14 @@ nl_node_at_css(VALUE self, VALUE selector)
|
|
321
320
|
|
322
321
|
sort_nodes_if_necessary(selector, node->owner_document, array);
|
323
322
|
|
324
|
-
|
323
|
+
VALUE ret = nl_rb_node_create(array->list[0], nl_rb_document_get(self));
|
324
|
+
|
325
|
+
lexbor_array_destroy(array, true);
|
326
|
+
|
327
|
+
return ret;
|
325
328
|
}
|
326
329
|
|
327
|
-
VALUE
|
330
|
+
static VALUE
|
328
331
|
nl_node_css(VALUE self, VALUE selector)
|
329
332
|
{
|
330
333
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
@@ -354,7 +357,7 @@ nl_node_inner_html(VALUE self)
|
|
354
357
|
|
355
358
|
if (str.data != NULL)
|
356
359
|
{
|
357
|
-
VALUE ret = rb_utf8_str_new(str.data, str.length);
|
360
|
+
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
358
361
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
359
362
|
return ret;
|
360
363
|
}
|
@@ -379,7 +382,7 @@ nl_node_outer_html(VALUE self)
|
|
379
382
|
|
380
383
|
if (str.data != NULL)
|
381
384
|
{
|
382
|
-
VALUE ret = rb_utf8_str_new(str.data, str.length);
|
385
|
+
VALUE ret = rb_utf8_str_new((const char *)str.data, str.length);
|
383
386
|
lexbor_str_destroy(&str, node->owner_document->text, false);
|
384
387
|
return ret;
|
385
388
|
}
|
@@ -399,11 +402,11 @@ nl_node_has_key(VALUE self, VALUE rb_attr)
|
|
399
402
|
|
400
403
|
VALUE rb_attr_s = rb_String(rb_attr);
|
401
404
|
const char *attr_c = RSTRING_PTR(rb_attr_s);
|
402
|
-
|
405
|
+
size_t attr_len = RSTRING_LEN(rb_attr_s);
|
403
406
|
|
404
|
-
lxb_dom_element_t *element =
|
407
|
+
lxb_dom_element_t *element = lxb_dom_interface_element(node);
|
405
408
|
|
406
|
-
return lxb_dom_element_has_attribute(element, attr_c, attr_len) ? Qtrue : Qfalse;
|
409
|
+
return lxb_dom_element_has_attribute(element, (const lxb_char_t *)attr_c, attr_len) ? Qtrue : Qfalse;
|
407
410
|
}
|
408
411
|
|
409
412
|
static VALUE
|
@@ -417,13 +420,13 @@ nl_node_keys(VALUE self)
|
|
417
420
|
return ary_keys;
|
418
421
|
}
|
419
422
|
|
420
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
423
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
421
424
|
|
422
425
|
while (attr != NULL)
|
423
426
|
{
|
424
427
|
size_t tmp_len;
|
425
|
-
lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
426
|
-
rb_ary_push(ary_keys, rb_utf8_str_new(tmp, tmp_len));
|
428
|
+
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
429
|
+
rb_ary_push(ary_keys, rb_utf8_str_new((const char *)tmp, tmp_len));
|
427
430
|
|
428
431
|
attr = lxb_dom_element_next_attribute(attr);
|
429
432
|
}
|
@@ -442,15 +445,19 @@ nl_node_values(VALUE self)
|
|
442
445
|
return ary_values;
|
443
446
|
}
|
444
447
|
|
445
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
448
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
446
449
|
|
447
450
|
while (attr != NULL)
|
448
451
|
{
|
449
452
|
size_t tmp_len;
|
450
|
-
lxb_char_t *tmp = lxb_dom_attr_value(attr, &tmp_len);
|
453
|
+
const lxb_char_t *tmp = lxb_dom_attr_value(attr, &tmp_len);
|
451
454
|
if (tmp != NULL)
|
452
455
|
{
|
453
|
-
rb_ary_push(ary_values, rb_utf8_str_new(tmp, tmp_len));
|
456
|
+
rb_ary_push(ary_values, rb_utf8_str_new((const char *)tmp, tmp_len));
|
457
|
+
}
|
458
|
+
else
|
459
|
+
{
|
460
|
+
rb_ary_push(ary_values, rb_str_new("", 0));
|
454
461
|
}
|
455
462
|
|
456
463
|
attr = lxb_dom_element_next_attribute(attr);
|
@@ -470,16 +477,16 @@ nl_node_attrs(VALUE self)
|
|
470
477
|
return rb_hash;
|
471
478
|
}
|
472
479
|
|
473
|
-
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(
|
480
|
+
lxb_dom_attr_t *attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
474
481
|
|
475
482
|
while (attr != NULL)
|
476
483
|
{
|
477
484
|
size_t tmp_len;
|
478
|
-
lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
479
|
-
VALUE rb_key = rb_utf8_str_new(tmp, tmp_len);
|
485
|
+
const lxb_char_t *tmp = lxb_dom_attr_qualified_name(attr, &tmp_len);
|
486
|
+
VALUE rb_key = rb_utf8_str_new((const char *)tmp, tmp_len);
|
480
487
|
|
481
488
|
tmp = lxb_dom_attr_value(attr, &tmp_len);
|
482
|
-
VALUE rb_value = tmp != NULL ? rb_utf8_str_new(tmp, tmp_len) :
|
489
|
+
VALUE rb_value = tmp != NULL ? rb_utf8_str_new((const char *)tmp, tmp_len) : rb_str_new("", 0);
|
483
490
|
|
484
491
|
rb_hash_aset(rb_hash, rb_key, rb_value);
|
485
492
|
|
@@ -604,15 +611,16 @@ nl_node_name(VALUE self)
|
|
604
611
|
{
|
605
612
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
606
613
|
size_t len;
|
607
|
-
lxb_char_t *name = lxb_dom_node_name_qualified(node, &len);
|
608
|
-
return rb_utf8_str_new(name, len);
|
614
|
+
const lxb_char_t *name = lxb_dom_node_name_qualified(node, &len);
|
615
|
+
return rb_utf8_str_new((const char *)name, len);
|
609
616
|
}
|
610
617
|
|
611
618
|
static lxb_dom_node_t *
|
612
|
-
nl_node_parse_fragment(
|
619
|
+
nl_node_parse_fragment(lxb_dom_document_t *doc, lxb_char_t *html, size_t size)
|
613
620
|
{
|
614
621
|
size_t tag_name_len;
|
615
|
-
|
622
|
+
lxb_html_document_t *html_doc = lxb_html_interface_document(doc);
|
623
|
+
const lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(html_doc), LXB_TAG__UNDEF, &tag_name_len);
|
616
624
|
if (tag_name == NULL)
|
617
625
|
{
|
618
626
|
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
@@ -622,7 +630,7 @@ nl_node_parse_fragment(lxb_html_document_t *doc, lxb_char_t *html, size_t size)
|
|
622
630
|
{
|
623
631
|
rb_raise(rb_eRuntimeError, "Error creating element");
|
624
632
|
}
|
625
|
-
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(
|
633
|
+
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(html_doc, element, html, size);
|
626
634
|
if (frag_root == NULL)
|
627
635
|
{
|
628
636
|
rb_raise(rb_eArgError, "Error parsing HTML");
|
@@ -637,7 +645,7 @@ nl_node_fragment(VALUE self, VALUE html)
|
|
637
645
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
638
646
|
lxb_dom_document_t *doc = node->owner_document;
|
639
647
|
|
640
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(html), RSTRING_LEN(html));
|
648
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(html), RSTRING_LEN(html));
|
641
649
|
return nl_rb_node_create(frag_root, nl_rb_document_get(self));
|
642
650
|
}
|
643
651
|
|
@@ -663,7 +671,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
663
671
|
|
664
672
|
if (TYPE(new) == T_STRING)
|
665
673
|
{
|
666
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
674
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
667
675
|
|
668
676
|
while (frag_root->first_child != NULL)
|
669
677
|
{
|
@@ -694,7 +702,7 @@ nl_node_add_child(VALUE self, VALUE new)
|
|
694
702
|
|
695
703
|
if (TYPE(new) == T_STRING)
|
696
704
|
{
|
697
|
-
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
705
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, (lxb_char_t *)RSTRING_PTR(new), RSTRING_LEN(new));
|
698
706
|
|
699
707
|
while (frag_root->first_child != NULL)
|
700
708
|
{
|
@@ -832,11 +840,13 @@ void Init_nl_node(void)
|
|
832
840
|
|
833
841
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
834
842
|
rb_define_alias(cNokolexborNode, "set_attr", "[]=");
|
843
|
+
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
835
844
|
rb_define_alias(cNokolexborNode, "text", "content");
|
836
845
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
837
846
|
rb_define_alias(cNokolexborNode, "to_str", "content");
|
838
847
|
rb_define_alias(cNokolexborNode, "to_html", "outer_html");
|
839
848
|
rb_define_alias(cNokolexborNode, "to_s", "outer_html");
|
849
|
+
rb_define_alias(cNokolexborNode, "unlink", "remove");
|
840
850
|
rb_define_alias(cNokolexborNode, "type", "node_type");
|
841
851
|
rb_define_alias(cNokolexborNode, "dup", "clone");
|
842
852
|
}
|
data/ext/nokolexbor/nl_node_set.c
CHANGED
@@ -5,15 +5,15 @@ extern VALUE cNokolexborNode;
|
|
5
5
|
VALUE cNokolexborNodeSet;
|
6
6
|
extern rb_data_type_t nl_document_type;
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
void nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx);
|
9
|
+
void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array);
|
10
|
+
lxb_status_t nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
|
11
|
+
lxb_status_t nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
|
12
12
|
|
13
13
|
lxb_status_t
|
14
14
|
lexbor_array_push_unique(lexbor_array_t *array, void *value)
|
15
15
|
{
|
16
|
-
for (
|
16
|
+
for (size_t i = 0; i < array->length; i++)
|
17
17
|
if (array->list[i] == value)
|
18
18
|
return LXB_STATUS_STOPPED;
|
19
19
|
|
@@ -91,7 +91,7 @@ nl_node_set_delete(VALUE self, VALUE rb_node)
|
|
91
91
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
92
92
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
93
93
|
|
94
|
-
|
94
|
+
size_t i;
|
95
95
|
for (i = 0; i < array->length; i++)
|
96
96
|
if (array->list[i] == node)
|
97
97
|
{
|
@@ -113,7 +113,7 @@ nl_node_set_is_include(VALUE self, VALUE rb_node)
|
|
113
113
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
114
114
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
115
115
|
|
116
|
-
for (
|
116
|
+
for (size_t i = 0; i < array->length; i++)
|
117
117
|
if (array->list[i] == node)
|
118
118
|
{
|
119
119
|
return Qtrue;
|
@@ -169,7 +169,7 @@ nl_node_set_subseq(VALUE self, long beg, long len)
|
|
169
169
|
}
|
170
170
|
}
|
171
171
|
|
172
|
-
for (
|
172
|
+
for (long j = beg; j < beg + len; ++j)
|
173
173
|
{
|
174
174
|
lxb_status_t status = lexbor_array_push(new_array, old_array->list[j]);
|
175
175
|
if (status != LXB_STATUS_OK)
|
@@ -177,7 +177,7 @@ nl_node_set_subseq(VALUE self, long beg, long len)
|
|
177
177
|
nl_raise_lexbor_error(status);
|
178
178
|
}
|
179
179
|
}
|
180
|
-
return
|
180
|
+
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
181
181
|
}
|
182
182
|
|
183
183
|
static VALUE
|
@@ -231,7 +231,7 @@ nl_node_set_to_array(VALUE self)
|
|
231
231
|
|
232
232
|
VALUE list = rb_ary_new2(array->length);
|
233
233
|
VALUE doc = nl_rb_document_get(self);
|
234
|
-
for (
|
234
|
+
for (size_t i = 0; i < array->length; i++)
|
235
235
|
{
|
236
236
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
237
237
|
VALUE rb_node = nl_rb_node_create(node, doc);
|
@@ -267,7 +267,7 @@ nl_node_set_union(VALUE self, VALUE other)
|
|
267
267
|
memcpy(new_array->list, self_array->list, sizeof(lxb_dom_node_t *) * self_array->length);
|
268
268
|
new_array->length = self_array->length;
|
269
269
|
|
270
|
-
for (
|
270
|
+
for (size_t i = 0; i < other_array->length; i++)
|
271
271
|
{
|
272
272
|
lexbor_array_push_unique(new_array, other_array->list[i]);
|
273
273
|
}
|
@@ -275,16 +275,15 @@ nl_node_set_union(VALUE self, VALUE other)
|
|
275
275
|
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
276
276
|
}
|
277
277
|
|
278
|
-
static
|
279
|
-
nl_node_set_find(VALUE self, VALUE selector,
|
278
|
+
static void
|
279
|
+
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
|
280
280
|
{
|
281
|
-
|
282
|
-
lxb_dom_document_t *doc;
|
283
|
-
TypedData_Get_Struct(rb_doc, lxb_dom_document_t, &nl_document_type, doc);
|
281
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
284
282
|
if (doc == NULL)
|
285
283
|
{
|
286
284
|
rb_raise(rb_eRuntimeError, "Error getting document");
|
287
285
|
}
|
286
|
+
// Wrap direct children with a temporary fragment so that they can be searched
|
288
287
|
lxb_dom_document_fragment_t *frag = lxb_dom_document_fragment_interface_create(doc);
|
289
288
|
if (frag == NULL)
|
290
289
|
{
|
@@ -302,7 +301,7 @@ nl_node_set_find(VALUE self, VALUE selector, nl_node_find_f finder)
|
|
302
301
|
}
|
303
302
|
}
|
304
303
|
// Backup original node data and re-group them into a fragment
|
305
|
-
for (
|
304
|
+
for (size_t i = 0; i < array->length; i++)
|
306
305
|
{
|
307
306
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
308
307
|
lxb_dom_node_t *backup_node = malloc(sizeof(lxb_dom_node_t));
|
@@ -320,29 +319,51 @@ nl_node_set_find(VALUE self, VALUE selector, nl_node_find_f finder)
|
|
320
319
|
}
|
321
320
|
VALUE rb_frag = nl_rb_node_create(&frag->node, nl_rb_document_get(self));
|
322
321
|
|
323
|
-
|
322
|
+
nl_node_find(rb_frag, selector, cb, ctx);
|
324
323
|
|
325
324
|
lxb_dom_document_fragment_interface_destroy(frag);
|
326
325
|
// Restore original node data
|
327
|
-
for (
|
326
|
+
for (size_t i = 0; i < array->length; i++)
|
328
327
|
{
|
329
328
|
memcpy(array->list[i], backup_array->list[i], sizeof(lxb_dom_node_t));
|
330
329
|
free(backup_array->list[i]);
|
331
330
|
}
|
332
331
|
lexbor_array_destroy(backup_array, true);
|
333
|
-
return ret;
|
334
332
|
}
|
335
333
|
|
336
334
|
static VALUE
|
337
335
|
nl_node_set_at_css(VALUE self, VALUE selector)
|
338
336
|
{
|
339
|
-
|
337
|
+
lexbor_array_t *array = lexbor_array_create();
|
338
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
339
|
+
|
340
|
+
nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
341
|
+
|
342
|
+
if (array->length == 0)
|
343
|
+
{
|
344
|
+
return Qnil;
|
345
|
+
}
|
346
|
+
|
347
|
+
sort_nodes_if_necessary(selector, doc, array);
|
348
|
+
|
349
|
+
VALUE ret = nl_rb_node_create(array->list[0], nl_rb_document_get(self));
|
350
|
+
|
351
|
+
lexbor_array_destroy(array, true);
|
352
|
+
|
353
|
+
return ret;
|
340
354
|
}
|
341
355
|
|
342
356
|
static VALUE
|
343
357
|
nl_node_set_css(VALUE self, VALUE selector)
|
344
358
|
{
|
345
|
-
|
359
|
+
lexbor_array_t *array = lexbor_array_create();
|
360
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
361
|
+
|
362
|
+
nl_node_set_find(self, selector, nl_node_css_callback, array);
|
363
|
+
|
364
|
+
sort_nodes_if_necessary(selector, doc, array);
|
365
|
+
|
366
|
+
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
346
367
|
}
|
347
368
|
|
348
369
|
void Init_nl_node_set(void)
|
data/ext/nokolexbor/nl_xpath_context.c
CHANGED
@@ -7,9 +7,13 @@
|
|
7
7
|
#include "libxml/xpathInternals.h"
|
8
8
|
#include "libxml/parserInternals.h"
|
9
9
|
|
10
|
+
#define RBSTR_OR_QNIL(_str) (_str ? rb_utf8_str_new_cstr(_str) : Qnil)
|
11
|
+
|
10
12
|
extern VALUE mNokolexbor;
|
11
13
|
extern VALUE cNokolexborNodeSet;
|
12
|
-
VALUE
|
14
|
+
VALUE cNokolexborXpathContext;
|
15
|
+
VALUE mNokolexborXpath;
|
16
|
+
VALUE cNokolexborXpathSyntaxError;
|
13
17
|
|
14
18
|
static void
|
15
19
|
free_xml_xpath_context(xmlXPathContextPtr ctx)
|
@@ -24,7 +28,7 @@ free_xml_xpath_context(xmlXPathContextPtr ctx)
|
|
24
28
|
* Register the namespace with +prefix+ and +uri+.
|
25
29
|
*/
|
26
30
|
static VALUE
|
27
|
-
|
31
|
+
nl_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
28
32
|
{
|
29
33
|
xmlXPathContextPtr ctx;
|
30
34
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
@@ -42,7 +46,7 @@ rb_xml_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
|
42
46
|
* Register the variable +name+ with +value+.
|
43
47
|
*/
|
44
48
|
static VALUE
|
45
|
-
|
49
|
+
nl_xpath_context_register_variable(VALUE self, VALUE name, VALUE value)
|
46
50
|
{
|
47
51
|
xmlXPathContextPtr ctx;
|
48
52
|
xmlXPathObjectPtr xmlValue;
|
@@ -69,7 +73,7 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
69
73
|
switch (c_xpath_object->type)
|
70
74
|
{
|
71
75
|
case XPATH_STRING:
|
72
|
-
rb_retval = rb_utf8_str_new_cstr(c_xpath_object->stringval);
|
76
|
+
rb_retval = rb_utf8_str_new_cstr((const char *)c_xpath_object->stringval);
|
73
77
|
xmlFree(c_xpath_object->stringval);
|
74
78
|
return rb_retval;
|
75
79
|
|
@@ -106,6 +110,60 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
106
110
|
}
|
107
111
|
}
|
108
112
|
|
113
|
+
static VALUE
|
114
|
+
nl_xpath_wrap_syntax_error(xmlErrorPtr error)
|
115
|
+
{
|
116
|
+
VALUE msg, e;
|
117
|
+
|
118
|
+
msg = (error && error->message) ? rb_utf8_str_new_cstr(error->message) : Qnil;
|
119
|
+
|
120
|
+
e = rb_class_new_instance(
|
121
|
+
1,
|
122
|
+
&msg,
|
123
|
+
cNokolexborXpathSyntaxError);
|
124
|
+
|
125
|
+
if (error)
|
126
|
+
{
|
127
|
+
rb_iv_set(e, "@domain", INT2NUM(error->domain));
|
128
|
+
rb_iv_set(e, "@code", INT2NUM(error->code));
|
129
|
+
rb_iv_set(e, "@level", INT2NUM((short)error->level));
|
130
|
+
rb_iv_set(e, "@file", RBSTR_OR_QNIL(error->file));
|
131
|
+
rb_iv_set(e, "@line", INT2NUM(error->line));
|
132
|
+
rb_iv_set(e, "@str1", RBSTR_OR_QNIL(error->str1));
|
133
|
+
rb_iv_set(e, "@str2", RBSTR_OR_QNIL(error->str2));
|
134
|
+
rb_iv_set(e, "@str3", RBSTR_OR_QNIL(error->str3));
|
135
|
+
rb_iv_set(e, "@int1", INT2NUM(error->int1));
|
136
|
+
rb_iv_set(e, "@column", INT2NUM(error->int2));
|
137
|
+
}
|
138
|
+
|
139
|
+
return e;
|
140
|
+
}
|
141
|
+
|
142
|
+
static void nl_xpath_error_array_pusher(void *ctx, xmlErrorPtr error)
|
143
|
+
{
|
144
|
+
VALUE list = (VALUE)ctx;
|
145
|
+
Check_Type(list, T_ARRAY);
|
146
|
+
rb_ary_push(list, nl_xpath_wrap_syntax_error(error));
|
147
|
+
}
|
148
|
+
|
149
|
+
static void
|
150
|
+
nl_xpath_generic_exception_pusher(void *ctx, const char *msg, ...)
|
151
|
+
{
|
152
|
+
VALUE rb_errors = (VALUE)ctx;
|
153
|
+
VALUE rb_message;
|
154
|
+
VALUE rb_exception;
|
155
|
+
|
156
|
+
Check_Type(rb_errors, T_ARRAY);
|
157
|
+
|
158
|
+
va_list args;
|
159
|
+
va_start(args, msg);
|
160
|
+
rb_message = rb_vsprintf(msg, args);
|
161
|
+
va_end(args);
|
162
|
+
|
163
|
+
rb_exception = rb_exc_new_str(cNokolexborXpathSyntaxError, rb_message);
|
164
|
+
rb_ary_push(rb_errors, rb_exception);
|
165
|
+
}
|
166
|
+
|
109
167
|
/*
|
110
168
|
* call-seq:
|
111
169
|
* evaluate(search_path, handler = nil)
|
@@ -113,7 +171,7 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
113
171
|
* Evaluate the +search_path+ returning an XML::XPath object.
|
114
172
|
*/
|
115
173
|
static VALUE
|
116
|
-
|
174
|
+
nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
117
175
|
{
|
118
176
|
VALUE search_path, xpath_handler;
|
119
177
|
VALUE retval = Qnil;
|
@@ -137,13 +195,13 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
137
195
|
// xmlXPathRegisterFuncLookup(ctx, handler_lookup, (void *)xpath_handler);
|
138
196
|
// }
|
139
197
|
|
140
|
-
|
141
|
-
|
198
|
+
xmlSetStructuredErrorFunc((void *)errors, nl_xpath_error_array_pusher);
|
199
|
+
xmlSetGenericErrorFunc((void *)errors, nl_xpath_generic_exception_pusher);
|
142
200
|
|
143
201
|
xpath = xmlXPathEvalExpression(query, ctx);
|
144
202
|
|
145
|
-
|
146
|
-
|
203
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
204
|
+
xmlSetGenericErrorFunc(NULL, NULL);
|
147
205
|
|
148
206
|
if (xpath == NULL)
|
149
207
|
{
|
@@ -156,7 +214,7 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
156
214
|
retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
|
157
215
|
}
|
158
216
|
|
159
|
-
|
217
|
+
xmlXPathFreeNodeSetList(xpath);
|
160
218
|
|
161
219
|
return retval;
|
162
220
|
}
|
@@ -168,7 +226,7 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
168
226
|
* Create a new XPathContext with +node+ as the reference point.
|
169
227
|
*/
|
170
228
|
static VALUE
|
171
|
-
|
229
|
+
nl_xpath_context_new(VALUE klass, VALUE rb_node)
|
172
230
|
{
|
173
231
|
xmlXPathContextPtr ctx;
|
174
232
|
VALUE self;
|
@@ -188,13 +246,15 @@ void Init_nl_xpath_context(void)
|
|
188
246
|
{
|
189
247
|
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
190
248
|
|
191
|
-
|
249
|
+
cNokolexborXpathContext = rb_define_class_under(mNokolexbor, "XPathContext", rb_cObject);
|
250
|
+
mNokolexborXpath = rb_define_module_under(mNokolexbor, "XPath");
|
251
|
+
cNokolexborXpathSyntaxError = rb_define_class_under(mNokolexborXpath, "SyntaxError", rb_eStandardError);
|
192
252
|
|
193
|
-
rb_undef_alloc_func(
|
253
|
+
rb_undef_alloc_func(cNokolexborXpathContext);
|
194
254
|
|
195
|
-
rb_define_singleton_method(
|
255
|
+
rb_define_singleton_method(cNokolexborXpathContext, "new", nl_xpath_context_new, 1);
|
196
256
|
|
197
|
-
rb_define_method(
|
198
|
-
rb_define_method(
|
199
|
-
rb_define_method(
|
257
|
+
rb_define_method(cNokolexborXpathContext, "evaluate", nl_xpath_context_evaluate, -1);
|
258
|
+
rb_define_method(cNokolexborXpathContext, "register_variable", nl_xpath_context_register_variable, 2);
|
259
|
+
rb_define_method(cNokolexborXpathContext, "register_ns", nl_xpath_context_register_ns, 2);
|
200
260
|
}
|
data/ext/nokolexbor/nokolexbor.h
CHANGED
@@ -28,6 +28,8 @@ lxb_inline VALUE nl_rb_document_get(VALUE rb_node_or_doc)
|
|
28
28
|
return rb_iv_get(rb_node_or_doc, "@document");
|
29
29
|
}
|
30
30
|
|
31
|
+
lxb_dom_document_t * nl_rb_document_unwrap(VALUE rb_doc);
|
32
|
+
|
31
33
|
const lxb_char_t *
|
32
34
|
lxb_dom_node_name_qualified(lxb_dom_node_t *node, size_t *len);
|
33
35
|
|
data/ext/nokolexbor/xml_error.c
CHANGED
@@ -3,12 +3,38 @@
|
|
3
3
|
#include <stdarg.h>
|
4
4
|
#include "libxml/xmlerror.h"
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
#define XML_GET_VAR_STR(msg, str) { \
|
7
|
+
int size, prev_size = -1; \
|
8
|
+
int chars; \
|
9
|
+
char *larger; \
|
10
|
+
va_list ap; \
|
11
|
+
\
|
12
|
+
str = (char *) xmlMalloc(150); \
|
13
|
+
if (str != NULL) { \
|
14
|
+
\
|
15
|
+
size = 150; \
|
16
|
+
\
|
17
|
+
while (size < 64000) { \
|
18
|
+
va_start(ap, msg); \
|
19
|
+
chars = vsnprintf(str, size, msg, ap); \
|
20
|
+
va_end(ap); \
|
21
|
+
if ((chars > -1) && (chars < size)) { \
|
22
|
+
if (prev_size == chars) { \
|
23
|
+
break; \
|
24
|
+
} else { \
|
25
|
+
prev_size = chars; \
|
26
|
+
} \
|
27
|
+
} \
|
28
|
+
if (chars > -1) \
|
29
|
+
size += chars + 1; \
|
30
|
+
else \
|
31
|
+
size += 100; \
|
32
|
+
if ((larger = (char *) xmlRealloc(str, size)) == NULL) {\
|
33
|
+
break; \
|
34
|
+
} \
|
35
|
+
str = larger; \
|
36
|
+
}} \
|
37
|
+
}
|
12
38
|
|
13
39
|
/**
|
14
40
|
* xmlGenericErrorDefaultFunc:
|
@@ -20,22 +46,57 @@ void *_xmlGenericErrorContext = NULL;
|
|
20
46
|
*/
|
21
47
|
void XMLCDECL
|
22
48
|
xmlGenericErrorDefaultFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) {
|
23
|
-
|
49
|
+
}
|
24
50
|
|
25
|
-
|
26
|
-
|
51
|
+
/**
|
52
|
+
* xmlCopyError:
|
53
|
+
* @from: a source error
|
54
|
+
* @to: a target error
|
55
|
+
*
|
56
|
+
* Save the original error to the new place.
|
57
|
+
*
|
58
|
+
* Returns 0 in case of success and -1 in case of error.
|
59
|
+
*/
|
60
|
+
int
|
61
|
+
xmlCopyError(xmlErrorPtr from, xmlErrorPtr to) {
|
62
|
+
char *message, *file, *str1, *str2, *str3;
|
27
63
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
64
|
+
if ((from == NULL) || (to == NULL))
|
65
|
+
return(-1);
|
66
|
+
|
67
|
+
message = (char *) xmlStrdup((xmlChar *) from->message);
|
68
|
+
file = (char *) xmlStrdup ((xmlChar *) from->file);
|
69
|
+
str1 = (char *) xmlStrdup ((xmlChar *) from->str1);
|
70
|
+
str2 = (char *) xmlStrdup ((xmlChar *) from->str2);
|
71
|
+
str3 = (char *) xmlStrdup ((xmlChar *) from->str3);
|
32
72
|
|
33
|
-
|
73
|
+
if (to->message != NULL)
|
74
|
+
xmlFree(to->message);
|
75
|
+
if (to->file != NULL)
|
76
|
+
xmlFree(to->file);
|
77
|
+
if (to->str1 != NULL)
|
78
|
+
xmlFree(to->str1);
|
79
|
+
if (to->str2 != NULL)
|
80
|
+
xmlFree(to->str2);
|
81
|
+
if (to->str3 != NULL)
|
82
|
+
xmlFree(to->str3);
|
83
|
+
to->domain = from->domain;
|
84
|
+
to->code = from->code;
|
85
|
+
to->level = from->level;
|
86
|
+
to->line = from->line;
|
87
|
+
to->node = from->node;
|
88
|
+
to->int1 = from->int1;
|
89
|
+
to->int2 = from->int2;
|
90
|
+
to->node = from->node;
|
91
|
+
to->ctxt = from->ctxt;
|
92
|
+
to->message = message;
|
93
|
+
to->file = file;
|
94
|
+
to->str1 = str1;
|
95
|
+
to->str2 = str2;
|
96
|
+
to->str3 = str3;
|
34
97
|
|
35
|
-
|
36
|
-
|
37
|
-
// return (&_xmlGenericError);
|
38
|
-
// }
|
98
|
+
return 0;
|
99
|
+
}
|
39
100
|
|
40
101
|
/**
|
41
102
|
* __xmlRaiseError:
|
@@ -63,16 +124,112 @@ xmlGenericErrorDefaultFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) {
|
|
63
124
|
*/
|
64
125
|
void XMLCDECL
|
65
126
|
__xmlRaiseError(xmlStructuredErrorFunc schannel,
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
127
|
+
xmlGenericErrorFunc channel, void *data, void *ctx,
|
128
|
+
void *nod, int domain, int code, xmlErrorLevel level,
|
129
|
+
const char *file, int line, const char *str1,
|
130
|
+
const char *str2, const char *str3, int int1, int col,
|
131
|
+
const char *msg, ...)
|
71
132
|
{
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
133
|
+
xmlParserCtxtPtr ctxt = NULL;
|
134
|
+
lxb_dom_node_t_ptr node = (lxb_dom_node_t_ptr)nod;
|
135
|
+
char *str = NULL;
|
136
|
+
xmlParserInputPtr input = NULL;
|
137
|
+
xmlErrorPtr to = &xmlLastError;
|
138
|
+
lxb_dom_node_t_ptr baseptr = NULL;
|
139
|
+
|
140
|
+
if (code == XML_ERR_OK)
|
141
|
+
return;
|
142
|
+
if ((xmlGetWarningsDefaultValue == 0) && (level == XML_ERR_WARNING))
|
143
|
+
return;
|
144
|
+
if ((domain == XML_FROM_PARSER) || (domain == XML_FROM_HTML) ||
|
145
|
+
(domain == XML_FROM_DTD) || (domain == XML_FROM_NAMESPACE) ||
|
146
|
+
(domain == XML_FROM_IO) || (domain == XML_FROM_VALID))
|
147
|
+
{
|
148
|
+
ctxt = (xmlParserCtxtPtr)ctx;
|
149
|
+
if ((schannel == NULL) && (ctxt != NULL) && (ctxt->sax != NULL) &&
|
150
|
+
(ctxt->sax->initialized == XML_SAX2_MAGIC) &&
|
151
|
+
(ctxt->sax->serror != NULL))
|
152
|
+
{
|
153
|
+
schannel = ctxt->sax->serror;
|
154
|
+
data = ctxt->userData;
|
155
|
+
}
|
156
|
+
}
|
157
|
+
/*
|
158
|
+
* Check if structured error handler set
|
159
|
+
*/
|
160
|
+
if (schannel == NULL)
|
161
|
+
{
|
162
|
+
schannel = xmlStructuredError;
|
163
|
+
/*
|
164
|
+
* if user has defined handler, change data ptr to user's choice
|
165
|
+
*/
|
166
|
+
if (schannel != NULL)
|
167
|
+
data = xmlStructuredErrorContext;
|
168
|
+
}
|
169
|
+
/*
|
170
|
+
* Formatting the message
|
171
|
+
*/
|
172
|
+
if (msg == NULL)
|
173
|
+
{
|
174
|
+
str = (char *)xmlStrdup(BAD_CAST "No error message provided");
|
175
|
+
}
|
176
|
+
else
|
177
|
+
{
|
178
|
+
XML_GET_VAR_STR(msg, str);
|
179
|
+
}
|
180
|
+
|
181
|
+
/*
|
182
|
+
* specific processing if a parser context is provided
|
183
|
+
*/
|
184
|
+
if (ctxt != NULL)
|
185
|
+
{
|
186
|
+
if (file == NULL)
|
187
|
+
{
|
188
|
+
input = ctxt->input;
|
189
|
+
if ((input != NULL) && (input->filename == NULL) &&
|
190
|
+
(ctxt->inputNr > 1))
|
191
|
+
{
|
192
|
+
input = ctxt->inputTab[ctxt->inputNr - 2];
|
193
|
+
}
|
194
|
+
if (input != NULL)
|
195
|
+
{
|
196
|
+
file = input->filename;
|
197
|
+
line = input->line;
|
198
|
+
col = input->col;
|
199
|
+
}
|
200
|
+
}
|
201
|
+
to = &ctxt->lastError;
|
202
|
+
}
|
203
|
+
|
204
|
+
/*
|
205
|
+
* Save the information about the error
|
206
|
+
*/
|
207
|
+
xmlResetError(to);
|
208
|
+
to->domain = domain;
|
209
|
+
to->code = code;
|
210
|
+
to->message = str;
|
211
|
+
to->level = level;
|
212
|
+
if (file != NULL)
|
213
|
+
to->file = (char *)xmlStrdup((const xmlChar *)file);
|
214
|
+
to->line = line;
|
215
|
+
if (str1 != NULL)
|
216
|
+
to->str1 = (char *)xmlStrdup((const xmlChar *)str1);
|
217
|
+
if (str2 != NULL)
|
218
|
+
to->str2 = (char *)xmlStrdup((const xmlChar *)str2);
|
219
|
+
if (str3 != NULL)
|
220
|
+
to->str3 = (char *)xmlStrdup((const xmlChar *)str3);
|
221
|
+
to->int1 = int1;
|
222
|
+
to->int2 = col;
|
223
|
+
to->node = node;
|
224
|
+
to->ctxt = ctx;
|
225
|
+
|
226
|
+
if (to != &xmlLastError)
|
227
|
+
xmlCopyError(to, &xmlLastError);
|
228
|
+
|
229
|
+
if (schannel != NULL)
|
230
|
+
{
|
231
|
+
schannel(data, to);
|
232
|
+
}
|
76
233
|
}
|
77
234
|
|
78
235
|
/**
|
@@ -131,4 +288,45 @@ __xmlSimpleError(int domain, int code, lxb_dom_node_t_ptr node,
|
|
131
288
|
code, XML_ERR_ERROR, NULL, 0, extra,
|
132
289
|
NULL, NULL, 0, 0, msg, extra);
|
133
290
|
}
|
291
|
+
}
|
292
|
+
|
293
|
+
/**
|
294
|
+
* xmlSetGenericErrorFunc:
|
295
|
+
* @ctx: the new error handling context
|
296
|
+
* @handler: the new handler function
|
297
|
+
*
|
298
|
+
* Function to reset the handler and the error context for out of
|
299
|
+
* context error messages.
|
300
|
+
* This simply means that @handler will be called for subsequent
|
301
|
+
* error messages while not parsing nor validating. And @ctx will
|
302
|
+
* be passed as first argument to @handler
|
303
|
+
* One can simply force messages to be emitted to another FILE * than
|
304
|
+
* stderr by setting @ctx to this file handle and @handler to NULL.
|
305
|
+
* For multi-threaded applications, this must be set separately for each thread.
|
306
|
+
*/
|
307
|
+
void
|
308
|
+
xmlSetGenericErrorFunc(void *ctx, xmlGenericErrorFunc handler) {
|
309
|
+
xmlGenericErrorContext = ctx;
|
310
|
+
if (handler != NULL)
|
311
|
+
xmlGenericError = handler;
|
312
|
+
else
|
313
|
+
xmlGenericError = xmlGenericErrorDefaultFunc;
|
314
|
+
}
|
315
|
+
|
316
|
+
/**
|
317
|
+
* xmlSetStructuredErrorFunc:
|
318
|
+
* @ctx: the new error handling context
|
319
|
+
* @handler: the new handler function
|
320
|
+
*
|
321
|
+
* Function to reset the handler and the error context for out of
|
322
|
+
* context structured error messages.
|
323
|
+
* This simply means that @handler will be called for subsequent
|
324
|
+
* error messages while not parsing nor validating. And @ctx will
|
325
|
+
* be passed as first argument to @handler
|
326
|
+
* For multi-threaded applications, this must be set separately for each thread.
|
327
|
+
*/
|
328
|
+
void
|
329
|
+
xmlSetStructuredErrorFunc(void *ctx, xmlStructuredErrorFunc handler) {
|
330
|
+
xmlStructuredErrorContext = ctx;
|
331
|
+
xmlStructuredError = handler;
|
134
332
|
}
|
data/lib/nokolexbor/node.rb
CHANGED
data/lib/nokolexbor/node_set.rb
CHANGED
data/lib/nokolexbor/version.rb
CHANGED
data/lib/nokolexbor/xpath.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokolexbor
|
4
|
+
module XPath
|
5
|
+
class SyntaxError < StandardError
|
6
|
+
attr_reader :domain
|
7
|
+
attr_reader :code
|
8
|
+
attr_reader :level
|
9
|
+
attr_reader :file
|
10
|
+
attr_reader :line
|
11
|
+
attr_reader :str1
|
12
|
+
attr_reader :str2
|
13
|
+
attr_reader :str3
|
14
|
+
attr_reader :int1
|
15
|
+
attr_reader :column
|
16
|
+
|
17
|
+
###
|
18
|
+
# return true if this is a non error
|
19
|
+
def none?
|
20
|
+
level == 0
|
21
|
+
end
|
22
|
+
|
23
|
+
###
|
24
|
+
# return true if this is a warning
|
25
|
+
def warning?
|
26
|
+
level == 1
|
27
|
+
end
|
28
|
+
|
29
|
+
###
|
30
|
+
# return true if this is an error
|
31
|
+
def error?
|
32
|
+
level == 2
|
33
|
+
end
|
34
|
+
|
35
|
+
###
|
36
|
+
# return true if this error is fatal
|
37
|
+
def fatal?
|
38
|
+
level == 3
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_s
|
42
|
+
message = super.chomp
|
43
|
+
[location_to_s, level_to_s, message]
|
44
|
+
.compact.join(": ")
|
45
|
+
.force_encoding(message.encoding)
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def level_to_s
|
51
|
+
case level
|
52
|
+
when 3 then "FATAL"
|
53
|
+
when 2 then "ERROR"
|
54
|
+
when 1 then "WARNING"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def nil_or_zero?(attribute)
|
59
|
+
attribute.nil? || attribute.zero?
|
60
|
+
end
|
61
|
+
|
62
|
+
def location_to_s
|
63
|
+
return nil if nil_or_zero?(line) && nil_or_zero?(column)
|
64
|
+
|
65
|
+
"#{line}:#{column}"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/lib/nokolexbor.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.0'
|
27
41
|
description: Nokolexbor is a high performance HTML5 parser, with support for both
|
28
42
|
CSS selectors and XPath. It's API is designed to be compatible with Nokogiri.
|
29
43
|
email: zyc9012@gmail.com
|
@@ -105,6 +119,7 @@ files:
|
|
105
119
|
- lib/nokolexbor/node.rb
|
106
120
|
- lib/nokolexbor/node_set.rb
|
107
121
|
- lib/nokolexbor/version.rb
|
122
|
+
- lib/nokolexbor/xpath.rb
|
108
123
|
- lib/nokolexbor/xpath_context.rb
|
109
124
|
- patches/0001-lexbor-support-text-pseudo-element.patch
|
110
125
|
- patches/0002-lexbor-match-id-class-case-sensitive.patch
|
@@ -522,7 +537,7 @@ licenses:
|
|
522
537
|
- MIT
|
523
538
|
metadata:
|
524
539
|
msys2_mingw_dependencies: cmake
|
525
|
-
post_install_message:
|
540
|
+
post_install_message:
|
526
541
|
rdoc_options: []
|
527
542
|
require_paths:
|
528
543
|
- lib
|
@@ -537,8 +552,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
537
552
|
- !ruby/object:Gem::Version
|
538
553
|
version: '0'
|
539
554
|
requirements: []
|
540
|
-
rubygems_version: 3.1
|
541
|
-
signing_key:
|
555
|
+
rubygems_version: 3.0.3.1
|
556
|
+
signing_key:
|
542
557
|
specification_version: 4
|
543
558
|
summary: High performance HTML5 parser, with support for both CSS selectors and XPath.
|
544
559
|
test_files: []
|