nokolexbor 0.2.6 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/libxml/xpathInternals.h +4 -5
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +186 -173
- data/ext/nokolexbor/nl_node_set.c +35 -70
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +122 -26
- data/ext/nokolexbor/nokolexbor.c +10 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/ext/nokolexbor/xml_xpath.c +7 -0
- data/lib/nokolexbor/document.rb +96 -1
- data/lib/nokolexbor/node.rb +109 -1
- data/lib/nokolexbor/node_set.rb +23 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +5 -14
- data/patches/0002-lexbor-match-id-class-case-sensitive.patch +2 -2
- data/vendor/lexbor/source/lexbor/core/lexbor.h +8 -0
- data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/memory.c +24 -4
- data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/memory.c +24 -4
- data/vendor/lexbor/source/lexbor/selectors/selectors.c +1 -2
- metadata +7 -5
- data/ext/nokolexbor/memory.c +0 -46
@@ -55,8 +55,7 @@ nl_node_set_allocate(VALUE klass)
|
|
55
55
|
VALUE
|
56
56
|
nl_rb_node_set_create_with_data(lexbor_array_t *array, VALUE rb_document)
|
57
57
|
{
|
58
|
-
if (array == NULL)
|
59
|
-
{
|
58
|
+
if (array == NULL) {
|
60
59
|
array = lexbor_array_create();
|
61
60
|
}
|
62
61
|
VALUE ret = TypedData_Wrap_Struct(cNokolexborNodeSet, &nl_node_set_type, array);
|
@@ -77,8 +76,7 @@ nl_node_set_push(VALUE self, VALUE rb_node)
|
|
77
76
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
78
77
|
|
79
78
|
lxb_status_t status = lexbor_array_push_unique(array, node);
|
80
|
-
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED)
|
81
|
-
{
|
79
|
+
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
|
82
80
|
nl_raise_lexbor_error(status);
|
83
81
|
}
|
84
82
|
|
@@ -93,13 +91,11 @@ nl_node_set_delete(VALUE self, VALUE rb_node)
|
|
93
91
|
|
94
92
|
size_t i;
|
95
93
|
for (i = 0; i < array->length; i++)
|
96
|
-
if (array->list[i] == node)
|
97
|
-
{
|
94
|
+
if (array->list[i] == node) {
|
98
95
|
break;
|
99
96
|
}
|
100
97
|
|
101
|
-
if (i >= array->length)
|
102
|
-
{
|
98
|
+
if (i >= array->length) {
|
103
99
|
// not found
|
104
100
|
return Qnil;
|
105
101
|
}
|
@@ -114,8 +110,7 @@ nl_node_set_is_include(VALUE self, VALUE rb_node)
|
|
114
110
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
115
111
|
|
116
112
|
for (size_t i = 0; i < array->length; i++)
|
117
|
-
if (array->list[i] == node)
|
118
|
-
{
|
113
|
+
if (array->list[i] == node) {
|
119
114
|
return Qtrue;
|
120
115
|
}
|
121
116
|
|
@@ -126,13 +121,11 @@ static VALUE
|
|
126
121
|
nl_node_set_index_at(VALUE self, long offset)
|
127
122
|
{
|
128
123
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
129
|
-
if (offset >= (long)array->length || abs((int)offset) > (long)array->length)
|
130
|
-
{
|
124
|
+
if (offset >= (long)array->length || abs((int)offset) > (long)array->length) {
|
131
125
|
return Qnil;
|
132
126
|
}
|
133
127
|
|
134
|
-
if (offset < 0)
|
135
|
-
{
|
128
|
+
if (offset < 0) {
|
136
129
|
offset += array->length;
|
137
130
|
}
|
138
131
|
|
@@ -145,35 +138,28 @@ nl_node_set_subseq(VALUE self, long beg, long len)
|
|
145
138
|
{
|
146
139
|
lexbor_array_t *old_array = nl_rb_node_set_unwrap(self);
|
147
140
|
|
148
|
-
if (beg > (long)old_array->length)
|
149
|
-
{
|
141
|
+
if (beg > (long)old_array->length) {
|
150
142
|
return Qnil;
|
151
143
|
}
|
152
|
-
if (beg < 0 || len < 0)
|
153
|
-
{
|
144
|
+
if (beg < 0 || len < 0) {
|
154
145
|
return Qnil;
|
155
146
|
}
|
156
147
|
|
157
|
-
if ((beg + len) > (long)old_array->length)
|
158
|
-
{
|
148
|
+
if ((beg + len) > (long)old_array->length) {
|
159
149
|
len = old_array->length - beg;
|
160
150
|
}
|
161
151
|
|
162
152
|
lexbor_array_t *new_array = lexbor_array_create();
|
163
|
-
if (len > 0)
|
164
|
-
{
|
153
|
+
if (len > 0) {
|
165
154
|
lxb_status_t status = lexbor_array_init(new_array, len);
|
166
|
-
if (status != LXB_STATUS_OK)
|
167
|
-
{
|
155
|
+
if (status != LXB_STATUS_OK) {
|
168
156
|
nl_raise_lexbor_error(status);
|
169
157
|
}
|
170
158
|
}
|
171
159
|
|
172
|
-
for (long j = beg; j < beg + len; ++j)
|
173
|
-
{
|
160
|
+
for (long j = beg; j < beg + len; ++j) {
|
174
161
|
lxb_status_t status = lexbor_array_push(new_array, old_array->list[j]);
|
175
|
-
if (status != LXB_STATUS_OK)
|
176
|
-
{
|
162
|
+
if (status != LXB_STATUS_OK) {
|
177
163
|
nl_raise_lexbor_error(status);
|
178
164
|
}
|
179
165
|
}
|
@@ -188,31 +174,26 @@ nl_node_set_slice(int argc, VALUE *argv, VALUE self)
|
|
188
174
|
|
189
175
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
190
176
|
|
191
|
-
if (argc == 2)
|
192
|
-
{
|
177
|
+
if (argc == 2) {
|
193
178
|
beg = NUM2LONG(argv[0]);
|
194
179
|
len = NUM2LONG(argv[1]);
|
195
|
-
if (beg < 0)
|
196
|
-
{
|
180
|
+
if (beg < 0) {
|
197
181
|
beg += array->length;
|
198
182
|
}
|
199
183
|
return nl_node_set_subseq(self, beg, len);
|
200
184
|
}
|
201
185
|
|
202
|
-
if (argc != 1)
|
203
|
-
{
|
186
|
+
if (argc != 1) {
|
204
187
|
rb_scan_args(argc, argv, "11", NULL, NULL);
|
205
188
|
}
|
206
189
|
arg = argv[0];
|
207
190
|
|
208
|
-
if (FIXNUM_P(arg))
|
209
|
-
{
|
191
|
+
if (FIXNUM_P(arg)) {
|
210
192
|
return nl_node_set_index_at(self, FIX2LONG(arg));
|
211
193
|
}
|
212
194
|
|
213
195
|
/* if arg is Range */
|
214
|
-
switch (rb_range_beg_len(arg, &beg, &len, array->length, 0))
|
215
|
-
{
|
196
|
+
switch (rb_range_beg_len(arg, &beg, &len, array->length, 0)) {
|
216
197
|
case Qfalse:
|
217
198
|
break;
|
218
199
|
case Qnil:
|
@@ -231,8 +212,7 @@ nl_node_set_to_array(VALUE self)
|
|
231
212
|
|
232
213
|
VALUE list = rb_ary_new2(array->length);
|
233
214
|
VALUE doc = nl_rb_document_get(self);
|
234
|
-
for (size_t i = 0; i < array->length; i++)
|
235
|
-
{
|
215
|
+
for (size_t i = 0; i < array->length; i++) {
|
236
216
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
237
217
|
VALUE rb_node = nl_rb_node_create(node, doc);
|
238
218
|
rb_ary_push(list, rb_node);
|
@@ -244,31 +224,27 @@ nl_node_set_to_array(VALUE self)
|
|
244
224
|
static VALUE
|
245
225
|
nl_node_set_union(VALUE self, VALUE other)
|
246
226
|
{
|
247
|
-
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet))
|
248
|
-
{
|
227
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
|
249
228
|
rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
|
250
229
|
}
|
251
230
|
|
252
231
|
lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
|
253
232
|
lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
|
254
233
|
|
255
|
-
if (self_array->length + other_array->length == 0)
|
256
|
-
{
|
234
|
+
if (self_array->length + other_array->length == 0) {
|
257
235
|
return nl_rb_node_set_create_with_data(NULL, nl_rb_document_get(self));
|
258
236
|
}
|
259
237
|
|
260
238
|
lexbor_array_t *new_array = lexbor_array_create();
|
261
239
|
lxb_status_t status = lexbor_array_init(new_array, self_array->length + other_array->length);
|
262
|
-
if (status != LXB_STATUS_OK)
|
263
|
-
{
|
240
|
+
if (status != LXB_STATUS_OK) {
|
264
241
|
nl_raise_lexbor_error(status);
|
265
242
|
}
|
266
243
|
|
267
244
|
memcpy(new_array->list, self_array->list, sizeof(lxb_dom_node_t *) * self_array->length);
|
268
245
|
new_array->length = self_array->length;
|
269
246
|
|
270
|
-
for (size_t i = 0; i < other_array->length; i++)
|
271
|
-
{
|
247
|
+
for (size_t i = 0; i < other_array->length; i++) {
|
272
248
|
lexbor_array_push_unique(new_array, other_array->list[i]);
|
273
249
|
}
|
274
250
|
|
@@ -279,40 +255,33 @@ static lxb_status_t
|
|
279
255
|
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
280
256
|
{
|
281
257
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
282
|
-
if (doc == NULL)
|
283
|
-
{
|
258
|
+
if (doc == NULL) {
|
284
259
|
rb_raise(rb_eRuntimeError, "Error getting document");
|
285
260
|
}
|
286
261
|
// Wrap direct children with a temporary fragment so that they can be searched
|
287
262
|
lxb_dom_document_fragment_t *frag = lxb_dom_document_fragment_interface_create(doc);
|
288
|
-
if (frag == NULL)
|
289
|
-
{
|
263
|
+
if (frag == NULL) {
|
290
264
|
rb_raise(rb_eRuntimeError, "Error creating document fragment");
|
291
265
|
}
|
292
266
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
293
267
|
|
294
268
|
lexbor_array_t *backup_array = lexbor_array_create();
|
295
|
-
if (array->length > 0)
|
296
|
-
{
|
269
|
+
if (array->length > 0) {
|
297
270
|
lxb_status_t status = lexbor_array_init(backup_array, array->length);
|
298
|
-
if (status != LXB_STATUS_OK)
|
299
|
-
{
|
271
|
+
if (status != LXB_STATUS_OK) {
|
300
272
|
nl_raise_lexbor_error(status);
|
301
273
|
}
|
302
274
|
}
|
303
275
|
// Backup original node data and re-group them into a fragment
|
304
|
-
for (size_t i = 0; i < array->length; i++)
|
305
|
-
{
|
276
|
+
for (size_t i = 0; i < array->length; i++) {
|
306
277
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
307
278
|
lxb_dom_node_t *backup_node = malloc(sizeof(lxb_dom_node_t));
|
308
|
-
if (backup_node == NULL)
|
309
|
-
{
|
279
|
+
if (backup_node == NULL) {
|
310
280
|
nl_raise_lexbor_error(LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
311
281
|
}
|
312
282
|
memcpy(backup_node, node, sizeof(lxb_dom_node_t));
|
313
283
|
lxb_status_t status = lexbor_array_push(backup_array, backup_node);
|
314
|
-
if (status != LXB_STATUS_OK)
|
315
|
-
{
|
284
|
+
if (status != LXB_STATUS_OK) {
|
316
285
|
nl_raise_lexbor_error(LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
317
286
|
}
|
318
287
|
lxb_dom_node_insert_child(&frag->node, node);
|
@@ -323,8 +292,7 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
323
292
|
|
324
293
|
lxb_dom_document_fragment_interface_destroy(frag);
|
325
294
|
// Restore original node data
|
326
|
-
for (size_t i = 0; i < array->length; i++)
|
327
|
-
{
|
295
|
+
for (size_t i = 0; i < array->length; i++) {
|
328
296
|
memcpy(array->list[i], backup_array->list[i], sizeof(lxb_dom_node_t));
|
329
297
|
free(backup_array->list[i]);
|
330
298
|
}
|
@@ -341,14 +309,12 @@ nl_node_set_at_css(VALUE self, VALUE selector)
|
|
341
309
|
|
342
310
|
lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
343
311
|
|
344
|
-
if (status != LXB_STATUS_OK)
|
345
|
-
{
|
312
|
+
if (status != LXB_STATUS_OK) {
|
346
313
|
lexbor_array_destroy(array, true);
|
347
314
|
nl_raise_lexbor_error(status);
|
348
315
|
}
|
349
316
|
|
350
|
-
if (array->length == 0)
|
351
|
-
{
|
317
|
+
if (array->length == 0) {
|
352
318
|
lexbor_array_destroy(array, true);
|
353
319
|
return Qnil;
|
354
320
|
}
|
@@ -369,8 +335,7 @@ nl_node_set_css(VALUE self, VALUE selector)
|
|
369
335
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
370
336
|
|
371
337
|
lxb_status_t status = nl_node_set_find(self, selector, nl_node_css_callback, array);
|
372
|
-
if (status != LXB_STATUS_OK)
|
373
|
-
{
|
338
|
+
if (status != LXB_STATUS_OK) {
|
374
339
|
lexbor_array_destroy(array, true);
|
375
340
|
nl_raise_lexbor_error(status);
|
376
341
|
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborText;
|
4
|
+
extern VALUE cNokolexborCharacterData;
|
5
|
+
extern VALUE mNokolexbor;
|
6
|
+
|
7
|
+
static VALUE
|
8
|
+
nl_text_new(int argc, VALUE *argv, VALUE klass)
|
9
|
+
{
|
10
|
+
lxb_dom_document_t *document;
|
11
|
+
VALUE rb_text;
|
12
|
+
VALUE rb_document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "2*", &rb_text, &rb_document, &rest);
|
16
|
+
|
17
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
18
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
19
|
+
}
|
20
|
+
|
21
|
+
document = nl_rb_document_unwrap(rb_document);
|
22
|
+
|
23
|
+
const char* c_text = StringValuePtr(rb_text);
|
24
|
+
size_t text_len = RSTRING_LEN(rb_text);
|
25
|
+
lxb_dom_text_t *element = lxb_dom_document_create_text_node(document, (const lxb_char_t *)c_text, text_len);
|
26
|
+
if (element == NULL) {
|
27
|
+
rb_raise(rb_eRuntimeError, "Error creating text node");
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE rb_node = nl_rb_node_create(&element->char_data.node, rb_document);
|
31
|
+
|
32
|
+
if (rb_block_given_p()) {
|
33
|
+
rb_yield(rb_node);
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_node;
|
37
|
+
}
|
38
|
+
|
39
|
+
void Init_nl_text(void)
|
40
|
+
{
|
41
|
+
cNokolexborText = rb_define_class_under(mNokolexbor, "Text", cNokolexborCharacterData);
|
42
|
+
|
43
|
+
rb_define_singleton_method(cNokolexborText, "new", nl_text_new, -1);
|
44
|
+
}
|
@@ -1,11 +1,11 @@
|
|
1
|
-
#include <ruby.h>
|
2
|
-
#include <ruby/util.h>
|
3
|
-
#include "nokolexbor.h"
|
4
1
|
#include "libxml.h"
|
5
2
|
#include "libxml/globals.h"
|
3
|
+
#include "libxml/parserInternals.h"
|
6
4
|
#include "libxml/xpath.h"
|
7
5
|
#include "libxml/xpathInternals.h"
|
8
|
-
#include "
|
6
|
+
#include "nokolexbor.h"
|
7
|
+
#include <ruby.h>
|
8
|
+
#include <ruby/util.h>
|
9
9
|
|
10
10
|
#define RBSTR_OR_QNIL(_str) (_str ? rb_utf8_str_new_cstr(_str) : Qnil)
|
11
11
|
|
@@ -15,12 +15,110 @@ VALUE cNokolexborXpathContext;
|
|
15
15
|
VALUE mNokolexborXpath;
|
16
16
|
VALUE cNokolexborXpathSyntaxError;
|
17
17
|
|
18
|
+
static const xmlChar *NOKOGIRI_PREFIX = (const xmlChar *)"nokogiri";
|
19
|
+
static const xmlChar *NOKOGIRI_URI = (const xmlChar *)"http://www.nokogiri.org/default_ns/ruby/extensions_functions";
|
20
|
+
static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
|
21
|
+
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
|
22
|
+
|
18
23
|
static void
|
19
24
|
free_xml_xpath_context(xmlXPathContextPtr ctx)
|
20
25
|
{
|
21
26
|
nl_xmlXPathFreeContext(ctx);
|
22
27
|
}
|
23
28
|
|
29
|
+
/* find a CSS class in an HTML element's `class` attribute */
|
30
|
+
static const xmlChar *
|
31
|
+
builtin_css_class(const xmlChar *str, const xmlChar *val)
|
32
|
+
{
|
33
|
+
int val_len;
|
34
|
+
|
35
|
+
if (str == NULL) {
|
36
|
+
return (NULL);
|
37
|
+
}
|
38
|
+
if (val == NULL) {
|
39
|
+
return (NULL);
|
40
|
+
}
|
41
|
+
|
42
|
+
val_len = nl_xmlStrlen(val);
|
43
|
+
if (val_len == 0) {
|
44
|
+
return (str);
|
45
|
+
}
|
46
|
+
|
47
|
+
while (*str != 0) {
|
48
|
+
if ((*str == *val) && !nl_xmlStrncmp(str, val, val_len)) {
|
49
|
+
const xmlChar *next_byte = str + val_len;
|
50
|
+
|
51
|
+
/* only match if the next byte is whitespace or end of string */
|
52
|
+
if ((*next_byte == 0) || (IS_BLANK_CH(*next_byte))) {
|
53
|
+
return ((const xmlChar *)str);
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
/* advance str to whitespace */
|
58
|
+
while ((*str != 0) && !IS_BLANK_CH(*str)) {
|
59
|
+
str++;
|
60
|
+
}
|
61
|
+
|
62
|
+
/* advance str to start of next word or end of string */
|
63
|
+
while ((*str != 0) && IS_BLANK_CH(*str)) {
|
64
|
+
str++;
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
return (NULL);
|
69
|
+
}
|
70
|
+
|
71
|
+
/* xmlXPathFunction to wrap builtin_css_class() */
|
72
|
+
static void
|
73
|
+
xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
74
|
+
{
|
75
|
+
xmlXPathObjectPtr hay, needle;
|
76
|
+
|
77
|
+
CHECK_ARITY(2);
|
78
|
+
|
79
|
+
CAST_TO_STRING;
|
80
|
+
needle = nl_xmlXPathValuePop(ctxt);
|
81
|
+
if ((needle == NULL) || (needle->type != XPATH_STRING)) {
|
82
|
+
nl_xmlXPathFreeObject(needle);
|
83
|
+
XP_ERROR(XPATH_INVALID_TYPE);
|
84
|
+
}
|
85
|
+
|
86
|
+
CAST_TO_STRING;
|
87
|
+
hay = nl_xmlXPathValuePop(ctxt);
|
88
|
+
if ((hay == NULL) || (hay->type != XPATH_STRING)) {
|
89
|
+
nl_xmlXPathFreeObject(hay);
|
90
|
+
nl_xmlXPathFreeObject(needle);
|
91
|
+
XP_ERROR(XPATH_INVALID_TYPE);
|
92
|
+
}
|
93
|
+
|
94
|
+
if (builtin_css_class(hay->stringval, needle->stringval)) {
|
95
|
+
nl_xmlXPathValuePush(ctxt, nl_xmlXPathNewBoolean(1));
|
96
|
+
} else {
|
97
|
+
nl_xmlXPathValuePush(ctxt, nl_xmlXPathNewBoolean(0));
|
98
|
+
}
|
99
|
+
|
100
|
+
nl_xmlXPathFreeObject(hay);
|
101
|
+
nl_xmlXPathFreeObject(needle);
|
102
|
+
}
|
103
|
+
|
104
|
+
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
|
105
|
+
static void
|
106
|
+
xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
107
|
+
{
|
108
|
+
xmlXPathObjectPtr element_name;
|
109
|
+
size_t tmp_len;
|
110
|
+
|
111
|
+
CHECK_ARITY(1);
|
112
|
+
CAST_TO_STRING;
|
113
|
+
CHECK_TYPE(XPATH_STRING);
|
114
|
+
element_name = nl_xmlXPathValuePop(ctxt);
|
115
|
+
|
116
|
+
const lxb_char_t *node_name = lxb_dom_node_name_qualified(ctxt->context->node, &tmp_len);
|
117
|
+
nl_xmlXPathValuePush(ctxt, nl_xmlXPathNewBoolean(nl_xmlStrEqual((xmlChar *)node_name, element_name->stringval)));
|
118
|
+
|
119
|
+
nl_xmlXPathFreeObject(element_name);
|
120
|
+
}
|
121
|
+
|
24
122
|
/*
|
25
123
|
* call-seq:
|
26
124
|
* register_ns(prefix, uri)
|
@@ -34,8 +132,8 @@ nl_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
|
34
132
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
35
133
|
|
36
134
|
nl_xmlXPathRegisterNs(ctx,
|
37
|
-
|
38
|
-
|
135
|
+
(const xmlChar *)StringValueCStr(prefix),
|
136
|
+
(const xmlChar *)StringValueCStr(uri));
|
39
137
|
return self;
|
40
138
|
}
|
41
139
|
|
@@ -55,8 +153,8 @@ nl_xpath_context_register_variable(VALUE self, VALUE name, VALUE value)
|
|
55
153
|
xmlValue = nl_xmlXPathNewCString(StringValueCStr(value));
|
56
154
|
|
57
155
|
nl_xmlXPathRegisterVariable(ctx,
|
58
|
-
|
59
|
-
|
156
|
+
(const xmlChar *)StringValueCStr(name),
|
157
|
+
xmlValue);
|
60
158
|
|
61
159
|
return self;
|
62
160
|
}
|
@@ -70,28 +168,23 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
70
168
|
{
|
71
169
|
VALUE rb_retval;
|
72
170
|
|
73
|
-
switch (c_xpath_object->type)
|
74
|
-
{
|
171
|
+
switch (c_xpath_object->type) {
|
75
172
|
case XPATH_STRING:
|
76
173
|
rb_retval = rb_utf8_str_new_cstr((const char *)c_xpath_object->stringval);
|
77
174
|
nl_xmlFree(c_xpath_object->stringval);
|
78
175
|
return rb_retval;
|
79
176
|
|
80
|
-
case XPATH_NODESET:
|
81
|
-
|
82
|
-
if (c_xpath_object->nodesetval == NULL)
|
83
|
-
{
|
177
|
+
case XPATH_NODESET: {
|
178
|
+
if (c_xpath_object->nodesetval == NULL) {
|
84
179
|
return nl_rb_node_set_create_with_data(NULL, rb_document);
|
85
180
|
}
|
86
|
-
if (c_xpath_object->nodesetval->nodeNr == 0)
|
87
|
-
{
|
181
|
+
if (c_xpath_object->nodesetval->nodeNr == 0) {
|
88
182
|
return nl_rb_node_set_create_with_data(NULL, rb_document);
|
89
183
|
}
|
90
184
|
|
91
185
|
lexbor_array_t *array = lexbor_array_create();
|
92
186
|
lxb_status_t status = lexbor_array_init(array, c_xpath_object->nodesetval->nodeNr);
|
93
|
-
if (status != LXB_STATUS_OK)
|
94
|
-
{
|
187
|
+
if (status != LXB_STATUS_OK) {
|
95
188
|
nl_raise_lexbor_error(status);
|
96
189
|
}
|
97
190
|
memcpy(array->list, c_xpath_object->nodesetval->nodeTab, sizeof(lxb_dom_node_t *) * c_xpath_object->nodesetval->nodeNr);
|
@@ -122,8 +215,7 @@ nl_xpath_wrap_syntax_error(xmlErrorPtr error)
|
|
122
215
|
&msg,
|
123
216
|
cNokolexborXpathSyntaxError);
|
124
217
|
|
125
|
-
if (error)
|
126
|
-
{
|
218
|
+
if (error) {
|
127
219
|
rb_iv_set(e, "@domain", INT2NUM(error->domain));
|
128
220
|
rb_iv_set(e, "@code", INT2NUM(error->code));
|
129
221
|
rb_iv_set(e, "@level", INT2NUM((short)error->level));
|
@@ -182,8 +274,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
182
274
|
|
183
275
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
184
276
|
|
185
|
-
if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1)
|
186
|
-
{
|
277
|
+
if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
|
187
278
|
xpath_handler = Qnil;
|
188
279
|
}
|
189
280
|
|
@@ -203,15 +294,13 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
203
294
|
nl_xmlSetStructuredErrorFunc(NULL, NULL);
|
204
295
|
nl_xmlSetGenericErrorFunc(NULL, NULL);
|
205
296
|
|
206
|
-
if (xpath == NULL)
|
207
|
-
{
|
297
|
+
if (xpath == NULL) {
|
208
298
|
nl_xmlXPathFreeObject(xpath);
|
209
299
|
rb_exc_raise(rb_ary_entry(errors, 0));
|
210
300
|
}
|
211
301
|
|
212
302
|
retval = xpath2ruby(xpath, ctx, nl_rb_document_get(self));
|
213
|
-
if (retval == Qundef)
|
214
|
-
{
|
303
|
+
if (retval == Qundef) {
|
215
304
|
retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
|
216
305
|
}
|
217
306
|
|
@@ -237,6 +326,13 @@ nl_xpath_context_new(VALUE klass, VALUE rb_node)
|
|
237
326
|
ctx = nl_xmlXPathNewContext(node->owner_document);
|
238
327
|
ctx->node = node;
|
239
328
|
|
329
|
+
nl_xmlXPathRegisterNs(ctx, NOKOGIRI_PREFIX, NOKOGIRI_URI);
|
330
|
+
nl_xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
331
|
+
nl_xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
332
|
+
xpath_builtin_css_class);
|
333
|
+
nl_xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
334
|
+
xpath_builtin_local_name_is);
|
335
|
+
|
240
336
|
self = Data_Wrap_Struct(klass, 0, free_xml_xpath_context, ctx);
|
241
337
|
rb_iv_set(self, "@document", nl_rb_document_get(rb_node));
|
242
338
|
|
data/ext/nokolexbor/nokolexbor.c
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
VALUE mNokolexbor;
|
4
4
|
VALUE eLexborError;
|
5
|
+
VALUE eLexborSyntaxError;
|
5
6
|
|
6
7
|
void nl_raise_lexbor_error(lxb_status_t error)
|
7
8
|
{
|
8
|
-
switch (error)
|
9
|
-
{
|
9
|
+
switch (error) {
|
10
10
|
case LXB_STATUS_ERROR:
|
11
11
|
rb_raise(eLexborError, "LXB_STATUS_ERROR");
|
12
12
|
case LXB_STATUS_ERROR_MEMORY_ALLOCATION:
|
@@ -30,7 +30,7 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
30
30
|
case LXB_STATUS_ERROR_UNEXPECTED_RESULT:
|
31
31
|
rb_raise(eLexborError, "LXB_STATUS_ERROR_UNEXPECTED_RESULT");
|
32
32
|
case LXB_STATUS_ERROR_UNEXPECTED_DATA:
|
33
|
-
rb_raise(
|
33
|
+
rb_raise(eLexborSyntaxError, "LXB_STATUS_ERROR_UNEXPECTED_DATA");
|
34
34
|
case LXB_STATUS_ERROR_OVERFLOW:
|
35
35
|
rb_raise(eLexborError, "LXB_STATUS_ERROR_OVERFLOW");
|
36
36
|
case LXB_STATUS_CONTINUE:
|
@@ -54,10 +54,17 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
54
54
|
|
55
55
|
void Init_nokolexbor(void)
|
56
56
|
{
|
57
|
+
#ifndef NOKOLEXBOR_ASAN
|
58
|
+
lexbor_memory_setup(ruby_xmalloc, ruby_xrealloc, ruby_xcalloc, ruby_xfree);
|
59
|
+
#endif
|
57
60
|
mNokolexbor = rb_define_module("Nokolexbor");
|
58
61
|
eLexborError = rb_define_class_under(mNokolexbor, "LexborError", rb_eStandardError);
|
62
|
+
eLexborSyntaxError = rb_define_class_under(mNokolexbor, "LexborSyntaxError", eLexborError);
|
59
63
|
Init_nl_node();
|
60
64
|
Init_nl_document();
|
65
|
+
Init_nl_text();
|
66
|
+
Init_nl_comment();
|
67
|
+
Init_nl_cdata();
|
61
68
|
Init_nl_node_set();
|
62
69
|
Init_nl_xpath_context();
|
63
70
|
}
|
data/ext/nokolexbor/nokolexbor.h
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
5
|
|
6
|
-
#include <lexbor/html/html.h>
|
7
6
|
#include <lexbor/css/css.h>
|
7
|
+
#include <lexbor/html/html.h>
|
8
8
|
#include <lexbor/selectors/selectors.h>
|
9
9
|
|
10
10
|
extern VALUE cNokolexborDocument;
|
@@ -12,6 +12,9 @@ extern VALUE cNokolexborDocument;
|
|
12
12
|
void Init_nl_document(void);
|
13
13
|
void Init_nl_node(void);
|
14
14
|
void Init_nl_node_set(void);
|
15
|
+
void Init_nl_text(void);
|
16
|
+
void Init_nl_comment(void);
|
17
|
+
void Init_nl_cdata(void);
|
15
18
|
void Init_nl_xpath_context(void);
|
16
19
|
|
17
20
|
void nl_raise_lexbor_error(lxb_status_t error);
|
@@ -21,14 +24,13 @@ VALUE nl_rb_node_set_create_with_data(lexbor_array_t *array, VALUE rb_document);
|
|
21
24
|
|
22
25
|
/*
 * Return the Ruby document associated with rb_node_or_doc.
 *
 * When the argument is itself a kind of cNokolexborDocument it is
 * returned as-is; otherwise the value of its "@document" instance
 * variable is returned.
 */
lxb_inline VALUE nl_rb_document_get(VALUE rb_node_or_doc)
{
  return rb_obj_is_kind_of(rb_node_or_doc, cNokolexborDocument)
             ? rb_node_or_doc
             : rb_iv_get(rb_node_or_doc, "@document");
}
|
30
32
|
|
31
|
-
lxb_dom_document_t *
|
33
|
+
lxb_dom_document_t *nl_rb_document_unwrap(VALUE rb_doc);
|
32
34
|
|
33
35
|
const lxb_char_t *
|
34
36
|
lxb_dom_node_name_qualified(lxb_dom_node_t *node, size_t *len);
|
data/ext/nokolexbor/xml_xpath.c
CHANGED
@@ -2897,6 +2897,10 @@ valuePop(xmlXPathParserContextPtr ctxt)
|
|
2897
2897
|
ctxt->valueTab[ctxt->valueNr] = NULL;
|
2898
2898
|
return (ret);
|
2899
2899
|
}
|
2900
|
+
|
2901
|
+
xmlXPathObjectPtr
|
2902
|
+
nl_xmlXPathValuePop(xmlXPathParserContextPtr ctxt) { return valuePop(ctxt); }
|
2903
|
+
|
2900
2904
|
/**
|
2901
2905
|
* valuePush:
|
2902
2906
|
* @ctxt: an XPath evaluation context
|
@@ -2941,6 +2945,9 @@ valuePush(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr value)
|
|
2941
2945
|
return (ctxt->valueNr++);
|
2942
2946
|
}
|
2943
2947
|
|
2948
|
+
int
|
2949
|
+
nl_xmlXPathValuePush(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr value) { return valuePush(ctxt, value); }
|
2950
|
+
|
2944
2951
|
/**
|
2945
2952
|
* nl_xmlXPathPopBoolean:
|
2946
2953
|
* @ctxt: an XPath parser context
|