nokolexbor 0.2.6 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/libxml/xpathInternals.h +4 -5
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +186 -173
- data/ext/nokolexbor/nl_node_set.c +35 -70
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +122 -26
- data/ext/nokolexbor/nokolexbor.c +10 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/ext/nokolexbor/xml_xpath.c +7 -0
- data/lib/nokolexbor/document.rb +96 -1
- data/lib/nokolexbor/node.rb +109 -1
- data/lib/nokolexbor/node_set.rb +23 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +5 -14
- data/patches/0002-lexbor-match-id-class-case-sensitive.patch +2 -2
- data/vendor/lexbor/source/lexbor/core/lexbor.h +8 -0
- data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/memory.c +24 -4
- data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/memory.c +24 -4
- data/vendor/lexbor/source/lexbor/selectors/selectors.c +1 -2
- metadata +7 -5
- data/ext/nokolexbor/memory.c +0 -46
@@ -55,8 +55,7 @@ nl_node_set_allocate(VALUE klass)
|
|
55
55
|
VALUE
|
56
56
|
nl_rb_node_set_create_with_data(lexbor_array_t *array, VALUE rb_document)
|
57
57
|
{
|
58
|
-
if (array == NULL)
|
59
|
-
{
|
58
|
+
if (array == NULL) {
|
60
59
|
array = lexbor_array_create();
|
61
60
|
}
|
62
61
|
VALUE ret = TypedData_Wrap_Struct(cNokolexborNodeSet, &nl_node_set_type, array);
|
@@ -77,8 +76,7 @@ nl_node_set_push(VALUE self, VALUE rb_node)
|
|
77
76
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
78
77
|
|
79
78
|
lxb_status_t status = lexbor_array_push_unique(array, node);
|
80
|
-
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED)
|
81
|
-
{
|
79
|
+
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
|
82
80
|
nl_raise_lexbor_error(status);
|
83
81
|
}
|
84
82
|
|
@@ -93,13 +91,11 @@ nl_node_set_delete(VALUE self, VALUE rb_node)
|
|
93
91
|
|
94
92
|
size_t i;
|
95
93
|
for (i = 0; i < array->length; i++)
|
96
|
-
if (array->list[i] == node)
|
97
|
-
{
|
94
|
+
if (array->list[i] == node) {
|
98
95
|
break;
|
99
96
|
}
|
100
97
|
|
101
|
-
if (i >= array->length)
|
102
|
-
{
|
98
|
+
if (i >= array->length) {
|
103
99
|
// not found
|
104
100
|
return Qnil;
|
105
101
|
}
|
@@ -114,8 +110,7 @@ nl_node_set_is_include(VALUE self, VALUE rb_node)
|
|
114
110
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
115
111
|
|
116
112
|
for (size_t i = 0; i < array->length; i++)
|
117
|
-
if (array->list[i] == node)
|
118
|
-
{
|
113
|
+
if (array->list[i] == node) {
|
119
114
|
return Qtrue;
|
120
115
|
}
|
121
116
|
|
@@ -126,13 +121,11 @@ static VALUE
|
|
126
121
|
nl_node_set_index_at(VALUE self, long offset)
|
127
122
|
{
|
128
123
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
129
|
-
if (offset >= (long)array->length || abs((int)offset) > (long)array->length)
|
130
|
-
{
|
124
|
+
if (offset >= (long)array->length || abs((int)offset) > (long)array->length) {
|
131
125
|
return Qnil;
|
132
126
|
}
|
133
127
|
|
134
|
-
if (offset < 0)
|
135
|
-
{
|
128
|
+
if (offset < 0) {
|
136
129
|
offset += array->length;
|
137
130
|
}
|
138
131
|
|
@@ -145,35 +138,28 @@ nl_node_set_subseq(VALUE self, long beg, long len)
|
|
145
138
|
{
|
146
139
|
lexbor_array_t *old_array = nl_rb_node_set_unwrap(self);
|
147
140
|
|
148
|
-
if (beg > (long)old_array->length)
|
149
|
-
{
|
141
|
+
if (beg > (long)old_array->length) {
|
150
142
|
return Qnil;
|
151
143
|
}
|
152
|
-
if (beg < 0 || len < 0)
|
153
|
-
{
|
144
|
+
if (beg < 0 || len < 0) {
|
154
145
|
return Qnil;
|
155
146
|
}
|
156
147
|
|
157
|
-
if ((beg + len) > (long)old_array->length)
|
158
|
-
{
|
148
|
+
if ((beg + len) > (long)old_array->length) {
|
159
149
|
len = old_array->length - beg;
|
160
150
|
}
|
161
151
|
|
162
152
|
lexbor_array_t *new_array = lexbor_array_create();
|
163
|
-
if (len > 0)
|
164
|
-
{
|
153
|
+
if (len > 0) {
|
165
154
|
lxb_status_t status = lexbor_array_init(new_array, len);
|
166
|
-
if (status != LXB_STATUS_OK)
|
167
|
-
{
|
155
|
+
if (status != LXB_STATUS_OK) {
|
168
156
|
nl_raise_lexbor_error(status);
|
169
157
|
}
|
170
158
|
}
|
171
159
|
|
172
|
-
for (long j = beg; j < beg + len; ++j)
|
173
|
-
{
|
160
|
+
for (long j = beg; j < beg + len; ++j) {
|
174
161
|
lxb_status_t status = lexbor_array_push(new_array, old_array->list[j]);
|
175
|
-
if (status != LXB_STATUS_OK)
|
176
|
-
{
|
162
|
+
if (status != LXB_STATUS_OK) {
|
177
163
|
nl_raise_lexbor_error(status);
|
178
164
|
}
|
179
165
|
}
|
@@ -188,31 +174,26 @@ nl_node_set_slice(int argc, VALUE *argv, VALUE self)
|
|
188
174
|
|
189
175
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
190
176
|
|
191
|
-
if (argc == 2)
|
192
|
-
{
|
177
|
+
if (argc == 2) {
|
193
178
|
beg = NUM2LONG(argv[0]);
|
194
179
|
len = NUM2LONG(argv[1]);
|
195
|
-
if (beg < 0)
|
196
|
-
{
|
180
|
+
if (beg < 0) {
|
197
181
|
beg += array->length;
|
198
182
|
}
|
199
183
|
return nl_node_set_subseq(self, beg, len);
|
200
184
|
}
|
201
185
|
|
202
|
-
if (argc != 1)
|
203
|
-
{
|
186
|
+
if (argc != 1) {
|
204
187
|
rb_scan_args(argc, argv, "11", NULL, NULL);
|
205
188
|
}
|
206
189
|
arg = argv[0];
|
207
190
|
|
208
|
-
if (FIXNUM_P(arg))
|
209
|
-
{
|
191
|
+
if (FIXNUM_P(arg)) {
|
210
192
|
return nl_node_set_index_at(self, FIX2LONG(arg));
|
211
193
|
}
|
212
194
|
|
213
195
|
/* if arg is Range */
|
214
|
-
switch (rb_range_beg_len(arg, &beg, &len, array->length, 0))
|
215
|
-
{
|
196
|
+
switch (rb_range_beg_len(arg, &beg, &len, array->length, 0)) {
|
216
197
|
case Qfalse:
|
217
198
|
break;
|
218
199
|
case Qnil:
|
@@ -231,8 +212,7 @@ nl_node_set_to_array(VALUE self)
|
|
231
212
|
|
232
213
|
VALUE list = rb_ary_new2(array->length);
|
233
214
|
VALUE doc = nl_rb_document_get(self);
|
234
|
-
for (size_t i = 0; i < array->length; i++)
|
235
|
-
{
|
215
|
+
for (size_t i = 0; i < array->length; i++) {
|
236
216
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
237
217
|
VALUE rb_node = nl_rb_node_create(node, doc);
|
238
218
|
rb_ary_push(list, rb_node);
|
@@ -244,31 +224,27 @@ nl_node_set_to_array(VALUE self)
|
|
244
224
|
static VALUE
|
245
225
|
nl_node_set_union(VALUE self, VALUE other)
|
246
226
|
{
|
247
|
-
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet))
|
248
|
-
{
|
227
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
|
249
228
|
rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
|
250
229
|
}
|
251
230
|
|
252
231
|
lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
|
253
232
|
lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
|
254
233
|
|
255
|
-
if (self_array->length + other_array->length == 0)
|
256
|
-
{
|
234
|
+
if (self_array->length + other_array->length == 0) {
|
257
235
|
return nl_rb_node_set_create_with_data(NULL, nl_rb_document_get(self));
|
258
236
|
}
|
259
237
|
|
260
238
|
lexbor_array_t *new_array = lexbor_array_create();
|
261
239
|
lxb_status_t status = lexbor_array_init(new_array, self_array->length + other_array->length);
|
262
|
-
if (status != LXB_STATUS_OK)
|
263
|
-
{
|
240
|
+
if (status != LXB_STATUS_OK) {
|
264
241
|
nl_raise_lexbor_error(status);
|
265
242
|
}
|
266
243
|
|
267
244
|
memcpy(new_array->list, self_array->list, sizeof(lxb_dom_node_t *) * self_array->length);
|
268
245
|
new_array->length = self_array->length;
|
269
246
|
|
270
|
-
for (size_t i = 0; i < other_array->length; i++)
|
271
|
-
{
|
247
|
+
for (size_t i = 0; i < other_array->length; i++) {
|
272
248
|
lexbor_array_push_unique(new_array, other_array->list[i]);
|
273
249
|
}
|
274
250
|
|
@@ -279,40 +255,33 @@ static lxb_status_t
|
|
279
255
|
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
280
256
|
{
|
281
257
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
282
|
-
if (doc == NULL)
|
283
|
-
{
|
258
|
+
if (doc == NULL) {
|
284
259
|
rb_raise(rb_eRuntimeError, "Error getting document");
|
285
260
|
}
|
286
261
|
// Wrap direct children with a temporary fragment so that they can be searched
|
287
262
|
lxb_dom_document_fragment_t *frag = lxb_dom_document_fragment_interface_create(doc);
|
288
|
-
if (frag == NULL)
|
289
|
-
{
|
263
|
+
if (frag == NULL) {
|
290
264
|
rb_raise(rb_eRuntimeError, "Error creating document fragment");
|
291
265
|
}
|
292
266
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
293
267
|
|
294
268
|
lexbor_array_t *backup_array = lexbor_array_create();
|
295
|
-
if (array->length > 0)
|
296
|
-
{
|
269
|
+
if (array->length > 0) {
|
297
270
|
lxb_status_t status = lexbor_array_init(backup_array, array->length);
|
298
|
-
if (status != LXB_STATUS_OK)
|
299
|
-
{
|
271
|
+
if (status != LXB_STATUS_OK) {
|
300
272
|
nl_raise_lexbor_error(status);
|
301
273
|
}
|
302
274
|
}
|
303
275
|
// Backup original node data and re-group them into a fragment
|
304
|
-
for (size_t i = 0; i < array->length; i++)
|
305
|
-
{
|
276
|
+
for (size_t i = 0; i < array->length; i++) {
|
306
277
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
307
278
|
lxb_dom_node_t *backup_node = malloc(sizeof(lxb_dom_node_t));
|
308
|
-
if (backup_node == NULL)
|
309
|
-
{
|
279
|
+
if (backup_node == NULL) {
|
310
280
|
nl_raise_lexbor_error(LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
311
281
|
}
|
312
282
|
memcpy(backup_node, node, sizeof(lxb_dom_node_t));
|
313
283
|
lxb_status_t status = lexbor_array_push(backup_array, backup_node);
|
314
|
-
if (status != LXB_STATUS_OK)
|
315
|
-
{
|
284
|
+
if (status != LXB_STATUS_OK) {
|
316
285
|
nl_raise_lexbor_error(LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
317
286
|
}
|
318
287
|
lxb_dom_node_insert_child(&frag->node, node);
|
@@ -323,8 +292,7 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
323
292
|
|
324
293
|
lxb_dom_document_fragment_interface_destroy(frag);
|
325
294
|
// Restore original node data
|
326
|
-
for (size_t i = 0; i < array->length; i++)
|
327
|
-
{
|
295
|
+
for (size_t i = 0; i < array->length; i++) {
|
328
296
|
memcpy(array->list[i], backup_array->list[i], sizeof(lxb_dom_node_t));
|
329
297
|
free(backup_array->list[i]);
|
330
298
|
}
|
@@ -341,14 +309,12 @@ nl_node_set_at_css(VALUE self, VALUE selector)
|
|
341
309
|
|
342
310
|
lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
343
311
|
|
344
|
-
if (status != LXB_STATUS_OK)
|
345
|
-
{
|
312
|
+
if (status != LXB_STATUS_OK) {
|
346
313
|
lexbor_array_destroy(array, true);
|
347
314
|
nl_raise_lexbor_error(status);
|
348
315
|
}
|
349
316
|
|
350
|
-
if (array->length == 0)
|
351
|
-
{
|
317
|
+
if (array->length == 0) {
|
352
318
|
lexbor_array_destroy(array, true);
|
353
319
|
return Qnil;
|
354
320
|
}
|
@@ -369,8 +335,7 @@ nl_node_set_css(VALUE self, VALUE selector)
|
|
369
335
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
370
336
|
|
371
337
|
lxb_status_t status = nl_node_set_find(self, selector, nl_node_css_callback, array);
|
372
|
-
if (status != LXB_STATUS_OK)
|
373
|
-
{
|
338
|
+
if (status != LXB_STATUS_OK) {
|
374
339
|
lexbor_array_destroy(array, true);
|
375
340
|
nl_raise_lexbor_error(status);
|
376
341
|
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborText;
|
4
|
+
extern VALUE cNokolexborCharacterData;
|
5
|
+
extern VALUE mNokolexbor;
|
6
|
+
|
7
|
+
static VALUE
|
8
|
+
nl_text_new(int argc, VALUE *argv, VALUE klass)
|
9
|
+
{
|
10
|
+
lxb_dom_document_t *document;
|
11
|
+
VALUE rb_text;
|
12
|
+
VALUE rb_document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "2*", &rb_text, &rb_document, &rest);
|
16
|
+
|
17
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
18
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
19
|
+
}
|
20
|
+
|
21
|
+
document = nl_rb_document_unwrap(rb_document);
|
22
|
+
|
23
|
+
const char* c_text = StringValuePtr(rb_text);
|
24
|
+
size_t text_len = RSTRING_LEN(rb_text);
|
25
|
+
lxb_dom_text_t *element = lxb_dom_document_create_text_node(document, (const lxb_char_t *)c_text, text_len);
|
26
|
+
if (element == NULL) {
|
27
|
+
rb_raise(rb_eRuntimeError, "Error creating text node");
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE rb_node = nl_rb_node_create(&element->char_data.node, rb_document);
|
31
|
+
|
32
|
+
if (rb_block_given_p()) {
|
33
|
+
rb_yield(rb_node);
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_node;
|
37
|
+
}
|
38
|
+
|
39
|
+
void Init_nl_text(void)
|
40
|
+
{
|
41
|
+
cNokolexborText = rb_define_class_under(mNokolexbor, "Text", cNokolexborCharacterData);
|
42
|
+
|
43
|
+
rb_define_singleton_method(cNokolexborText, "new", nl_text_new, -1);
|
44
|
+
}
|
@@ -1,11 +1,11 @@
|
|
1
|
-
#include <ruby.h>
|
2
|
-
#include <ruby/util.h>
|
3
|
-
#include "nokolexbor.h"
|
4
1
|
#include "libxml.h"
|
5
2
|
#include "libxml/globals.h"
|
3
|
+
#include "libxml/parserInternals.h"
|
6
4
|
#include "libxml/xpath.h"
|
7
5
|
#include "libxml/xpathInternals.h"
|
8
|
-
#include "
|
6
|
+
#include "nokolexbor.h"
|
7
|
+
#include <ruby.h>
|
8
|
+
#include <ruby/util.h>
|
9
9
|
|
10
10
|
#define RBSTR_OR_QNIL(_str) (_str ? rb_utf8_str_new_cstr(_str) : Qnil)
|
11
11
|
|
@@ -15,12 +15,110 @@ VALUE cNokolexborXpathContext;
|
|
15
15
|
VALUE mNokolexborXpath;
|
16
16
|
VALUE cNokolexborXpathSyntaxError;
|
17
17
|
|
18
|
+
static const xmlChar *NOKOGIRI_PREFIX = (const xmlChar *)"nokogiri";
|
19
|
+
static const xmlChar *NOKOGIRI_URI = (const xmlChar *)"http://www.nokogiri.org/default_ns/ruby/extensions_functions";
|
20
|
+
static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
|
21
|
+
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
|
22
|
+
|
18
23
|
static void
|
19
24
|
free_xml_xpath_context(xmlXPathContextPtr ctx)
|
20
25
|
{
|
21
26
|
nl_xmlXPathFreeContext(ctx);
|
22
27
|
}
|
23
28
|
|
29
|
+
/* find a CSS class in an HTML element's `class` attribute */
|
30
|
+
static const xmlChar *
|
31
|
+
builtin_css_class(const xmlChar *str, const xmlChar *val)
|
32
|
+
{
|
33
|
+
int val_len;
|
34
|
+
|
35
|
+
if (str == NULL) {
|
36
|
+
return (NULL);
|
37
|
+
}
|
38
|
+
if (val == NULL) {
|
39
|
+
return (NULL);
|
40
|
+
}
|
41
|
+
|
42
|
+
val_len = nl_xmlStrlen(val);
|
43
|
+
if (val_len == 0) {
|
44
|
+
return (str);
|
45
|
+
}
|
46
|
+
|
47
|
+
while (*str != 0) {
|
48
|
+
if ((*str == *val) && !nl_xmlStrncmp(str, val, val_len)) {
|
49
|
+
const xmlChar *next_byte = str + val_len;
|
50
|
+
|
51
|
+
/* only match if the next byte is whitespace or end of string */
|
52
|
+
if ((*next_byte == 0) || (IS_BLANK_CH(*next_byte))) {
|
53
|
+
return ((const xmlChar *)str);
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
/* advance str to whitespace */
|
58
|
+
while ((*str != 0) && !IS_BLANK_CH(*str)) {
|
59
|
+
str++;
|
60
|
+
}
|
61
|
+
|
62
|
+
/* advance str to start of next word or end of string */
|
63
|
+
while ((*str != 0) && IS_BLANK_CH(*str)) {
|
64
|
+
str++;
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
return (NULL);
|
69
|
+
}
|
70
|
+
|
71
|
+
/* xmlXPathFunction to wrap builtin_css_class() */
|
72
|
+
static void
|
73
|
+
xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
74
|
+
{
|
75
|
+
xmlXPathObjectPtr hay, needle;
|
76
|
+
|
77
|
+
CHECK_ARITY(2);
|
78
|
+
|
79
|
+
CAST_TO_STRING;
|
80
|
+
needle = nl_xmlXPathValuePop(ctxt);
|
81
|
+
if ((needle == NULL) || (needle->type != XPATH_STRING)) {
|
82
|
+
nl_xmlXPathFreeObject(needle);
|
83
|
+
XP_ERROR(XPATH_INVALID_TYPE);
|
84
|
+
}
|
85
|
+
|
86
|
+
CAST_TO_STRING;
|
87
|
+
hay = nl_xmlXPathValuePop(ctxt);
|
88
|
+
if ((hay == NULL) || (hay->type != XPATH_STRING)) {
|
89
|
+
nl_xmlXPathFreeObject(hay);
|
90
|
+
nl_xmlXPathFreeObject(needle);
|
91
|
+
XP_ERROR(XPATH_INVALID_TYPE);
|
92
|
+
}
|
93
|
+
|
94
|
+
if (builtin_css_class(hay->stringval, needle->stringval)) {
|
95
|
+
nl_xmlXPathValuePush(ctxt, nl_xmlXPathNewBoolean(1));
|
96
|
+
} else {
|
97
|
+
nl_xmlXPathValuePush(ctxt, nl_xmlXPathNewBoolean(0));
|
98
|
+
}
|
99
|
+
|
100
|
+
nl_xmlXPathFreeObject(hay);
|
101
|
+
nl_xmlXPathFreeObject(needle);
|
102
|
+
}
|
103
|
+
|
104
|
+
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
|
105
|
+
static void
|
106
|
+
xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
107
|
+
{
|
108
|
+
xmlXPathObjectPtr element_name;
|
109
|
+
size_t tmp_len;
|
110
|
+
|
111
|
+
CHECK_ARITY(1);
|
112
|
+
CAST_TO_STRING;
|
113
|
+
CHECK_TYPE(XPATH_STRING);
|
114
|
+
element_name = nl_xmlXPathValuePop(ctxt);
|
115
|
+
|
116
|
+
const lxb_char_t *node_name = lxb_dom_node_name_qualified(ctxt->context->node, &tmp_len);
|
117
|
+
nl_xmlXPathValuePush(ctxt, nl_xmlXPathNewBoolean(nl_xmlStrEqual((xmlChar *)node_name, element_name->stringval)));
|
118
|
+
|
119
|
+
nl_xmlXPathFreeObject(element_name);
|
120
|
+
}
|
121
|
+
|
24
122
|
/*
|
25
123
|
* call-seq:
|
26
124
|
* register_ns(prefix, uri)
|
@@ -34,8 +132,8 @@ nl_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
|
34
132
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
35
133
|
|
36
134
|
nl_xmlXPathRegisterNs(ctx,
|
37
|
-
|
38
|
-
|
135
|
+
(const xmlChar *)StringValueCStr(prefix),
|
136
|
+
(const xmlChar *)StringValueCStr(uri));
|
39
137
|
return self;
|
40
138
|
}
|
41
139
|
|
@@ -55,8 +153,8 @@ nl_xpath_context_register_variable(VALUE self, VALUE name, VALUE value)
|
|
55
153
|
xmlValue = nl_xmlXPathNewCString(StringValueCStr(value));
|
56
154
|
|
57
155
|
nl_xmlXPathRegisterVariable(ctx,
|
58
|
-
|
59
|
-
|
156
|
+
(const xmlChar *)StringValueCStr(name),
|
157
|
+
xmlValue);
|
60
158
|
|
61
159
|
return self;
|
62
160
|
}
|
@@ -70,28 +168,23 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
70
168
|
{
|
71
169
|
VALUE rb_retval;
|
72
170
|
|
73
|
-
switch (c_xpath_object->type)
|
74
|
-
{
|
171
|
+
switch (c_xpath_object->type) {
|
75
172
|
case XPATH_STRING:
|
76
173
|
rb_retval = rb_utf8_str_new_cstr((const char *)c_xpath_object->stringval);
|
77
174
|
nl_xmlFree(c_xpath_object->stringval);
|
78
175
|
return rb_retval;
|
79
176
|
|
80
|
-
case XPATH_NODESET:
|
81
|
-
|
82
|
-
if (c_xpath_object->nodesetval == NULL)
|
83
|
-
{
|
177
|
+
case XPATH_NODESET: {
|
178
|
+
if (c_xpath_object->nodesetval == NULL) {
|
84
179
|
return nl_rb_node_set_create_with_data(NULL, rb_document);
|
85
180
|
}
|
86
|
-
if (c_xpath_object->nodesetval->nodeNr == 0)
|
87
|
-
{
|
181
|
+
if (c_xpath_object->nodesetval->nodeNr == 0) {
|
88
182
|
return nl_rb_node_set_create_with_data(NULL, rb_document);
|
89
183
|
}
|
90
184
|
|
91
185
|
lexbor_array_t *array = lexbor_array_create();
|
92
186
|
lxb_status_t status = lexbor_array_init(array, c_xpath_object->nodesetval->nodeNr);
|
93
|
-
if (status != LXB_STATUS_OK)
|
94
|
-
{
|
187
|
+
if (status != LXB_STATUS_OK) {
|
95
188
|
nl_raise_lexbor_error(status);
|
96
189
|
}
|
97
190
|
memcpy(array->list, c_xpath_object->nodesetval->nodeTab, sizeof(lxb_dom_node_t *) * c_xpath_object->nodesetval->nodeNr);
|
@@ -122,8 +215,7 @@ nl_xpath_wrap_syntax_error(xmlErrorPtr error)
|
|
122
215
|
&msg,
|
123
216
|
cNokolexborXpathSyntaxError);
|
124
217
|
|
125
|
-
if (error)
|
126
|
-
{
|
218
|
+
if (error) {
|
127
219
|
rb_iv_set(e, "@domain", INT2NUM(error->domain));
|
128
220
|
rb_iv_set(e, "@code", INT2NUM(error->code));
|
129
221
|
rb_iv_set(e, "@level", INT2NUM((short)error->level));
|
@@ -182,8 +274,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
182
274
|
|
183
275
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
184
276
|
|
185
|
-
if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1)
|
186
|
-
{
|
277
|
+
if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
|
187
278
|
xpath_handler = Qnil;
|
188
279
|
}
|
189
280
|
|
@@ -203,15 +294,13 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
203
294
|
nl_xmlSetStructuredErrorFunc(NULL, NULL);
|
204
295
|
nl_xmlSetGenericErrorFunc(NULL, NULL);
|
205
296
|
|
206
|
-
if (xpath == NULL)
|
207
|
-
{
|
297
|
+
if (xpath == NULL) {
|
208
298
|
nl_xmlXPathFreeObject(xpath);
|
209
299
|
rb_exc_raise(rb_ary_entry(errors, 0));
|
210
300
|
}
|
211
301
|
|
212
302
|
retval = xpath2ruby(xpath, ctx, nl_rb_document_get(self));
|
213
|
-
if (retval == Qundef)
|
214
|
-
{
|
303
|
+
if (retval == Qundef) {
|
215
304
|
retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
|
216
305
|
}
|
217
306
|
|
@@ -237,6 +326,13 @@ nl_xpath_context_new(VALUE klass, VALUE rb_node)
|
|
237
326
|
ctx = nl_xmlXPathNewContext(node->owner_document);
|
238
327
|
ctx->node = node;
|
239
328
|
|
329
|
+
nl_xmlXPathRegisterNs(ctx, NOKOGIRI_PREFIX, NOKOGIRI_URI);
|
330
|
+
nl_xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
331
|
+
nl_xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
332
|
+
xpath_builtin_css_class);
|
333
|
+
nl_xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
334
|
+
xpath_builtin_local_name_is);
|
335
|
+
|
240
336
|
self = Data_Wrap_Struct(klass, 0, free_xml_xpath_context, ctx);
|
241
337
|
rb_iv_set(self, "@document", nl_rb_document_get(rb_node));
|
242
338
|
|
data/ext/nokolexbor/nokolexbor.c
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
VALUE mNokolexbor;
|
4
4
|
VALUE eLexborError;
|
5
|
+
VALUE eLexborSyntaxError;
|
5
6
|
|
6
7
|
void nl_raise_lexbor_error(lxb_status_t error)
|
7
8
|
{
|
8
|
-
switch (error)
|
9
|
-
{
|
9
|
+
switch (error) {
|
10
10
|
case LXB_STATUS_ERROR:
|
11
11
|
rb_raise(eLexborError, "LXB_STATUS_ERROR");
|
12
12
|
case LXB_STATUS_ERROR_MEMORY_ALLOCATION:
|
@@ -30,7 +30,7 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
30
30
|
case LXB_STATUS_ERROR_UNEXPECTED_RESULT:
|
31
31
|
rb_raise(eLexborError, "LXB_STATUS_ERROR_UNEXPECTED_RESULT");
|
32
32
|
case LXB_STATUS_ERROR_UNEXPECTED_DATA:
|
33
|
-
rb_raise(
|
33
|
+
rb_raise(eLexborSyntaxError, "LXB_STATUS_ERROR_UNEXPECTED_DATA");
|
34
34
|
case LXB_STATUS_ERROR_OVERFLOW:
|
35
35
|
rb_raise(eLexborError, "LXB_STATUS_ERROR_OVERFLOW");
|
36
36
|
case LXB_STATUS_CONTINUE:
|
@@ -54,10 +54,17 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
54
54
|
|
55
55
|
void Init_nokolexbor(void)
|
56
56
|
{
|
57
|
+
#ifndef NOKOLEXBOR_ASAN
|
58
|
+
lexbor_memory_setup(ruby_xmalloc, ruby_xrealloc, ruby_xcalloc, ruby_xfree);
|
59
|
+
#endif
|
57
60
|
mNokolexbor = rb_define_module("Nokolexbor");
|
58
61
|
eLexborError = rb_define_class_under(mNokolexbor, "LexborError", rb_eStandardError);
|
62
|
+
eLexborSyntaxError = rb_define_class_under(mNokolexbor, "LexborSyntaxError", eLexborError);
|
59
63
|
Init_nl_node();
|
60
64
|
Init_nl_document();
|
65
|
+
Init_nl_text();
|
66
|
+
Init_nl_comment();
|
67
|
+
Init_nl_cdata();
|
61
68
|
Init_nl_node_set();
|
62
69
|
Init_nl_xpath_context();
|
63
70
|
}
|
data/ext/nokolexbor/nokolexbor.h
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
5
|
|
6
|
-
#include <lexbor/html/html.h>
|
7
6
|
#include <lexbor/css/css.h>
|
7
|
+
#include <lexbor/html/html.h>
|
8
8
|
#include <lexbor/selectors/selectors.h>
|
9
9
|
|
10
10
|
extern VALUE cNokolexborDocument;
|
@@ -12,6 +12,9 @@ extern VALUE cNokolexborDocument;
|
|
12
12
|
void Init_nl_document(void);
|
13
13
|
void Init_nl_node(void);
|
14
14
|
void Init_nl_node_set(void);
|
15
|
+
void Init_nl_text(void);
|
16
|
+
void Init_nl_comment(void);
|
17
|
+
void Init_nl_cdata(void);
|
15
18
|
void Init_nl_xpath_context(void);
|
16
19
|
|
17
20
|
void nl_raise_lexbor_error(lxb_status_t error);
|
@@ -21,14 +24,13 @@ VALUE nl_rb_node_set_create_with_data(lexbor_array_t *array, VALUE rb_document);
|
|
21
24
|
|
22
25
|
lxb_inline VALUE nl_rb_document_get(VALUE rb_node_or_doc)
|
23
26
|
{
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
return rb_iv_get(rb_node_or_doc, "@document");
|
27
|
+
if (rb_obj_is_kind_of(rb_node_or_doc, cNokolexborDocument)) {
|
28
|
+
return rb_node_or_doc;
|
29
|
+
}
|
30
|
+
return rb_iv_get(rb_node_or_doc, "@document");
|
29
31
|
}
|
30
32
|
|
31
|
-
lxb_dom_document_t *
|
33
|
+
lxb_dom_document_t *nl_rb_document_unwrap(VALUE rb_doc);
|
32
34
|
|
33
35
|
const lxb_char_t *
|
34
36
|
lxb_dom_node_name_qualified(lxb_dom_node_t *node, size_t *len);
|
data/ext/nokolexbor/xml_xpath.c
CHANGED
@@ -2897,6 +2897,10 @@ valuePop(xmlXPathParserContextPtr ctxt)
|
|
2897
2897
|
ctxt->valueTab[ctxt->valueNr] = NULL;
|
2898
2898
|
return (ret);
|
2899
2899
|
}
|
2900
|
+
|
2901
|
+
xmlXPathObjectPtr
|
2902
|
+
nl_xmlXPathValuePop(xmlXPathParserContextPtr ctxt) { return valuePop(ctxt); }
|
2903
|
+
|
2900
2904
|
/**
|
2901
2905
|
* valuePush:
|
2902
2906
|
* @ctxt: an XPath evaluation context
|
@@ -2941,6 +2945,9 @@ valuePush(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr value)
|
|
2941
2945
|
return (ctxt->valueNr++);
|
2942
2946
|
}
|
2943
2947
|
|
2948
|
+
int
|
2949
|
+
nl_xmlXPathValuePush(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr value) { return valuePush(ctxt, value); }
|
2950
|
+
|
2944
2951
|
/**
|
2945
2952
|
* nl_xmlXPathPopBoolean:
|
2946
2953
|
* @ctxt: an XPath parser context
|