nokolexbor 0.2.6 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/memory.c +6 -6
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +186 -173
- data/ext/nokolexbor/nl_node_set.c +35 -70
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +17 -26
- data/ext/nokolexbor/nokolexbor.c +7 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/lib/nokolexbor/document.rb +92 -1
- data/lib/nokolexbor/node.rb +64 -0
- data/lib/nokolexbor/node_set.rb +6 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
- metadata +7 -4
@@ -55,8 +55,7 @@ nl_node_set_allocate(VALUE klass)
|
|
55
55
|
VALUE
|
56
56
|
nl_rb_node_set_create_with_data(lexbor_array_t *array, VALUE rb_document)
|
57
57
|
{
|
58
|
-
if (array == NULL)
|
59
|
-
{
|
58
|
+
if (array == NULL) {
|
60
59
|
array = lexbor_array_create();
|
61
60
|
}
|
62
61
|
VALUE ret = TypedData_Wrap_Struct(cNokolexborNodeSet, &nl_node_set_type, array);
|
@@ -77,8 +76,7 @@ nl_node_set_push(VALUE self, VALUE rb_node)
|
|
77
76
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
78
77
|
|
79
78
|
lxb_status_t status = lexbor_array_push_unique(array, node);
|
80
|
-
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED)
|
81
|
-
{
|
79
|
+
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
|
82
80
|
nl_raise_lexbor_error(status);
|
83
81
|
}
|
84
82
|
|
@@ -93,13 +91,11 @@ nl_node_set_delete(VALUE self, VALUE rb_node)
|
|
93
91
|
|
94
92
|
size_t i;
|
95
93
|
for (i = 0; i < array->length; i++)
|
96
|
-
if (array->list[i] == node)
|
97
|
-
{
|
94
|
+
if (array->list[i] == node) {
|
98
95
|
break;
|
99
96
|
}
|
100
97
|
|
101
|
-
if (i >= array->length)
|
102
|
-
{
|
98
|
+
if (i >= array->length) {
|
103
99
|
// not found
|
104
100
|
return Qnil;
|
105
101
|
}
|
@@ -114,8 +110,7 @@ nl_node_set_is_include(VALUE self, VALUE rb_node)
|
|
114
110
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
115
111
|
|
116
112
|
for (size_t i = 0; i < array->length; i++)
|
117
|
-
if (array->list[i] == node)
|
118
|
-
{
|
113
|
+
if (array->list[i] == node) {
|
119
114
|
return Qtrue;
|
120
115
|
}
|
121
116
|
|
@@ -126,13 +121,11 @@ static VALUE
|
|
126
121
|
nl_node_set_index_at(VALUE self, long offset)
|
127
122
|
{
|
128
123
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
129
|
-
if (offset >= (long)array->length || abs((int)offset) > (long)array->length)
|
130
|
-
{
|
124
|
+
if (offset >= (long)array->length || abs((int)offset) > (long)array->length) {
|
131
125
|
return Qnil;
|
132
126
|
}
|
133
127
|
|
134
|
-
if (offset < 0)
|
135
|
-
{
|
128
|
+
if (offset < 0) {
|
136
129
|
offset += array->length;
|
137
130
|
}
|
138
131
|
|
@@ -145,35 +138,28 @@ nl_node_set_subseq(VALUE self, long beg, long len)
|
|
145
138
|
{
|
146
139
|
lexbor_array_t *old_array = nl_rb_node_set_unwrap(self);
|
147
140
|
|
148
|
-
if (beg > (long)old_array->length)
|
149
|
-
{
|
141
|
+
if (beg > (long)old_array->length) {
|
150
142
|
return Qnil;
|
151
143
|
}
|
152
|
-
if (beg < 0 || len < 0)
|
153
|
-
{
|
144
|
+
if (beg < 0 || len < 0) {
|
154
145
|
return Qnil;
|
155
146
|
}
|
156
147
|
|
157
|
-
if ((beg + len) > (long)old_array->length)
|
158
|
-
{
|
148
|
+
if ((beg + len) > (long)old_array->length) {
|
159
149
|
len = old_array->length - beg;
|
160
150
|
}
|
161
151
|
|
162
152
|
lexbor_array_t *new_array = lexbor_array_create();
|
163
|
-
if (len > 0)
|
164
|
-
{
|
153
|
+
if (len > 0) {
|
165
154
|
lxb_status_t status = lexbor_array_init(new_array, len);
|
166
|
-
if (status != LXB_STATUS_OK)
|
167
|
-
{
|
155
|
+
if (status != LXB_STATUS_OK) {
|
168
156
|
nl_raise_lexbor_error(status);
|
169
157
|
}
|
170
158
|
}
|
171
159
|
|
172
|
-
for (long j = beg; j < beg + len; ++j)
|
173
|
-
{
|
160
|
+
for (long j = beg; j < beg + len; ++j) {
|
174
161
|
lxb_status_t status = lexbor_array_push(new_array, old_array->list[j]);
|
175
|
-
if (status != LXB_STATUS_OK)
|
176
|
-
{
|
162
|
+
if (status != LXB_STATUS_OK) {
|
177
163
|
nl_raise_lexbor_error(status);
|
178
164
|
}
|
179
165
|
}
|
@@ -188,31 +174,26 @@ nl_node_set_slice(int argc, VALUE *argv, VALUE self)
|
|
188
174
|
|
189
175
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
190
176
|
|
191
|
-
if (argc == 2)
|
192
|
-
{
|
177
|
+
if (argc == 2) {
|
193
178
|
beg = NUM2LONG(argv[0]);
|
194
179
|
len = NUM2LONG(argv[1]);
|
195
|
-
if (beg < 0)
|
196
|
-
{
|
180
|
+
if (beg < 0) {
|
197
181
|
beg += array->length;
|
198
182
|
}
|
199
183
|
return nl_node_set_subseq(self, beg, len);
|
200
184
|
}
|
201
185
|
|
202
|
-
if (argc != 1)
|
203
|
-
{
|
186
|
+
if (argc != 1) {
|
204
187
|
rb_scan_args(argc, argv, "11", NULL, NULL);
|
205
188
|
}
|
206
189
|
arg = argv[0];
|
207
190
|
|
208
|
-
if (FIXNUM_P(arg))
|
209
|
-
{
|
191
|
+
if (FIXNUM_P(arg)) {
|
210
192
|
return nl_node_set_index_at(self, FIX2LONG(arg));
|
211
193
|
}
|
212
194
|
|
213
195
|
/* if arg is Range */
|
214
|
-
switch (rb_range_beg_len(arg, &beg, &len, array->length, 0))
|
215
|
-
{
|
196
|
+
switch (rb_range_beg_len(arg, &beg, &len, array->length, 0)) {
|
216
197
|
case Qfalse:
|
217
198
|
break;
|
218
199
|
case Qnil:
|
@@ -231,8 +212,7 @@ nl_node_set_to_array(VALUE self)
|
|
231
212
|
|
232
213
|
VALUE list = rb_ary_new2(array->length);
|
233
214
|
VALUE doc = nl_rb_document_get(self);
|
234
|
-
for (size_t i = 0; i < array->length; i++)
|
235
|
-
{
|
215
|
+
for (size_t i = 0; i < array->length; i++) {
|
236
216
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
237
217
|
VALUE rb_node = nl_rb_node_create(node, doc);
|
238
218
|
rb_ary_push(list, rb_node);
|
@@ -244,31 +224,27 @@ nl_node_set_to_array(VALUE self)
|
|
244
224
|
static VALUE
|
245
225
|
nl_node_set_union(VALUE self, VALUE other)
|
246
226
|
{
|
247
|
-
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet))
|
248
|
-
{
|
227
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
|
249
228
|
rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
|
250
229
|
}
|
251
230
|
|
252
231
|
lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
|
253
232
|
lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
|
254
233
|
|
255
|
-
if (self_array->length + other_array->length == 0)
|
256
|
-
{
|
234
|
+
if (self_array->length + other_array->length == 0) {
|
257
235
|
return nl_rb_node_set_create_with_data(NULL, nl_rb_document_get(self));
|
258
236
|
}
|
259
237
|
|
260
238
|
lexbor_array_t *new_array = lexbor_array_create();
|
261
239
|
lxb_status_t status = lexbor_array_init(new_array, self_array->length + other_array->length);
|
262
|
-
if (status != LXB_STATUS_OK)
|
263
|
-
{
|
240
|
+
if (status != LXB_STATUS_OK) {
|
264
241
|
nl_raise_lexbor_error(status);
|
265
242
|
}
|
266
243
|
|
267
244
|
memcpy(new_array->list, self_array->list, sizeof(lxb_dom_node_t *) * self_array->length);
|
268
245
|
new_array->length = self_array->length;
|
269
246
|
|
270
|
-
for (size_t i = 0; i < other_array->length; i++)
|
271
|
-
{
|
247
|
+
for (size_t i = 0; i < other_array->length; i++) {
|
272
248
|
lexbor_array_push_unique(new_array, other_array->list[i]);
|
273
249
|
}
|
274
250
|
|
@@ -279,40 +255,33 @@ static lxb_status_t
|
|
279
255
|
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
280
256
|
{
|
281
257
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
282
|
-
if (doc == NULL)
|
283
|
-
{
|
258
|
+
if (doc == NULL) {
|
284
259
|
rb_raise(rb_eRuntimeError, "Error getting document");
|
285
260
|
}
|
286
261
|
// Wrap direct children with a temporary fragment so that they can be searched
|
287
262
|
lxb_dom_document_fragment_t *frag = lxb_dom_document_fragment_interface_create(doc);
|
288
|
-
if (frag == NULL)
|
289
|
-
{
|
263
|
+
if (frag == NULL) {
|
290
264
|
rb_raise(rb_eRuntimeError, "Error creating document fragment");
|
291
265
|
}
|
292
266
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
293
267
|
|
294
268
|
lexbor_array_t *backup_array = lexbor_array_create();
|
295
|
-
if (array->length > 0)
|
296
|
-
{
|
269
|
+
if (array->length > 0) {
|
297
270
|
lxb_status_t status = lexbor_array_init(backup_array, array->length);
|
298
|
-
if (status != LXB_STATUS_OK)
|
299
|
-
{
|
271
|
+
if (status != LXB_STATUS_OK) {
|
300
272
|
nl_raise_lexbor_error(status);
|
301
273
|
}
|
302
274
|
}
|
303
275
|
// Backup original node data and re-group them into a fragment
|
304
|
-
for (size_t i = 0; i < array->length; i++)
|
305
|
-
{
|
276
|
+
for (size_t i = 0; i < array->length; i++) {
|
306
277
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
307
278
|
lxb_dom_node_t *backup_node = malloc(sizeof(lxb_dom_node_t));
|
308
|
-
if (backup_node == NULL)
|
309
|
-
{
|
279
|
+
if (backup_node == NULL) {
|
310
280
|
nl_raise_lexbor_error(LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
311
281
|
}
|
312
282
|
memcpy(backup_node, node, sizeof(lxb_dom_node_t));
|
313
283
|
lxb_status_t status = lexbor_array_push(backup_array, backup_node);
|
314
|
-
if (status != LXB_STATUS_OK)
|
315
|
-
{
|
284
|
+
if (status != LXB_STATUS_OK) {
|
316
285
|
nl_raise_lexbor_error(LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
317
286
|
}
|
318
287
|
lxb_dom_node_insert_child(&frag->node, node);
|
@@ -323,8 +292,7 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
323
292
|
|
324
293
|
lxb_dom_document_fragment_interface_destroy(frag);
|
325
294
|
// Restore original node data
|
326
|
-
for (size_t i = 0; i < array->length; i++)
|
327
|
-
{
|
295
|
+
for (size_t i = 0; i < array->length; i++) {
|
328
296
|
memcpy(array->list[i], backup_array->list[i], sizeof(lxb_dom_node_t));
|
329
297
|
free(backup_array->list[i]);
|
330
298
|
}
|
@@ -341,14 +309,12 @@ nl_node_set_at_css(VALUE self, VALUE selector)
|
|
341
309
|
|
342
310
|
lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
343
311
|
|
344
|
-
if (status != LXB_STATUS_OK)
|
345
|
-
{
|
312
|
+
if (status != LXB_STATUS_OK) {
|
346
313
|
lexbor_array_destroy(array, true);
|
347
314
|
nl_raise_lexbor_error(status);
|
348
315
|
}
|
349
316
|
|
350
|
-
if (array->length == 0)
|
351
|
-
{
|
317
|
+
if (array->length == 0) {
|
352
318
|
lexbor_array_destroy(array, true);
|
353
319
|
return Qnil;
|
354
320
|
}
|
@@ -369,8 +335,7 @@ nl_node_set_css(VALUE self, VALUE selector)
|
|
369
335
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
370
336
|
|
371
337
|
lxb_status_t status = nl_node_set_find(self, selector, nl_node_css_callback, array);
|
372
|
-
if (status != LXB_STATUS_OK)
|
373
|
-
{
|
338
|
+
if (status != LXB_STATUS_OK) {
|
374
339
|
lexbor_array_destroy(array, true);
|
375
340
|
nl_raise_lexbor_error(status);
|
376
341
|
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborText;
|
4
|
+
extern VALUE cNokolexborCharacterData;
|
5
|
+
extern VALUE mNokolexbor;
|
6
|
+
|
7
|
+
static VALUE
|
8
|
+
nl_text_new(int argc, VALUE *argv, VALUE klass)
|
9
|
+
{
|
10
|
+
lxb_dom_document_t *document;
|
11
|
+
VALUE rb_text;
|
12
|
+
VALUE rb_document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "2*", &rb_text, &rb_document, &rest);
|
16
|
+
|
17
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
18
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
19
|
+
}
|
20
|
+
|
21
|
+
document = nl_rb_document_unwrap(rb_document);
|
22
|
+
|
23
|
+
const char* c_text = StringValuePtr(rb_text);
|
24
|
+
size_t text_len = RSTRING_LEN(rb_text);
|
25
|
+
lxb_dom_text_t *element = lxb_dom_document_create_text_node(document, (const lxb_char_t *)c_text, text_len);
|
26
|
+
if (element == NULL) {
|
27
|
+
rb_raise(rb_eRuntimeError, "Error creating text node");
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE rb_node = nl_rb_node_create(&element->char_data.node, rb_document);
|
31
|
+
|
32
|
+
if (rb_block_given_p()) {
|
33
|
+
rb_yield(rb_node);
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_node;
|
37
|
+
}
|
38
|
+
|
39
|
+
void Init_nl_text(void)
|
40
|
+
{
|
41
|
+
cNokolexborText = rb_define_class_under(mNokolexbor, "Text", cNokolexborCharacterData);
|
42
|
+
|
43
|
+
rb_define_singleton_method(cNokolexborText, "new", nl_text_new, -1);
|
44
|
+
}
|
@@ -1,11 +1,11 @@
|
|
1
|
-
#include <ruby.h>
|
2
|
-
#include <ruby/util.h>
|
3
|
-
#include "nokolexbor.h"
|
4
1
|
#include "libxml.h"
|
5
2
|
#include "libxml/globals.h"
|
3
|
+
#include "libxml/parserInternals.h"
|
6
4
|
#include "libxml/xpath.h"
|
7
5
|
#include "libxml/xpathInternals.h"
|
8
|
-
#include "libxml/parserInternals.h"
|
6
|
+
#include "nokolexbor.h"
|
7
|
+
#include <ruby.h>
|
8
|
+
#include <ruby/util.h>
|
9
9
|
|
10
10
|
#define RBSTR_OR_QNIL(_str) (_str ? rb_utf8_str_new_cstr(_str) : Qnil)
|
11
11
|
|
@@ -34,8 +34,8 @@ nl_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
|
34
34
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
35
35
|
|
36
36
|
nl_xmlXPathRegisterNs(ctx,
|
37
|
-
|
38
|
-
|
37
|
+
(const xmlChar *)StringValueCStr(prefix),
|
38
|
+
(const xmlChar *)StringValueCStr(uri));
|
39
39
|
return self;
|
40
40
|
}
|
41
41
|
|
@@ -55,8 +55,8 @@ nl_xpath_context_register_variable(VALUE self, VALUE name, VALUE value)
|
|
55
55
|
xmlValue = nl_xmlXPathNewCString(StringValueCStr(value));
|
56
56
|
|
57
57
|
nl_xmlXPathRegisterVariable(ctx,
|
58
|
-
|
59
|
-
|
58
|
+
(const xmlChar *)StringValueCStr(name),
|
59
|
+
xmlValue);
|
60
60
|
|
61
61
|
return self;
|
62
62
|
}
|
@@ -70,28 +70,23 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
70
70
|
{
|
71
71
|
VALUE rb_retval;
|
72
72
|
|
73
|
-
switch (c_xpath_object->type)
|
74
|
-
{
|
73
|
+
switch (c_xpath_object->type) {
|
75
74
|
case XPATH_STRING:
|
76
75
|
rb_retval = rb_utf8_str_new_cstr((const char *)c_xpath_object->stringval);
|
77
76
|
nl_xmlFree(c_xpath_object->stringval);
|
78
77
|
return rb_retval;
|
79
78
|
|
80
|
-
case XPATH_NODESET:
|
81
|
-
|
82
|
-
if (c_xpath_object->nodesetval == NULL)
|
83
|
-
{
|
79
|
+
case XPATH_NODESET: {
|
80
|
+
if (c_xpath_object->nodesetval == NULL) {
|
84
81
|
return nl_rb_node_set_create_with_data(NULL, rb_document);
|
85
82
|
}
|
86
|
-
if (c_xpath_object->nodesetval->nodeNr == 0)
|
87
|
-
{
|
83
|
+
if (c_xpath_object->nodesetval->nodeNr == 0) {
|
88
84
|
return nl_rb_node_set_create_with_data(NULL, rb_document);
|
89
85
|
}
|
90
86
|
|
91
87
|
lexbor_array_t *array = lexbor_array_create();
|
92
88
|
lxb_status_t status = lexbor_array_init(array, c_xpath_object->nodesetval->nodeNr);
|
93
|
-
if (status != LXB_STATUS_OK)
|
94
|
-
{
|
89
|
+
if (status != LXB_STATUS_OK) {
|
95
90
|
nl_raise_lexbor_error(status);
|
96
91
|
}
|
97
92
|
memcpy(array->list, c_xpath_object->nodesetval->nodeTab, sizeof(lxb_dom_node_t *) * c_xpath_object->nodesetval->nodeNr);
|
@@ -122,8 +117,7 @@ nl_xpath_wrap_syntax_error(xmlErrorPtr error)
|
|
122
117
|
&msg,
|
123
118
|
cNokolexborXpathSyntaxError);
|
124
119
|
|
125
|
-
if (error)
|
126
|
-
{
|
120
|
+
if (error) {
|
127
121
|
rb_iv_set(e, "@domain", INT2NUM(error->domain));
|
128
122
|
rb_iv_set(e, "@code", INT2NUM(error->code));
|
129
123
|
rb_iv_set(e, "@level", INT2NUM((short)error->level));
|
@@ -182,8 +176,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
182
176
|
|
183
177
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
184
178
|
|
185
|
-
if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1)
|
186
|
-
{
|
179
|
+
if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
|
187
180
|
xpath_handler = Qnil;
|
188
181
|
}
|
189
182
|
|
@@ -203,15 +196,13 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
203
196
|
nl_xmlSetStructuredErrorFunc(NULL, NULL);
|
204
197
|
nl_xmlSetGenericErrorFunc(NULL, NULL);
|
205
198
|
|
206
|
-
if (xpath == NULL)
|
207
|
-
{
|
199
|
+
if (xpath == NULL) {
|
208
200
|
nl_xmlXPathFreeObject(xpath);
|
209
201
|
rb_exc_raise(rb_ary_entry(errors, 0));
|
210
202
|
}
|
211
203
|
|
212
204
|
retval = xpath2ruby(xpath, ctx, nl_rb_document_get(self));
|
213
|
-
if (retval == Qundef)
|
214
|
-
{
|
205
|
+
if (retval == Qundef) {
|
215
206
|
retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
|
216
207
|
}
|
217
208
|
|
data/ext/nokolexbor/nokolexbor.c
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
VALUE mNokolexbor;
|
4
4
|
VALUE eLexborError;
|
5
|
+
VALUE eLexborSyntaxError;
|
5
6
|
|
6
7
|
void nl_raise_lexbor_error(lxb_status_t error)
|
7
8
|
{
|
8
|
-
switch (error)
|
9
|
-
{
|
9
|
+
switch (error) {
|
10
10
|
case LXB_STATUS_ERROR:
|
11
11
|
rb_raise(eLexborError, "LXB_STATUS_ERROR");
|
12
12
|
case LXB_STATUS_ERROR_MEMORY_ALLOCATION:
|
@@ -30,7 +30,7 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
30
30
|
case LXB_STATUS_ERROR_UNEXPECTED_RESULT:
|
31
31
|
rb_raise(eLexborError, "LXB_STATUS_ERROR_UNEXPECTED_RESULT");
|
32
32
|
case LXB_STATUS_ERROR_UNEXPECTED_DATA:
|
33
|
-
rb_raise(eLexborError, "LXB_STATUS_ERROR_UNEXPECTED_DATA");
|
33
|
+
rb_raise(eLexborSyntaxError, "LXB_STATUS_ERROR_UNEXPECTED_DATA");
|
34
34
|
case LXB_STATUS_ERROR_OVERFLOW:
|
35
35
|
rb_raise(eLexborError, "LXB_STATUS_ERROR_OVERFLOW");
|
36
36
|
case LXB_STATUS_CONTINUE:
|
@@ -56,8 +56,12 @@ void Init_nokolexbor(void)
|
|
56
56
|
{
|
57
57
|
mNokolexbor = rb_define_module("Nokolexbor");
|
58
58
|
eLexborError = rb_define_class_under(mNokolexbor, "LexborError", rb_eStandardError);
|
59
|
+
eLexborSyntaxError = rb_define_class_under(mNokolexbor, "LexborSyntaxError", eLexborError);
|
59
60
|
Init_nl_node();
|
60
61
|
Init_nl_document();
|
62
|
+
Init_nl_text();
|
63
|
+
Init_nl_comment();
|
64
|
+
Init_nl_cdata();
|
61
65
|
Init_nl_node_set();
|
62
66
|
Init_nl_xpath_context();
|
63
67
|
}
|
data/ext/nokolexbor/nokolexbor.h
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
5
|
|
6
|
-
#include <lexbor/html/html.h>
|
7
6
|
#include <lexbor/css/css.h>
|
7
|
+
#include <lexbor/html/html.h>
|
8
8
|
#include <lexbor/selectors/selectors.h>
|
9
9
|
|
10
10
|
extern VALUE cNokolexborDocument;
|
@@ -12,6 +12,9 @@ extern VALUE cNokolexborDocument;
|
|
12
12
|
void Init_nl_document(void);
|
13
13
|
void Init_nl_node(void);
|
14
14
|
void Init_nl_node_set(void);
|
15
|
+
void Init_nl_text(void);
|
16
|
+
void Init_nl_comment(void);
|
17
|
+
void Init_nl_cdata(void);
|
15
18
|
void Init_nl_xpath_context(void);
|
16
19
|
|
17
20
|
void nl_raise_lexbor_error(lxb_status_t error);
|
@@ -21,14 +24,13 @@ VALUE nl_rb_node_set_create_with_data(lexbor_array_t *array, VALUE rb_document);
|
|
21
24
|
|
22
25
|
lxb_inline VALUE nl_rb_document_get(VALUE rb_node_or_doc)
|
23
26
|
{
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
return rb_iv_get(rb_node_or_doc, "@document");
|
27
|
+
if (rb_obj_is_kind_of(rb_node_or_doc, cNokolexborDocument)) {
|
28
|
+
return rb_node_or_doc;
|
29
|
+
}
|
30
|
+
return rb_iv_get(rb_node_or_doc, "@document");
|
29
31
|
}
|
30
32
|
|
31
|
-
lxb_dom_document_t *
|
33
|
+
lxb_dom_document_t *nl_rb_document_unwrap(VALUE rb_doc);
|
32
34
|
|
33
35
|
const lxb_char_t *
|
34
36
|
lxb_dom_node_name_qualified(lxb_dom_node_t *node, size_t *len);
|
data/lib/nokolexbor/document.rb
CHANGED
@@ -1,6 +1,97 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
|
-
class Document < Node
|
4
|
+
class Document < Nokolexbor::Node
|
5
|
+
def create_element(name, *contents_or_attrs, &block)
|
6
|
+
elm = Nokolexbor::Element.new(name, self, &block)
|
7
|
+
contents_or_attrs.each do |arg|
|
8
|
+
case arg
|
9
|
+
when Hash
|
10
|
+
arg.each do |k, v|
|
11
|
+
elm[k.to_s] = v.to_s
|
12
|
+
end
|
13
|
+
else
|
14
|
+
elm.content = arg
|
15
|
+
end
|
16
|
+
end
|
17
|
+
elm
|
18
|
+
end
|
19
|
+
|
20
|
+
# Create a Text Node with +string+
|
21
|
+
def create_text_node(string, &block)
|
22
|
+
Nokolexbor::Text.new(string.to_s, self, &block)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Create a CDATA Node containing +string+
|
26
|
+
def create_cdata(string, &block)
|
27
|
+
Nokolexbor::CDATA.new(string.to_s, self, &block)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Create a Comment Node containing +string+
|
31
|
+
def create_comment(string, &block)
|
32
|
+
Nokolexbor::Comment.new(string.to_s, self, &block)
|
33
|
+
end
|
34
|
+
|
35
|
+
# A reference to +self+
|
36
|
+
def document
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
def meta_encoding
|
41
|
+
if (meta = at_css("meta[charset]"))
|
42
|
+
meta[:charset]
|
43
|
+
elsif (meta = meta_content_type)
|
44
|
+
meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def meta_encoding=(encoding)
|
49
|
+
if (meta = meta_content_type)
|
50
|
+
meta["content"] = format("text/html; charset=%s", encoding)
|
51
|
+
encoding
|
52
|
+
elsif (meta = at_css("meta[charset]"))
|
53
|
+
meta["charset"] = encoding
|
54
|
+
else
|
55
|
+
meta = Nokolexbor::Node.new("meta", self)
|
56
|
+
meta["charset"] = encoding
|
57
|
+
|
58
|
+
if (head = at_css("head"))
|
59
|
+
head.prepend_child(meta)
|
60
|
+
else
|
61
|
+
set_metadata_element(meta)
|
62
|
+
end
|
63
|
+
encoding
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def meta_content_type
|
68
|
+
xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
|
69
|
+
node["http-equiv"] =~ /\AContent-Type\z/i
|
70
|
+
end
|
71
|
+
end
|
72
|
+
private :meta_content_type
|
73
|
+
|
74
|
+
def set_metadata_element(element)
|
75
|
+
if (head = at_css("head"))
|
76
|
+
head << element
|
77
|
+
elsif (html = at_css("html"))
|
78
|
+
head = html.prepend_child(Nokolexbor::Node.new("head", self))
|
79
|
+
head.prepend_child(element)
|
80
|
+
elsif (first = children.find do |node|
|
81
|
+
case node
|
82
|
+
when Nokolexbor::Node
|
83
|
+
true
|
84
|
+
end
|
85
|
+
end)
|
86
|
+
# We reach here only if the underlying document model
|
87
|
+
# allows <html>/<head> elements to be omitted and does not
|
88
|
+
# automatically supply them.
|
89
|
+
first.add_previous_sibling(element)
|
90
|
+
else
|
91
|
+
html = add_child(Nokolexbor::Node.new("html", self))
|
92
|
+
head = html.add_child(Nokolexbor::Node.new("head", self))
|
93
|
+
head.prepend_child(element)
|
94
|
+
end
|
95
|
+
end
|
5
96
|
end
|
6
97
|
end
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -45,6 +45,10 @@ module Nokolexbor
|
|
45
45
|
type == ELEMENT_NODE
|
46
46
|
end
|
47
47
|
|
48
|
+
def document?
|
49
|
+
is_a?(Nokolexbor::Document)
|
50
|
+
end
|
51
|
+
|
48
52
|
def ancestors(selector = nil)
|
49
53
|
return NodeSet.new(@document) unless respond_to?(:parent)
|
50
54
|
return NodeSet.new(@document) unless parent
|
@@ -87,6 +91,56 @@ module Nokolexbor
|
|
87
91
|
self
|
88
92
|
end
|
89
93
|
|
94
|
+
def add_previous_sibling(node_or_tags)
|
95
|
+
raise ArgumentError,
|
96
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
97
|
+
|
98
|
+
add_sibling(:previous, node_or_tags)
|
99
|
+
end
|
100
|
+
|
101
|
+
def add_next_sibling(node_or_tags)
|
102
|
+
raise ArgumentError,
|
103
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
104
|
+
|
105
|
+
add_sibling(:next, node_or_tags)
|
106
|
+
end
|
107
|
+
|
108
|
+
def before(node_or_tags)
|
109
|
+
add_previous_sibling(node_or_tags)
|
110
|
+
self
|
111
|
+
end
|
112
|
+
|
113
|
+
def after(node_or_tags)
|
114
|
+
add_next_sibling(node_or_tags)
|
115
|
+
self
|
116
|
+
end
|
117
|
+
|
118
|
+
alias_method :next_sibling, :next
|
119
|
+
alias_method :previous_sibling, :previous
|
120
|
+
alias_method :next=, :add_next_sibling
|
121
|
+
alias_method :previous=, :add_previous_sibling
|
122
|
+
|
123
|
+
def <<(node_or_tags)
|
124
|
+
add_child(node_or_tags)
|
125
|
+
self
|
126
|
+
end
|
127
|
+
|
128
|
+
def prepend_child(node)
|
129
|
+
if (first = children.first)
|
130
|
+
# Mimic the error add_child would raise.
|
131
|
+
raise "Document already has a root node" if document? && !(node.comment? || node.processing_instruction?)
|
132
|
+
|
133
|
+
first.add_sibling(:previous, node)
|
134
|
+
else
|
135
|
+
add_child(node)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def traverse(&block)
|
140
|
+
children.each { |j| j.traverse(&block) }
|
141
|
+
yield(self)
|
142
|
+
end
|
143
|
+
|
90
144
|
def matches?(selector)
|
91
145
|
ancestors.last.css(selector).any? { |node| node == self }
|
92
146
|
end
|
@@ -118,6 +172,10 @@ module Nokolexbor
|
|
118
172
|
end
|
119
173
|
end
|
120
174
|
|
175
|
+
def parent=(parent_node)
|
176
|
+
parent_node.add_child(self)
|
177
|
+
end
|
178
|
+
|
121
179
|
def each
|
122
180
|
attributes.each do |name, node|
|
123
181
|
yield [name, node.value]
|
@@ -233,6 +291,12 @@ module Nokolexbor
|
|
233
291
|
end
|
234
292
|
end
|
235
293
|
|
294
|
+
def write_to(io, *options)
|
295
|
+
io.write(to_html(*options))
|
296
|
+
end
|
297
|
+
|
298
|
+
alias_method :write_html_to, :write_to
|
299
|
+
|
236
300
|
private
|
237
301
|
|
238
302
|
def xpath_internal(node, paths, handler, ns, binds)
|