nokolexbor 0.2.6 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/memory.c +6 -6
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +186 -173
- data/ext/nokolexbor/nl_node_set.c +35 -70
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +17 -26
- data/ext/nokolexbor/nokolexbor.c +7 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/lib/nokolexbor/document.rb +92 -1
- data/lib/nokolexbor/node.rb +64 -0
- data/lib/nokolexbor/node_set.rb +6 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
- metadata +7 -4
@@ -55,8 +55,7 @@ nl_node_set_allocate(VALUE klass)
|
|
55
55
|
VALUE
|
56
56
|
nl_rb_node_set_create_with_data(lexbor_array_t *array, VALUE rb_document)
|
57
57
|
{
|
58
|
-
if (array == NULL)
|
59
|
-
{
|
58
|
+
if (array == NULL) {
|
60
59
|
array = lexbor_array_create();
|
61
60
|
}
|
62
61
|
VALUE ret = TypedData_Wrap_Struct(cNokolexborNodeSet, &nl_node_set_type, array);
|
@@ -77,8 +76,7 @@ nl_node_set_push(VALUE self, VALUE rb_node)
|
|
77
76
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
78
77
|
|
79
78
|
lxb_status_t status = lexbor_array_push_unique(array, node);
|
80
|
-
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED)
|
81
|
-
{
|
79
|
+
if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
|
82
80
|
nl_raise_lexbor_error(status);
|
83
81
|
}
|
84
82
|
|
@@ -93,13 +91,11 @@ nl_node_set_delete(VALUE self, VALUE rb_node)
|
|
93
91
|
|
94
92
|
size_t i;
|
95
93
|
for (i = 0; i < array->length; i++)
|
96
|
-
if (array->list[i] == node)
|
97
|
-
{
|
94
|
+
if (array->list[i] == node) {
|
98
95
|
break;
|
99
96
|
}
|
100
97
|
|
101
|
-
if (i >= array->length)
|
102
|
-
{
|
98
|
+
if (i >= array->length) {
|
103
99
|
// not found
|
104
100
|
return Qnil;
|
105
101
|
}
|
@@ -114,8 +110,7 @@ nl_node_set_is_include(VALUE self, VALUE rb_node)
|
|
114
110
|
lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);
|
115
111
|
|
116
112
|
for (size_t i = 0; i < array->length; i++)
|
117
|
-
if (array->list[i] == node)
|
118
|
-
{
|
113
|
+
if (array->list[i] == node) {
|
119
114
|
return Qtrue;
|
120
115
|
}
|
121
116
|
|
@@ -126,13 +121,11 @@ static VALUE
|
|
126
121
|
nl_node_set_index_at(VALUE self, long offset)
|
127
122
|
{
|
128
123
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
129
|
-
if (offset >= (long)array->length || abs((int)offset) > (long)array->length)
|
130
|
-
{
|
124
|
+
if (offset >= (long)array->length || abs((int)offset) > (long)array->length) {
|
131
125
|
return Qnil;
|
132
126
|
}
|
133
127
|
|
134
|
-
if (offset < 0)
|
135
|
-
{
|
128
|
+
if (offset < 0) {
|
136
129
|
offset += array->length;
|
137
130
|
}
|
138
131
|
|
@@ -145,35 +138,28 @@ nl_node_set_subseq(VALUE self, long beg, long len)
|
|
145
138
|
{
|
146
139
|
lexbor_array_t *old_array = nl_rb_node_set_unwrap(self);
|
147
140
|
|
148
|
-
if (beg > (long)old_array->length)
|
149
|
-
{
|
141
|
+
if (beg > (long)old_array->length) {
|
150
142
|
return Qnil;
|
151
143
|
}
|
152
|
-
if (beg < 0 || len < 0)
|
153
|
-
{
|
144
|
+
if (beg < 0 || len < 0) {
|
154
145
|
return Qnil;
|
155
146
|
}
|
156
147
|
|
157
|
-
if ((beg + len) > (long)old_array->length)
|
158
|
-
{
|
148
|
+
if ((beg + len) > (long)old_array->length) {
|
159
149
|
len = old_array->length - beg;
|
160
150
|
}
|
161
151
|
|
162
152
|
lexbor_array_t *new_array = lexbor_array_create();
|
163
|
-
if (len > 0)
|
164
|
-
{
|
153
|
+
if (len > 0) {
|
165
154
|
lxb_status_t status = lexbor_array_init(new_array, len);
|
166
|
-
if (status != LXB_STATUS_OK)
|
167
|
-
{
|
155
|
+
if (status != LXB_STATUS_OK) {
|
168
156
|
nl_raise_lexbor_error(status);
|
169
157
|
}
|
170
158
|
}
|
171
159
|
|
172
|
-
for (long j = beg; j < beg + len; ++j)
|
173
|
-
{
|
160
|
+
for (long j = beg; j < beg + len; ++j) {
|
174
161
|
lxb_status_t status = lexbor_array_push(new_array, old_array->list[j]);
|
175
|
-
if (status != LXB_STATUS_OK)
|
176
|
-
{
|
162
|
+
if (status != LXB_STATUS_OK) {
|
177
163
|
nl_raise_lexbor_error(status);
|
178
164
|
}
|
179
165
|
}
|
@@ -188,31 +174,26 @@ nl_node_set_slice(int argc, VALUE *argv, VALUE self)
|
|
188
174
|
|
189
175
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
190
176
|
|
191
|
-
if (argc == 2)
|
192
|
-
{
|
177
|
+
if (argc == 2) {
|
193
178
|
beg = NUM2LONG(argv[0]);
|
194
179
|
len = NUM2LONG(argv[1]);
|
195
|
-
if (beg < 0)
|
196
|
-
{
|
180
|
+
if (beg < 0) {
|
197
181
|
beg += array->length;
|
198
182
|
}
|
199
183
|
return nl_node_set_subseq(self, beg, len);
|
200
184
|
}
|
201
185
|
|
202
|
-
if (argc != 1)
|
203
|
-
{
|
186
|
+
if (argc != 1) {
|
204
187
|
rb_scan_args(argc, argv, "11", NULL, NULL);
|
205
188
|
}
|
206
189
|
arg = argv[0];
|
207
190
|
|
208
|
-
if (FIXNUM_P(arg))
|
209
|
-
{
|
191
|
+
if (FIXNUM_P(arg)) {
|
210
192
|
return nl_node_set_index_at(self, FIX2LONG(arg));
|
211
193
|
}
|
212
194
|
|
213
195
|
/* if arg is Range */
|
214
|
-
switch (rb_range_beg_len(arg, &beg, &len, array->length, 0))
|
215
|
-
{
|
196
|
+
switch (rb_range_beg_len(arg, &beg, &len, array->length, 0)) {
|
216
197
|
case Qfalse:
|
217
198
|
break;
|
218
199
|
case Qnil:
|
@@ -231,8 +212,7 @@ nl_node_set_to_array(VALUE self)
|
|
231
212
|
|
232
213
|
VALUE list = rb_ary_new2(array->length);
|
233
214
|
VALUE doc = nl_rb_document_get(self);
|
234
|
-
for (size_t i = 0; i < array->length; i++)
|
235
|
-
{
|
215
|
+
for (size_t i = 0; i < array->length; i++) {
|
236
216
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
237
217
|
VALUE rb_node = nl_rb_node_create(node, doc);
|
238
218
|
rb_ary_push(list, rb_node);
|
@@ -244,31 +224,27 @@ nl_node_set_to_array(VALUE self)
|
|
244
224
|
static VALUE
|
245
225
|
nl_node_set_union(VALUE self, VALUE other)
|
246
226
|
{
|
247
|
-
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet))
|
248
|
-
{
|
227
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
|
249
228
|
rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
|
250
229
|
}
|
251
230
|
|
252
231
|
lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
|
253
232
|
lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
|
254
233
|
|
255
|
-
if (self_array->length + other_array->length == 0)
|
256
|
-
{
|
234
|
+
if (self_array->length + other_array->length == 0) {
|
257
235
|
return nl_rb_node_set_create_with_data(NULL, nl_rb_document_get(self));
|
258
236
|
}
|
259
237
|
|
260
238
|
lexbor_array_t *new_array = lexbor_array_create();
|
261
239
|
lxb_status_t status = lexbor_array_init(new_array, self_array->length + other_array->length);
|
262
|
-
if (status != LXB_STATUS_OK)
|
263
|
-
{
|
240
|
+
if (status != LXB_STATUS_OK) {
|
264
241
|
nl_raise_lexbor_error(status);
|
265
242
|
}
|
266
243
|
|
267
244
|
memcpy(new_array->list, self_array->list, sizeof(lxb_dom_node_t *) * self_array->length);
|
268
245
|
new_array->length = self_array->length;
|
269
246
|
|
270
|
-
for (size_t i = 0; i < other_array->length; i++)
|
271
|
-
{
|
247
|
+
for (size_t i = 0; i < other_array->length; i++) {
|
272
248
|
lexbor_array_push_unique(new_array, other_array->list[i]);
|
273
249
|
}
|
274
250
|
|
@@ -279,40 +255,33 @@ static lxb_status_t
|
|
279
255
|
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
280
256
|
{
|
281
257
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
282
|
-
if (doc == NULL)
|
283
|
-
{
|
258
|
+
if (doc == NULL) {
|
284
259
|
rb_raise(rb_eRuntimeError, "Error getting document");
|
285
260
|
}
|
286
261
|
// Wrap direct children with a temporary fragment so that they can be searched
|
287
262
|
lxb_dom_document_fragment_t *frag = lxb_dom_document_fragment_interface_create(doc);
|
288
|
-
if (frag == NULL)
|
289
|
-
{
|
263
|
+
if (frag == NULL) {
|
290
264
|
rb_raise(rb_eRuntimeError, "Error creating document fragment");
|
291
265
|
}
|
292
266
|
lexbor_array_t *array = nl_rb_node_set_unwrap(self);
|
293
267
|
|
294
268
|
lexbor_array_t *backup_array = lexbor_array_create();
|
295
|
-
if (array->length > 0)
|
296
|
-
{
|
269
|
+
if (array->length > 0) {
|
297
270
|
lxb_status_t status = lexbor_array_init(backup_array, array->length);
|
298
|
-
if (status != LXB_STATUS_OK)
|
299
|
-
{
|
271
|
+
if (status != LXB_STATUS_OK) {
|
300
272
|
nl_raise_lexbor_error(status);
|
301
273
|
}
|
302
274
|
}
|
303
275
|
// Backup original node data and re-group them into a fragment
|
304
|
-
for (size_t i = 0; i < array->length; i++)
|
305
|
-
{
|
276
|
+
for (size_t i = 0; i < array->length; i++) {
|
306
277
|
lxb_dom_node_t *node = (lxb_dom_node_t *)array->list[i];
|
307
278
|
lxb_dom_node_t *backup_node = malloc(sizeof(lxb_dom_node_t));
|
308
|
-
if (backup_node == NULL)
|
309
|
-
{
|
279
|
+
if (backup_node == NULL) {
|
310
280
|
nl_raise_lexbor_error(LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
311
281
|
}
|
312
282
|
memcpy(backup_node, node, sizeof(lxb_dom_node_t));
|
313
283
|
lxb_status_t status = lexbor_array_push(backup_array, backup_node);
|
314
|
-
if (status != LXB_STATUS_OK)
|
315
|
-
{
|
284
|
+
if (status != LXB_STATUS_OK) {
|
316
285
|
nl_raise_lexbor_error(LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
317
286
|
}
|
318
287
|
lxb_dom_node_insert_child(&frag->node, node);
|
@@ -323,8 +292,7 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
323
292
|
|
324
293
|
lxb_dom_document_fragment_interface_destroy(frag);
|
325
294
|
// Restore original node data
|
326
|
-
for (size_t i = 0; i < array->length; i++)
|
327
|
-
{
|
295
|
+
for (size_t i = 0; i < array->length; i++) {
|
328
296
|
memcpy(array->list[i], backup_array->list[i], sizeof(lxb_dom_node_t));
|
329
297
|
free(backup_array->list[i]);
|
330
298
|
}
|
@@ -341,14 +309,12 @@ nl_node_set_at_css(VALUE self, VALUE selector)
|
|
341
309
|
|
342
310
|
lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
343
311
|
|
344
|
-
if (status != LXB_STATUS_OK)
|
345
|
-
{
|
312
|
+
if (status != LXB_STATUS_OK) {
|
346
313
|
lexbor_array_destroy(array, true);
|
347
314
|
nl_raise_lexbor_error(status);
|
348
315
|
}
|
349
316
|
|
350
|
-
if (array->length == 0)
|
351
|
-
{
|
317
|
+
if (array->length == 0) {
|
352
318
|
lexbor_array_destroy(array, true);
|
353
319
|
return Qnil;
|
354
320
|
}
|
@@ -369,8 +335,7 @@ nl_node_set_css(VALUE self, VALUE selector)
|
|
369
335
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
370
336
|
|
371
337
|
lxb_status_t status = nl_node_set_find(self, selector, nl_node_css_callback, array);
|
372
|
-
if (status != LXB_STATUS_OK)
|
373
|
-
{
|
338
|
+
if (status != LXB_STATUS_OK) {
|
374
339
|
lexbor_array_destroy(array, true);
|
375
340
|
nl_raise_lexbor_error(status);
|
376
341
|
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborText;
|
4
|
+
extern VALUE cNokolexborCharacterData;
|
5
|
+
extern VALUE mNokolexbor;
|
6
|
+
|
7
|
+
static VALUE
|
8
|
+
nl_text_new(int argc, VALUE *argv, VALUE klass)
|
9
|
+
{
|
10
|
+
lxb_dom_document_t *document;
|
11
|
+
VALUE rb_text;
|
12
|
+
VALUE rb_document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "2*", &rb_text, &rb_document, &rest);
|
16
|
+
|
17
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
18
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
19
|
+
}
|
20
|
+
|
21
|
+
document = nl_rb_document_unwrap(rb_document);
|
22
|
+
|
23
|
+
const char* c_text = StringValuePtr(rb_text);
|
24
|
+
size_t text_len = RSTRING_LEN(rb_text);
|
25
|
+
lxb_dom_text_t *element = lxb_dom_document_create_text_node(document, (const lxb_char_t *)c_text, text_len);
|
26
|
+
if (element == NULL) {
|
27
|
+
rb_raise(rb_eRuntimeError, "Error creating text node");
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE rb_node = nl_rb_node_create(&element->char_data.node, rb_document);
|
31
|
+
|
32
|
+
if (rb_block_given_p()) {
|
33
|
+
rb_yield(rb_node);
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_node;
|
37
|
+
}
|
38
|
+
|
39
|
+
void Init_nl_text(void)
|
40
|
+
{
|
41
|
+
cNokolexborText = rb_define_class_under(mNokolexbor, "Text", cNokolexborCharacterData);
|
42
|
+
|
43
|
+
rb_define_singleton_method(cNokolexborText, "new", nl_text_new, -1);
|
44
|
+
}
|
@@ -1,11 +1,11 @@
|
|
1
|
-
#include <ruby.h>
|
2
|
-
#include <ruby/util.h>
|
3
|
-
#include "nokolexbor.h"
|
4
1
|
#include "libxml.h"
|
5
2
|
#include "libxml/globals.h"
|
3
|
+
#include "libxml/parserInternals.h"
|
6
4
|
#include "libxml/xpath.h"
|
7
5
|
#include "libxml/xpathInternals.h"
|
8
|
-
#include "
|
6
|
+
#include "nokolexbor.h"
|
7
|
+
#include <ruby.h>
|
8
|
+
#include <ruby/util.h>
|
9
9
|
|
10
10
|
#define RBSTR_OR_QNIL(_str) (_str ? rb_utf8_str_new_cstr(_str) : Qnil)
|
11
11
|
|
@@ -34,8 +34,8 @@ nl_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
|
34
34
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
35
35
|
|
36
36
|
nl_xmlXPathRegisterNs(ctx,
|
37
|
-
|
38
|
-
|
37
|
+
(const xmlChar *)StringValueCStr(prefix),
|
38
|
+
(const xmlChar *)StringValueCStr(uri));
|
39
39
|
return self;
|
40
40
|
}
|
41
41
|
|
@@ -55,8 +55,8 @@ nl_xpath_context_register_variable(VALUE self, VALUE name, VALUE value)
|
|
55
55
|
xmlValue = nl_xmlXPathNewCString(StringValueCStr(value));
|
56
56
|
|
57
57
|
nl_xmlXPathRegisterVariable(ctx,
|
58
|
-
|
59
|
-
|
58
|
+
(const xmlChar *)StringValueCStr(name),
|
59
|
+
xmlValue);
|
60
60
|
|
61
61
|
return self;
|
62
62
|
}
|
@@ -70,28 +70,23 @@ xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx, VALUE rb_do
|
|
70
70
|
{
|
71
71
|
VALUE rb_retval;
|
72
72
|
|
73
|
-
switch (c_xpath_object->type)
|
74
|
-
{
|
73
|
+
switch (c_xpath_object->type) {
|
75
74
|
case XPATH_STRING:
|
76
75
|
rb_retval = rb_utf8_str_new_cstr((const char *)c_xpath_object->stringval);
|
77
76
|
nl_xmlFree(c_xpath_object->stringval);
|
78
77
|
return rb_retval;
|
79
78
|
|
80
|
-
case XPATH_NODESET:
|
81
|
-
|
82
|
-
if (c_xpath_object->nodesetval == NULL)
|
83
|
-
{
|
79
|
+
case XPATH_NODESET: {
|
80
|
+
if (c_xpath_object->nodesetval == NULL) {
|
84
81
|
return nl_rb_node_set_create_with_data(NULL, rb_document);
|
85
82
|
}
|
86
|
-
if (c_xpath_object->nodesetval->nodeNr == 0)
|
87
|
-
{
|
83
|
+
if (c_xpath_object->nodesetval->nodeNr == 0) {
|
88
84
|
return nl_rb_node_set_create_with_data(NULL, rb_document);
|
89
85
|
}
|
90
86
|
|
91
87
|
lexbor_array_t *array = lexbor_array_create();
|
92
88
|
lxb_status_t status = lexbor_array_init(array, c_xpath_object->nodesetval->nodeNr);
|
93
|
-
if (status != LXB_STATUS_OK)
|
94
|
-
{
|
89
|
+
if (status != LXB_STATUS_OK) {
|
95
90
|
nl_raise_lexbor_error(status);
|
96
91
|
}
|
97
92
|
memcpy(array->list, c_xpath_object->nodesetval->nodeTab, sizeof(lxb_dom_node_t *) * c_xpath_object->nodesetval->nodeNr);
|
@@ -122,8 +117,7 @@ nl_xpath_wrap_syntax_error(xmlErrorPtr error)
|
|
122
117
|
&msg,
|
123
118
|
cNokolexborXpathSyntaxError);
|
124
119
|
|
125
|
-
if (error)
|
126
|
-
{
|
120
|
+
if (error) {
|
127
121
|
rb_iv_set(e, "@domain", INT2NUM(error->domain));
|
128
122
|
rb_iv_set(e, "@code", INT2NUM(error->code));
|
129
123
|
rb_iv_set(e, "@level", INT2NUM((short)error->level));
|
@@ -182,8 +176,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
182
176
|
|
183
177
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
184
178
|
|
185
|
-
if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1)
|
186
|
-
{
|
179
|
+
if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
|
187
180
|
xpath_handler = Qnil;
|
188
181
|
}
|
189
182
|
|
@@ -203,15 +196,13 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
203
196
|
nl_xmlSetStructuredErrorFunc(NULL, NULL);
|
204
197
|
nl_xmlSetGenericErrorFunc(NULL, NULL);
|
205
198
|
|
206
|
-
if (xpath == NULL)
|
207
|
-
{
|
199
|
+
if (xpath == NULL) {
|
208
200
|
nl_xmlXPathFreeObject(xpath);
|
209
201
|
rb_exc_raise(rb_ary_entry(errors, 0));
|
210
202
|
}
|
211
203
|
|
212
204
|
retval = xpath2ruby(xpath, ctx, nl_rb_document_get(self));
|
213
|
-
if (retval == Qundef)
|
214
|
-
{
|
205
|
+
if (retval == Qundef) {
|
215
206
|
retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
|
216
207
|
}
|
217
208
|
|
data/ext/nokolexbor/nokolexbor.c
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
VALUE mNokolexbor;
|
4
4
|
VALUE eLexborError;
|
5
|
+
VALUE eLexborSyntaxError;
|
5
6
|
|
6
7
|
void nl_raise_lexbor_error(lxb_status_t error)
|
7
8
|
{
|
8
|
-
switch (error)
|
9
|
-
{
|
9
|
+
switch (error) {
|
10
10
|
case LXB_STATUS_ERROR:
|
11
11
|
rb_raise(eLexborError, "LXB_STATUS_ERROR");
|
12
12
|
case LXB_STATUS_ERROR_MEMORY_ALLOCATION:
|
@@ -30,7 +30,7 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
30
30
|
case LXB_STATUS_ERROR_UNEXPECTED_RESULT:
|
31
31
|
rb_raise(eLexborError, "LXB_STATUS_ERROR_UNEXPECTED_RESULT");
|
32
32
|
case LXB_STATUS_ERROR_UNEXPECTED_DATA:
|
33
|
-
rb_raise(
|
33
|
+
rb_raise(eLexborSyntaxError, "LXB_STATUS_ERROR_UNEXPECTED_DATA");
|
34
34
|
case LXB_STATUS_ERROR_OVERFLOW:
|
35
35
|
rb_raise(eLexborError, "LXB_STATUS_ERROR_OVERFLOW");
|
36
36
|
case LXB_STATUS_CONTINUE:
|
@@ -56,8 +56,12 @@ void Init_nokolexbor(void)
|
|
56
56
|
{
|
57
57
|
mNokolexbor = rb_define_module("Nokolexbor");
|
58
58
|
eLexborError = rb_define_class_under(mNokolexbor, "LexborError", rb_eStandardError);
|
59
|
+
eLexborSyntaxError = rb_define_class_under(mNokolexbor, "LexborSyntaxError", eLexborError);
|
59
60
|
Init_nl_node();
|
60
61
|
Init_nl_document();
|
62
|
+
Init_nl_text();
|
63
|
+
Init_nl_comment();
|
64
|
+
Init_nl_cdata();
|
61
65
|
Init_nl_node_set();
|
62
66
|
Init_nl_xpath_context();
|
63
67
|
}
|
data/ext/nokolexbor/nokolexbor.h
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
5
|
|
6
|
-
#include <lexbor/html/html.h>
|
7
6
|
#include <lexbor/css/css.h>
|
7
|
+
#include <lexbor/html/html.h>
|
8
8
|
#include <lexbor/selectors/selectors.h>
|
9
9
|
|
10
10
|
extern VALUE cNokolexborDocument;
|
@@ -12,6 +12,9 @@ extern VALUE cNokolexborDocument;
|
|
12
12
|
void Init_nl_document(void);
|
13
13
|
void Init_nl_node(void);
|
14
14
|
void Init_nl_node_set(void);
|
15
|
+
void Init_nl_text(void);
|
16
|
+
void Init_nl_comment(void);
|
17
|
+
void Init_nl_cdata(void);
|
15
18
|
void Init_nl_xpath_context(void);
|
16
19
|
|
17
20
|
void nl_raise_lexbor_error(lxb_status_t error);
|
@@ -21,14 +24,13 @@ VALUE nl_rb_node_set_create_with_data(lexbor_array_t *array, VALUE rb_document);
|
|
21
24
|
|
22
25
|
lxb_inline VALUE nl_rb_document_get(VALUE rb_node_or_doc)
|
23
26
|
{
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
return rb_iv_get(rb_node_or_doc, "@document");
|
27
|
+
if (rb_obj_is_kind_of(rb_node_or_doc, cNokolexborDocument)) {
|
28
|
+
return rb_node_or_doc;
|
29
|
+
}
|
30
|
+
return rb_iv_get(rb_node_or_doc, "@document");
|
29
31
|
}
|
30
32
|
|
31
|
-
lxb_dom_document_t *
|
33
|
+
lxb_dom_document_t *nl_rb_document_unwrap(VALUE rb_doc);
|
32
34
|
|
33
35
|
const lxb_char_t *
|
34
36
|
lxb_dom_node_name_qualified(lxb_dom_node_t *node, size_t *len);
|
data/lib/nokolexbor/document.rb
CHANGED
@@ -1,6 +1,97 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
|
-
class Document < Node
|
4
|
+
class Document < Nokolexbor::Node
|
5
|
+
def create_element(name, *contents_or_attrs, &block)
|
6
|
+
elm = Nokolexbor::Element.new(name, self, &block)
|
7
|
+
contents_or_attrs.each do |arg|
|
8
|
+
case arg
|
9
|
+
when Hash
|
10
|
+
arg.each do |k, v|
|
11
|
+
elm[k.to_s] = v.to_s
|
12
|
+
end
|
13
|
+
else
|
14
|
+
elm.content = arg
|
15
|
+
end
|
16
|
+
end
|
17
|
+
elm
|
18
|
+
end
|
19
|
+
|
20
|
+
# Create a Text Node with +string+
|
21
|
+
def create_text_node(string, &block)
|
22
|
+
Nokolexbor::Text.new(string.to_s, self, &block)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Create a CDATA Node containing +string+
|
26
|
+
def create_cdata(string, &block)
|
27
|
+
Nokolexbor::CDATA.new(string.to_s, self, &block)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Create a Comment Node containing +string+
|
31
|
+
def create_comment(string, &block)
|
32
|
+
Nokolexbor::Comment.new(string.to_s, self, &block)
|
33
|
+
end
|
34
|
+
|
35
|
+
# A reference to +self+
|
36
|
+
def document
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
def meta_encoding
|
41
|
+
if (meta = at_css("meta[charset]"))
|
42
|
+
meta[:charset]
|
43
|
+
elsif (meta = meta_content_type)
|
44
|
+
meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def meta_encoding=(encoding)
|
49
|
+
if (meta = meta_content_type)
|
50
|
+
meta["content"] = format("text/html; charset=%s", encoding)
|
51
|
+
encoding
|
52
|
+
elsif (meta = at_css("meta[charset]"))
|
53
|
+
meta["charset"] = encoding
|
54
|
+
else
|
55
|
+
meta = Nokolexbor::Node.new("meta", self)
|
56
|
+
meta["charset"] = encoding
|
57
|
+
|
58
|
+
if (head = at_css("head"))
|
59
|
+
head.prepend_child(meta)
|
60
|
+
else
|
61
|
+
set_metadata_element(meta)
|
62
|
+
end
|
63
|
+
encoding
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def meta_content_type
|
68
|
+
xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
|
69
|
+
node["http-equiv"] =~ /\AContent-Type\z/i
|
70
|
+
end
|
71
|
+
end
|
72
|
+
private :meta_content_type
|
73
|
+
|
74
|
+
def set_metadata_element(element)
|
75
|
+
if (head = at_css("head"))
|
76
|
+
head << element
|
77
|
+
elsif (html = at_css("html"))
|
78
|
+
head = html.prepend_child(Nokolexbor::Node.new("head", self))
|
79
|
+
head.prepend_child(element)
|
80
|
+
elsif (first = children.find do |node|
|
81
|
+
case node
|
82
|
+
when Nokolexbor::Node
|
83
|
+
true
|
84
|
+
end
|
85
|
+
end)
|
86
|
+
# We reach here only if the underlying document model
|
87
|
+
# allows <html>/<head> elements to be omitted and does not
|
88
|
+
# automatically supply them.
|
89
|
+
first.add_previous_sibling(element)
|
90
|
+
else
|
91
|
+
html = add_child(Nokolexbor::Node.new("html", self))
|
92
|
+
head = html.add_child(Nokolexbor::Node.new("head", self))
|
93
|
+
head.prepend_child(element)
|
94
|
+
end
|
95
|
+
end
|
5
96
|
end
|
6
97
|
end
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -45,6 +45,10 @@ module Nokolexbor
|
|
45
45
|
type == ELEMENT_NODE
|
46
46
|
end
|
47
47
|
|
48
|
+
def document?
|
49
|
+
is_a?(Nokolexbor::Document)
|
50
|
+
end
|
51
|
+
|
48
52
|
def ancestors(selector = nil)
|
49
53
|
return NodeSet.new(@document) unless respond_to?(:parent)
|
50
54
|
return NodeSet.new(@document) unless parent
|
@@ -87,6 +91,56 @@ module Nokolexbor
|
|
87
91
|
self
|
88
92
|
end
|
89
93
|
|
94
|
+
def add_previous_sibling(node_or_tags)
|
95
|
+
raise ArgumentError,
|
96
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
97
|
+
|
98
|
+
add_sibling(:previous, node_or_tags)
|
99
|
+
end
|
100
|
+
|
101
|
+
def add_next_sibling(node_or_tags)
|
102
|
+
raise ArgumentError,
|
103
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
104
|
+
|
105
|
+
add_sibling(:next, node_or_tags)
|
106
|
+
end
|
107
|
+
|
108
|
+
def before(node_or_tags)
|
109
|
+
add_previous_sibling(node_or_tags)
|
110
|
+
self
|
111
|
+
end
|
112
|
+
|
113
|
+
def after(node_or_tags)
|
114
|
+
add_next_sibling(node_or_tags)
|
115
|
+
self
|
116
|
+
end
|
117
|
+
|
118
|
+
alias_method :next_sibling, :next
|
119
|
+
alias_method :previous_sibling, :previous
|
120
|
+
alias_method :next=, :add_next_sibling
|
121
|
+
alias_method :previous=, :add_previous_sibling
|
122
|
+
|
123
|
+
def <<(node_or_tags)
|
124
|
+
add_child(node_or_tags)
|
125
|
+
self
|
126
|
+
end
|
127
|
+
|
128
|
+
def prepend_child(node)
|
129
|
+
if (first = children.first)
|
130
|
+
# Mimic the error add_child would raise.
|
131
|
+
raise "Document already has a root node" if document? && !(node.comment? || node.processing_instruction?)
|
132
|
+
|
133
|
+
first.add_sibling(:previous, node)
|
134
|
+
else
|
135
|
+
add_child(node)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def traverse(&block)
|
140
|
+
children.each { |j| j.traverse(&block) }
|
141
|
+
yield(self)
|
142
|
+
end
|
143
|
+
|
90
144
|
def matches?(selector)
|
91
145
|
ancestors.last.css(selector).any? { |node| node == self }
|
92
146
|
end
|
@@ -118,6 +172,10 @@ module Nokolexbor
|
|
118
172
|
end
|
119
173
|
end
|
120
174
|
|
175
|
+
def parent=(parent_node)
|
176
|
+
parent_node.add_child(self)
|
177
|
+
end
|
178
|
+
|
121
179
|
def each
|
122
180
|
attributes.each do |name, node|
|
123
181
|
yield [name, node.value]
|
@@ -233,6 +291,12 @@ module Nokolexbor
|
|
233
291
|
end
|
234
292
|
end
|
235
293
|
|
294
|
+
def write_to(io, *options)
|
295
|
+
io.write(to_html(*options))
|
296
|
+
end
|
297
|
+
|
298
|
+
alias_method :write_html_to, :write_to
|
299
|
+
|
236
300
|
private
|
237
301
|
|
238
302
|
def xpath_internal(node, paths, handler, ns, binds)
|