makiri 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/conformance.yml +22 -0
- data/.github/workflows/libfuzzer.yml +83 -0
- data/.github/workflows/release.yml +12 -7
- data/.github/workflows/security.yml +88 -3
- data/.github/workflows/valgrind.yml +135 -0
- data/CHANGELOG.md +152 -15
- data/README.md +183 -13
- data/Rakefile +294 -7
- data/ext/makiri/bridge/bridge.h +28 -0
- data/ext/makiri/bridge/ruby_string.c +282 -12
- data/ext/makiri/core/mkr_alloc.c +40 -3
- data/ext/makiri/core/mkr_alloc.h +28 -5
- data/ext/makiri/core/mkr_buf.c +47 -3
- data/ext/makiri/core/mkr_buf.h +112 -3
- data/ext/makiri/core/mkr_core.c +143 -0
- data/ext/makiri/core/mkr_core.h +11 -2
- data/ext/makiri/core/mkr_hash.h +1 -1
- data/ext/makiri/core/mkr_span.h +186 -0
- data/ext/makiri/core/mkr_text.h +8 -8
- data/ext/makiri/core/mkr_utf8.c +101 -0
- data/ext/makiri/core/mkr_utf8.h +88 -0
- data/ext/makiri/extconf.rb +123 -10
- data/ext/makiri/fuzz/Makefile +95 -0
- data/ext/makiri/fuzz/check_fuzzer.cc +4 -0
- data/ext/makiri/fuzz/xml_fuzz.c +24 -0
- data/ext/makiri/fuzz/xpath_fuzz.c +109 -0
- data/ext/makiri/glue/glue.h +55 -11
- data/ext/makiri/glue/ruby_doc.c +129 -59
- data/ext/makiri/glue/ruby_html_css.c +292 -0
- data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +248 -52
- data/ext/makiri/glue/ruby_html_node.c +859 -0
- data/ext/makiri/glue/ruby_html_serialize.c +154 -0
- data/ext/makiri/glue/ruby_node.c +74 -729
- data/ext/makiri/glue/ruby_node_set.c +167 -32
- data/ext/makiri/glue/ruby_xml.c +602 -0
- data/ext/makiri/glue/ruby_xml_node.c +1373 -0
- data/ext/makiri/glue/ruby_xpath.c +63 -30
- data/ext/makiri/glue/ruby_xpath.h +19 -0
- data/ext/makiri/lexbor_compat/compat.h +42 -9
- data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
- data/ext/makiri/lexbor_compat/dom_index.c +2 -2
- data/ext/makiri/lexbor_compat/post_parse.c +100 -10
- data/ext/makiri/lexbor_compat/source_loc.c +15 -13
- data/ext/makiri/lexbor_compat/text_index.c +14 -8
- data/ext/makiri/lexbor_compat/utf8_input.c +19 -33
- data/ext/makiri/makiri.c +184 -6
- data/ext/makiri/makiri.h +43 -2
- data/ext/makiri/xml/mkr_xml.h +125 -0
- data/ext/makiri/xml/mkr_xml_chars.c +195 -0
- data/ext/makiri/xml/mkr_xml_index.c +169 -0
- data/ext/makiri/xml/mkr_xml_index.h +48 -0
- data/ext/makiri/xml/mkr_xml_mutate.c +817 -0
- data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
- data/ext/makiri/xml/mkr_xml_node.c +399 -0
- data/ext/makiri/xml/mkr_xml_node.h +184 -0
- data/ext/makiri/xml/mkr_xml_tree.c +1515 -0
- data/ext/makiri/xpath/mkr_css.c +1023 -0
- data/ext/makiri/xpath/mkr_css.h +65 -0
- data/ext/makiri/xpath/mkr_xpath.c +96 -32
- data/ext/makiri/xpath/mkr_xpath.h +109 -4
- data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
- data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
- data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +551 -241
- data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +318 -276
- data/ext/makiri/xpath/mkr_xpath_internal.h +177 -206
- data/ext/makiri/xpath/mkr_xpath_lex.c +95 -125
- data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
- data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +145 -0
- data/ext/makiri/xpath/mkr_xpath_number.c +109 -0
- data/ext/makiri/xpath/mkr_xpath_parse.c +83 -94
- data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
- data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
- data/ext/makiri/xpath/mkr_xpath_shared.c +609 -0
- data/ext/makiri/xpath/mkr_xpath_value_body.h +801 -0
- data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
- data/lib/makiri/{attribute.rb → attr.rb} +7 -3
- data/lib/makiri/cdata_section.rb +19 -0
- data/lib/makiri/comment.rb +10 -0
- data/lib/makiri/compat_aliases.rb +30 -0
- data/lib/makiri/document.rb +9 -73
- data/lib/makiri/document_fragment.rb +14 -9
- data/lib/makiri/element.rb +4 -4
- data/lib/makiri/html/document.rb +106 -0
- data/lib/makiri/html/node_methods.rb +19 -0
- data/lib/makiri/html.rb +12 -0
- data/lib/makiri/node.rb +58 -15
- data/lib/makiri/node_set.rb +8 -0
- data/lib/makiri/processing_instruction.rb +10 -0
- data/lib/makiri/text.rb +1 -1
- data/lib/makiri/version.rb +1 -1
- data/lib/makiri/xml/builder.rb +263 -0
- data/lib/makiri/xml/document.rb +24 -0
- data/lib/makiri/xml/node_methods.rb +84 -0
- data/lib/makiri/xml.rb +10 -0
- data/lib/makiri/xpath_context.rb +1 -1
- data/lib/makiri.rb +24 -5
- data/script/build_native_gem.rb +2 -2
- data/script/check_alloc_failures.rb +266 -0
- data/script/check_c_safety.rb +77 -2
- data/script/check_c_safety_allowlist.yml +102 -0
- data/script/check_leaks.rb +64 -0
- data/script/leaks_harness.rb +64 -0
- data/vendor/lexbor/CMakeLists.txt +6 -0
- data/vendor/lexbor/README.md +12 -0
- data/vendor/lexbor/config.cmake +1 -1
- data/vendor/lexbor/source/lexbor/core/base.h +1 -1
- data/vendor/lexbor/source/lexbor/core/config.cmake +9 -1
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +2 -3
- data/vendor/lexbor/source/lexbor/css/selectors/state.c +3 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +21 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +5 -0
- data/vendor/lexbor/source/lexbor/encoding/decode.c +33 -4
- data/vendor/lexbor/source/lexbor/html/base.h +1 -1
- data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +4 -0
- data/vendor/lexbor/source/lexbor/html/serialize.c +545 -41
- data/vendor/lexbor/source/lexbor/html/serialize.h +2 -1
- data/vendor/lexbor/source/lexbor/html/tokenizer.h +2 -2
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1 -1
- data/vendor/lexbor/source/lexbor/html/tree.c +6 -6
- data/vendor/lexbor/source/lexbor/selectors/selectors.c +12 -3
- data/vendor/lexbor/source/lexbor/url/base.h +1 -1
- data/vendor/lexbor/source/lexbor/url/url.c +5 -2
- data/vendor/lexbor/source/lexbor/url/url.h +9 -0
- data/vendor/lexbor/version +1 -1
- metadata +53 -9
- data/ext/makiri/glue/ruby_css.c +0 -185
- data/ext/makiri/glue/ruby_serialize.c +0 -92
- data/ext/makiri/xpath/mkr_xpath_value.c +0 -1286
- data/lib/makiri/cdata.rb +0 -6
|
@@ -1,6 +1,18 @@
|
|
|
1
1
|
#include "glue.h"
|
|
2
2
|
|
|
3
3
|
#include <lexbor/html/parser.h>
|
|
4
|
+
#include <lexbor/ns/ns.h>
|
|
5
|
+
|
|
6
|
+
/* Exported by lexbor but omitted from its public headers. lxb_ns_append interns
|
|
7
|
+
* a namespace URI in the document's ns table; lxb_dom_attr_set_name_ns names an
|
|
8
|
+
* attribute from (namespace, qualified name), splitting prefix/local and
|
|
9
|
+
* interning the namespace. */
|
|
10
|
+
extern const lxb_ns_data_t *
|
|
11
|
+
lxb_ns_append(lexbor_hash_t *hash, const lxb_char_t *link, size_t length);
|
|
12
|
+
extern lxb_status_t
|
|
13
|
+
lxb_dom_attr_set_name_ns(lxb_dom_attr_t *attr, const lxb_char_t *link,
|
|
14
|
+
size_t link_length, const lxb_char_t *name,
|
|
15
|
+
size_t name_length, bool to_lowercase);
|
|
4
16
|
|
|
5
17
|
/*
|
|
6
18
|
* DOM mutation (v0.2). Thin wrappers over Lexbor's insert/remove/create
|
|
@@ -27,13 +39,14 @@ mkr_invalidate_index(VALUE node)
|
|
|
27
39
|
mkr_parsed_text_index_invalidate(p);
|
|
28
40
|
}
|
|
29
41
|
|
|
42
|
+
/* An HTML node argument for a tree mutation. Routes through mkr_html_node_unwrap so
|
|
43
|
+
* an XML node is rejected before its mkr_xml_node_t* reaches Lexbor (the
|
|
44
|
+
* same-document/cycle checks below read lxb fields, and the insert hands the
|
|
45
|
+
* pointer to Lexbor). */
|
|
30
46
|
static lxb_dom_node_t *
|
|
31
47
|
mkr_arg_node(VALUE v)
|
|
32
48
|
{
|
|
33
|
-
|
|
34
|
-
rb_raise(rb_eTypeError, "expected a Makiri::Node");
|
|
35
|
-
}
|
|
36
|
-
return mkr_node_unwrap(v);
|
|
49
|
+
return mkr_html_node_unwrap(v);
|
|
37
50
|
}
|
|
38
51
|
|
|
39
52
|
/* Validate that `incoming` may be placed relative to `ref` and detach it from
|
|
@@ -70,12 +83,22 @@ mkr_is_fragment(const lxb_dom_node_t *n)
|
|
|
70
83
|
/* tree mutation */
|
|
71
84
|
/* ------------------------------------------------------------------ */
|
|
72
85
|
|
|
86
|
+
/* Every tree / attribute mutation unwraps `self` through here first: a node the
|
|
87
|
+
* caller has frozen (Ruby's Object#freeze) is immutable, so raise FrozenError
|
|
88
|
+
* rather than silently editing it. Read accessors use mkr_html_node_unwrap (XML rejected at the type boundary). */
|
|
89
|
+
static lxb_dom_node_t *
|
|
90
|
+
mkr_node_unwrap_mutable(VALUE self)
|
|
91
|
+
{
|
|
92
|
+
rb_check_frozen(self);
|
|
93
|
+
return mkr_html_node_unwrap(self);
|
|
94
|
+
}
|
|
95
|
+
|
|
73
96
|
/* node.add_child(child) -> child. Appends child as the last child. A document
|
|
74
97
|
* fragment contributes its children rather than itself. */
|
|
75
98
|
static VALUE
|
|
76
99
|
mkr_node_add_child(VALUE self, VALUE rb_child)
|
|
77
100
|
{
|
|
78
|
-
lxb_dom_node_t *parent =
|
|
101
|
+
lxb_dom_node_t *parent = mkr_node_unwrap_mutable(self);
|
|
79
102
|
lxb_dom_node_t *child = mkr_arg_node(rb_child);
|
|
80
103
|
mkr_prepare_insert(parent, child);
|
|
81
104
|
if (mkr_is_fragment(child)) {
|
|
@@ -102,7 +125,7 @@ mkr_node_append(VALUE self, VALUE rb_child)
|
|
|
102
125
|
static VALUE
|
|
103
126
|
mkr_node_add_previous_sibling(VALUE self, VALUE rb_node)
|
|
104
127
|
{
|
|
105
|
-
lxb_dom_node_t *ref =
|
|
128
|
+
lxb_dom_node_t *ref = mkr_node_unwrap_mutable(self);
|
|
106
129
|
lxb_dom_node_t *node = mkr_arg_node(rb_node);
|
|
107
130
|
if (ref->parent == NULL) {
|
|
108
131
|
rb_raise(mkr_eError, "cannot add a sibling to a node with no parent");
|
|
@@ -124,7 +147,7 @@ mkr_node_add_previous_sibling(VALUE self, VALUE rb_node)
|
|
|
124
147
|
static VALUE
|
|
125
148
|
mkr_node_add_next_sibling(VALUE self, VALUE rb_node)
|
|
126
149
|
{
|
|
127
|
-
lxb_dom_node_t *ref =
|
|
150
|
+
lxb_dom_node_t *ref = mkr_node_unwrap_mutable(self);
|
|
128
151
|
lxb_dom_node_t *node = mkr_arg_node(rb_node);
|
|
129
152
|
if (ref->parent == NULL) {
|
|
130
153
|
rb_raise(mkr_eError, "cannot add a sibling to a node with no parent");
|
|
@@ -148,7 +171,7 @@ mkr_node_add_next_sibling(VALUE self, VALUE rb_node)
|
|
|
148
171
|
static VALUE
|
|
149
172
|
mkr_node_remove(VALUE self)
|
|
150
173
|
{
|
|
151
|
-
lxb_dom_node_t *node =
|
|
174
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
152
175
|
if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
|
|
153
176
|
rb_raise(mkr_eError, "use delete(name) to remove an attribute");
|
|
154
177
|
}
|
|
@@ -163,7 +186,7 @@ mkr_node_remove(VALUE self)
|
|
|
163
186
|
static VALUE
|
|
164
187
|
mkr_node_replace(VALUE self, VALUE rb_other)
|
|
165
188
|
{
|
|
166
|
-
lxb_dom_node_t *ref =
|
|
189
|
+
lxb_dom_node_t *ref = mkr_node_unwrap_mutable(self);
|
|
167
190
|
lxb_dom_node_t *other = mkr_arg_node(rb_other);
|
|
168
191
|
if (ref->parent == NULL) {
|
|
169
192
|
rb_raise(mkr_eError, "cannot replace a node with no parent");
|
|
@@ -191,7 +214,7 @@ mkr_node_replace(VALUE self, VALUE rb_other)
|
|
|
191
214
|
static VALUE
|
|
192
215
|
mkr_node_aset(VALUE self, VALUE rb_name, VALUE rb_value)
|
|
193
216
|
{
|
|
194
|
-
lxb_dom_node_t *node =
|
|
217
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
195
218
|
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
196
219
|
rb_raise(mkr_eError, "cannot set an attribute on a non-element node");
|
|
197
220
|
}
|
|
@@ -210,13 +233,180 @@ mkr_node_aset(VALUE self, VALUE rb_name, VALUE rb_value)
|
|
|
210
233
|
return rb_value;
|
|
211
234
|
}
|
|
212
235
|
|
|
236
|
+
/* An attribute's OWN namespace id: the one recorded by set_attribute_ns (which
|
|
237
|
+
* differs from the owner element's), else the null namespace - a normally-set or
|
|
238
|
+
* parsed attribute inherits the element's ns, which for matching purposes is the
|
|
239
|
+
* null namespace (an unprefixed attribute is namespaceless). */
|
|
240
|
+
static lxb_ns_id_t
|
|
241
|
+
mkr_attr_own_ns(const lxb_dom_attr_t *at)
|
|
242
|
+
{
|
|
243
|
+
if (at->owner != NULL && at->node.ns != at->owner->node.ns) {
|
|
244
|
+
return at->node.ns;
|
|
245
|
+
}
|
|
246
|
+
return LXB_NS__UNDEF;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/* Find the attribute on `el` matching (ns_id, local_name) case-sensitively - the
|
|
250
|
+
* DOM keys attributes on (namespace, local name), so two with the same qualified
|
|
251
|
+
* name but different namespaces coexist (unlike Lexbor's by-qualified-name,
|
|
252
|
+
* case-insensitive-for-HTML lookup). */
|
|
253
|
+
static lxb_dom_attr_t *
|
|
254
|
+
mkr_attr_find_ns(lxb_dom_element_t *el, lxb_ns_id_t ns_id,
|
|
255
|
+
const lxb_char_t *local, size_t local_len)
|
|
256
|
+
{
|
|
257
|
+
for (lxb_dom_attr_t *at = el->first_attr; at != NULL; at = at->next) {
|
|
258
|
+
if (mkr_attr_own_ns(at) != ns_id) {
|
|
259
|
+
continue;
|
|
260
|
+
}
|
|
261
|
+
/* Compare the case-preserved local name (the suffix of the qualified
|
|
262
|
+
* name): Lexbor lower-cases the stored local_name even when the
|
|
263
|
+
* qualified name keeps its case, but setAttributeNS is case-sensitive. */
|
|
264
|
+
size_t qlen = 0, llen = 0;
|
|
265
|
+
const lxb_char_t *q = lxb_dom_attr_qualified_name(at, &qlen);
|
|
266
|
+
(void) lxb_dom_attr_local_name(at, &llen);
|
|
267
|
+
if (q != NULL && qlen >= llen
|
|
268
|
+
&& mkr_bytes_eq(q + (qlen - llen), llen, local, local_len)) {
|
|
269
|
+
return at;
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
return NULL;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/* element.set_attribute_ns(namespace_or_nil, qualified_name, value) -> value.
|
|
276
|
+
*
|
|
277
|
+
* Stores the attribute under its qualified name (case-preserved - setAttributeNS
|
|
278
|
+
* is case-sensitive, unlike the HTML setAttribute family) and records its OWN
|
|
279
|
+
* namespace on the attr node, so namespaceURI / getAttributeNS resolve it. The
|
|
280
|
+
* namespace URI is interned in the document's ns table; nil/"" stores the null
|
|
281
|
+
* namespace (LXB_NS__UNDEF). */
|
|
282
|
+
static VALUE
|
|
283
|
+
mkr_node_set_attribute_ns(VALUE self, VALUE rb_ns, VALUE rb_qname, VALUE rb_value)
|
|
284
|
+
{
|
|
285
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
286
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
287
|
+
rb_raise(mkr_eError, "cannot set an attribute on a non-element node");
|
|
288
|
+
}
|
|
289
|
+
lxb_dom_element_t *el = lxb_dom_interface_element(node);
|
|
290
|
+
|
|
291
|
+
mkr_ruby_borrowed_text_t qv = mkr_ruby_verified_text(rb_qname, "attribute qualified name");
|
|
292
|
+
mkr_ruby_borrowed_text_t vv = mkr_ruby_verified_text(rb_value, "attribute value");
|
|
293
|
+
|
|
294
|
+
mkr_ruby_borrowed_text_t nv = {0};
|
|
295
|
+
bool have_ns = false;
|
|
296
|
+
if (!NIL_P(rb_ns)) {
|
|
297
|
+
nv = mkr_ruby_verified_text(rb_ns, "namespace");
|
|
298
|
+
have_ns = nv.len > 0;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
/* Intern the wanted namespace (null/"" => LXB_NS__UNDEF) so the existing
|
|
302
|
+
* attribute is matched on (namespace, local name) - the DOM key - rather than
|
|
303
|
+
* the qualified name. */
|
|
304
|
+
lxb_ns_id_t want_ns = LXB_NS__UNDEF;
|
|
305
|
+
if (have_ns && node->owner_document != NULL && node->owner_document->ns != NULL) {
|
|
306
|
+
const lxb_ns_data_t *d = lxb_ns_append(node->owner_document->ns,
|
|
307
|
+
(const lxb_char_t *)nv.ptr, nv.len);
|
|
308
|
+
if (d != NULL) {
|
|
309
|
+
want_ns = d->ns_id;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
const lxb_char_t *qn = (const lxb_char_t *)qv.ptr;
|
|
314
|
+
mkr_span_t qspan = mkr_span((const char *)qn, qv.len);
|
|
315
|
+
size_t colon_off;
|
|
316
|
+
bool has_colon = mkr_span_find(&qspan, ':', &colon_off);
|
|
317
|
+
const lxb_char_t *local = has_colon ? qn + colon_off + 1 : qn;
|
|
318
|
+
size_t local_len = has_colon ? qv.len - colon_off - 1 : qv.len;
|
|
319
|
+
|
|
320
|
+
/* A match keeps its qualified name (so re-setting with a different prefix
|
|
321
|
+
* leaves the prefix unchanged); only the value updates. A miss appends a new
|
|
322
|
+
* attribute, even when its qualified name collides with an existing one in a
|
|
323
|
+
* different namespace - the namespace-aware setter splits prefix/local and
|
|
324
|
+
* records the namespace; a null namespace just sets the bare name. */
|
|
325
|
+
lxb_dom_attr_t *attr = mkr_attr_find_ns(el, want_ns, local, local_len);
|
|
326
|
+
if (attr != NULL) {
|
|
327
|
+
if (lxb_dom_attr_set_value(attr, (const lxb_char_t *)vv.ptr, vv.len) != LXB_STATUS_OK) {
|
|
328
|
+
rb_raise(mkr_eError, "failed to set attribute value");
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
else {
|
|
332
|
+
attr = lxb_dom_attr_interface_create(node->owner_document);
|
|
333
|
+
if (attr == NULL) {
|
|
334
|
+
rb_raise(mkr_eError, "failed to create attribute");
|
|
335
|
+
}
|
|
336
|
+
/* A fresh attr is calloc'd, so node.ns is already LXB_NS__UNDEF for the
|
|
337
|
+
* null-namespace case; only the namespaced setter changes it. */
|
|
338
|
+
lxb_status_t st;
|
|
339
|
+
if (have_ns) {
|
|
340
|
+
st = lxb_dom_attr_set_name_ns(attr, (const lxb_char_t *)nv.ptr, nv.len,
|
|
341
|
+
(const lxb_char_t *)qv.ptr, qv.len, false);
|
|
342
|
+
}
|
|
343
|
+
else {
|
|
344
|
+
st = lxb_dom_attr_set_name(attr, (const lxb_char_t *)qv.ptr, qv.len, false);
|
|
345
|
+
}
|
|
346
|
+
if (st != LXB_STATUS_OK
|
|
347
|
+
|| lxb_dom_attr_set_value(attr, (const lxb_char_t *)vv.ptr, vv.len) != LXB_STATUS_OK) {
|
|
348
|
+
/* Leave the un-appended attr for the document arena to free wholesale
|
|
349
|
+
* (the module's "never destroy a detached node" convention). */
|
|
350
|
+
rb_raise(mkr_eError, "failed to set namespaced attribute");
|
|
351
|
+
}
|
|
352
|
+
lxb_dom_element_attr_append(el, attr);
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
RB_GC_GUARD(qv.value);
|
|
356
|
+
RB_GC_GUARD(vv.value);
|
|
357
|
+
RB_GC_GUARD(nv.value);
|
|
358
|
+
mkr_invalidate_index(self);
|
|
359
|
+
return rb_value;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
/* element.remove_attribute_ns(namespace_or_nil, local_name) -> nil. Removes the
|
|
363
|
+
* attribute matching (namespace, local name) - the DOM key - so a namespaced
|
|
364
|
+
* attribute is removed without disturbing a same-qualified-name one in another
|
|
365
|
+
* namespace (which removal by qualified name, case-insensitive for HTML, would). */
|
|
366
|
+
static VALUE
|
|
367
|
+
mkr_node_remove_attribute_ns(VALUE self, VALUE rb_ns, VALUE rb_local)
|
|
368
|
+
{
|
|
369
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
370
|
+
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
371
|
+
return Qnil;
|
|
372
|
+
}
|
|
373
|
+
lxb_dom_element_t *el = lxb_dom_interface_element(node);
|
|
374
|
+
|
|
375
|
+
mkr_ruby_borrowed_text_t lv = mkr_ruby_verified_text(rb_local, "attribute local name");
|
|
376
|
+
|
|
377
|
+
lxb_ns_id_t want_ns = LXB_NS__UNDEF;
|
|
378
|
+
VALUE ns_guard = Qnil;
|
|
379
|
+
if (!NIL_P(rb_ns)) {
|
|
380
|
+
mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_ns, "namespace");
|
|
381
|
+
ns_guard = nv.value;
|
|
382
|
+
if (nv.len > 0 && node->owner_document != NULL && node->owner_document->ns != NULL) {
|
|
383
|
+
const lxb_ns_data_t *d = lxb_ns_append(node->owner_document->ns,
|
|
384
|
+
(const lxb_char_t *)nv.ptr, nv.len);
|
|
385
|
+
if (d != NULL) {
|
|
386
|
+
want_ns = d->ns_id;
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
lxb_dom_attr_t *attr = mkr_attr_find_ns(el, want_ns,
|
|
392
|
+
(const lxb_char_t *)lv.ptr, lv.len);
|
|
393
|
+
if (attr != NULL) {
|
|
394
|
+
lxb_dom_element_attr_remove(el, attr);
|
|
395
|
+
mkr_invalidate_index(self);
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
RB_GC_GUARD(lv.value);
|
|
399
|
+
RB_GC_GUARD(ns_guard);
|
|
400
|
+
return Qnil;
|
|
401
|
+
}
|
|
402
|
+
|
|
213
403
|
/* element.name = new_name -> new_name. Renames the element in place (identity
|
|
214
404
|
* preserved): create a throwaway element with the new name so the document
|
|
215
405
|
* interns it, copy its name fields onto this node, then discard it. */
|
|
216
406
|
static VALUE
|
|
217
407
|
mkr_node_set_name(VALUE self, VALUE rb_name)
|
|
218
408
|
{
|
|
219
|
-
lxb_dom_node_t *node =
|
|
409
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
220
410
|
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
221
411
|
rb_raise(mkr_eError, "name= is only supported on elements");
|
|
222
412
|
}
|
|
@@ -245,7 +435,7 @@ mkr_node_set_name(VALUE self, VALUE rb_name)
|
|
|
245
435
|
static VALUE
|
|
246
436
|
mkr_node_set_content(VALUE self, VALUE rb_text)
|
|
247
437
|
{
|
|
248
|
-
lxb_dom_node_t *node =
|
|
438
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
249
439
|
mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_text, "node content");
|
|
250
440
|
lxb_status_t st = lxb_dom_node_text_content_set(
|
|
251
441
|
node, (const lxb_char_t *)tv.ptr, tv.len);
|
|
@@ -261,7 +451,7 @@ mkr_node_set_content(VALUE self, VALUE rb_text)
|
|
|
261
451
|
static VALUE
|
|
262
452
|
mkr_node_delete(VALUE self, VALUE rb_name)
|
|
263
453
|
{
|
|
264
|
-
lxb_dom_node_t *node =
|
|
454
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
265
455
|
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
266
456
|
return self;
|
|
267
457
|
}
|
|
@@ -318,7 +508,11 @@ mkr_parse_fragment_into(lxb_dom_node_t *context_el, VALUE rb_html,
|
|
|
318
508
|
|
|
319
509
|
mkr_import_fragment_children(doc, frag, emit, u);
|
|
320
510
|
|
|
321
|
-
/*
|
|
511
|
+
/* lxb_html_parse_fragment built the fragment in a TRANSIENT document that
|
|
512
|
+
* destroying the parser does NOT free (measured: one document leaked per
|
|
513
|
+
* inner_html=/outer_html= call); our imported copies live in `doc`, so the
|
|
514
|
+
* transient document is destroyed explicitly. */
|
|
515
|
+
lxb_html_document_destroy(lxb_html_interface_document(frag->owner_document));
|
|
322
516
|
lxb_html_parser_destroy(parser);
|
|
323
517
|
RB_GC_GUARD(html);
|
|
324
518
|
}
|
|
@@ -327,7 +521,7 @@ mkr_parse_fragment_into(lxb_dom_node_t *context_el, VALUE rb_html,
|
|
|
327
521
|
static VALUE
|
|
328
522
|
mkr_node_set_inner_html(VALUE self, VALUE rb_html)
|
|
329
523
|
{
|
|
330
|
-
lxb_dom_node_t *node =
|
|
524
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
331
525
|
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
332
526
|
rb_raise(mkr_eError, "inner_html= requires an element");
|
|
333
527
|
}
|
|
@@ -348,7 +542,7 @@ mkr_node_set_inner_html(VALUE self, VALUE rb_html)
|
|
|
348
542
|
static VALUE
|
|
349
543
|
mkr_node_set_outer_html(VALUE self, VALUE rb_html)
|
|
350
544
|
{
|
|
351
|
-
lxb_dom_node_t *node =
|
|
545
|
+
lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
|
|
352
546
|
lxb_dom_node_t *parent = node->parent;
|
|
353
547
|
if (parent == NULL || parent->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
354
548
|
rb_raise(mkr_eError, "outer_html= requires a node with a parent element");
|
|
@@ -369,7 +563,7 @@ mkr_node_set_outer_html(VALUE self, VALUE rb_html)
|
|
|
369
563
|
static VALUE
|
|
370
564
|
mkr_doc_create_element(VALUE self, VALUE rb_name)
|
|
371
565
|
{
|
|
372
|
-
lxb_dom_document_t *doc =
|
|
566
|
+
lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
|
|
373
567
|
mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_name, "element name");
|
|
374
568
|
lxb_dom_element_t *el = lxb_dom_document_create_element(
|
|
375
569
|
doc, (const lxb_char_t *)nv.ptr, nv.len, NULL);
|
|
@@ -377,13 +571,13 @@ mkr_doc_create_element(VALUE self, VALUE rb_name)
|
|
|
377
571
|
if (el == NULL) {
|
|
378
572
|
rb_raise(mkr_eError, "failed to create element");
|
|
379
573
|
}
|
|
380
|
-
return
|
|
574
|
+
return mkr_wrap_html_node(lxb_dom_interface_node(el), self);
|
|
381
575
|
}
|
|
382
576
|
|
|
383
577
|
static VALUE
|
|
384
578
|
mkr_doc_create_text_node(VALUE self, VALUE rb_text)
|
|
385
579
|
{
|
|
386
|
-
lxb_dom_document_t *doc =
|
|
580
|
+
lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
|
|
387
581
|
mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_text, "text content");
|
|
388
582
|
lxb_dom_text_t *t = lxb_dom_document_create_text_node(
|
|
389
583
|
doc, (const lxb_char_t *)tv.ptr, tv.len);
|
|
@@ -391,13 +585,13 @@ mkr_doc_create_text_node(VALUE self, VALUE rb_text)
|
|
|
391
585
|
if (t == NULL) {
|
|
392
586
|
rb_raise(mkr_eError, "failed to create text node");
|
|
393
587
|
}
|
|
394
|
-
return
|
|
588
|
+
return mkr_wrap_html_node(lxb_dom_interface_node(t), self);
|
|
395
589
|
}
|
|
396
590
|
|
|
397
591
|
static VALUE
|
|
398
592
|
mkr_doc_create_comment(VALUE self, VALUE rb_text)
|
|
399
593
|
{
|
|
400
|
-
lxb_dom_document_t *doc =
|
|
594
|
+
lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
|
|
401
595
|
mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_text, "comment content");
|
|
402
596
|
lxb_dom_comment_t *c = lxb_dom_document_create_comment(
|
|
403
597
|
doc, (const lxb_char_t *)tv.ptr, tv.len);
|
|
@@ -405,16 +599,16 @@ mkr_doc_create_comment(VALUE self, VALUE rb_text)
|
|
|
405
599
|
if (c == NULL) {
|
|
406
600
|
rb_raise(mkr_eError, "failed to create comment");
|
|
407
601
|
}
|
|
408
|
-
return
|
|
602
|
+
return mkr_wrap_html_node(lxb_dom_interface_node(c), self);
|
|
409
603
|
}
|
|
410
604
|
|
|
411
|
-
/* Document#create_processing_instruction(target, data)
|
|
605
|
+
/* Document#create_processing_instruction(target, data) - DOM
|
|
412
606
|
* createProcessingInstruction: a detached ProcessingInstruction owned by this
|
|
413
607
|
* document. Lexbor validates the target, so an invalid one fails closed. */
|
|
414
608
|
static VALUE
|
|
415
609
|
mkr_doc_create_processing_instruction(VALUE self, VALUE rb_target, VALUE rb_data)
|
|
416
610
|
{
|
|
417
|
-
lxb_dom_document_t *doc =
|
|
611
|
+
lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
|
|
418
612
|
mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_target, "processing instruction target");
|
|
419
613
|
mkr_ruby_borrowed_text_t dv = mkr_ruby_verified_text(rb_data, "processing instruction data");
|
|
420
614
|
lxb_dom_processing_instruction_t *pi = lxb_dom_document_create_processing_instruction(
|
|
@@ -424,50 +618,52 @@ mkr_doc_create_processing_instruction(VALUE self, VALUE rb_target, VALUE rb_data
|
|
|
424
618
|
if (pi == NULL) {
|
|
425
619
|
rb_raise(mkr_eError, "failed to create processing instruction");
|
|
426
620
|
}
|
|
427
|
-
return
|
|
621
|
+
return mkr_wrap_html_node(lxb_dom_interface_node(pi), self);
|
|
428
622
|
}
|
|
429
623
|
|
|
430
|
-
/* Document#create_document_fragment
|
|
624
|
+
/* Document#create_document_fragment - DOM createDocumentFragment: an empty
|
|
431
625
|
* DocumentFragment owned by this document (unlike #fragment / DocumentFragment.parse,
|
|
432
626
|
* which parse HTML; this makes an empty one to build up programmatically). */
|
|
433
627
|
static VALUE
|
|
434
628
|
mkr_doc_create_document_fragment(VALUE self)
|
|
435
629
|
{
|
|
436
|
-
lxb_dom_document_t *doc =
|
|
630
|
+
lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
|
|
437
631
|
lxb_dom_document_fragment_t *f = lxb_dom_document_create_document_fragment(doc);
|
|
438
632
|
if (f == NULL) {
|
|
439
633
|
rb_raise(mkr_eError, "failed to create document fragment");
|
|
440
634
|
}
|
|
441
|
-
return
|
|
635
|
+
return mkr_wrap_html_node(lxb_dom_interface_node(f), self);
|
|
442
636
|
}
|
|
443
637
|
|
|
444
638
|
void
|
|
445
639
|
mkr_init_mutate(void)
|
|
446
640
|
{
|
|
447
|
-
rb_define_method(
|
|
448
|
-
rb_define_method(
|
|
449
|
-
rb_define_method(
|
|
450
|
-
rb_define_method(
|
|
451
|
-
rb_define_method(
|
|
452
|
-
rb_define_method(
|
|
453
|
-
rb_define_method(
|
|
454
|
-
rb_define_method(
|
|
455
|
-
rb_define_method(
|
|
456
|
-
|
|
457
|
-
rb_define_method(
|
|
458
|
-
rb_define_method(
|
|
459
|
-
|
|
460
|
-
rb_define_method(
|
|
461
|
-
rb_define_method(
|
|
462
|
-
rb_define_method(
|
|
463
|
-
rb_define_method(
|
|
464
|
-
rb_define_method(
|
|
465
|
-
|
|
466
|
-
rb_define_method(
|
|
467
|
-
|
|
468
|
-
rb_define_method(
|
|
469
|
-
rb_define_method(
|
|
641
|
+
rb_define_method(mkr_mHtmlNodeMethods, "add_child", mkr_node_add_child, 1);
|
|
642
|
+
rb_define_method(mkr_mHtmlNodeMethods, "<<", mkr_node_append, 1);
|
|
643
|
+
rb_define_method(mkr_mHtmlNodeMethods, "add_previous_sibling", mkr_node_add_previous_sibling, 1);
|
|
644
|
+
rb_define_method(mkr_mHtmlNodeMethods, "before", mkr_node_add_previous_sibling, 1);
|
|
645
|
+
rb_define_method(mkr_mHtmlNodeMethods, "add_next_sibling", mkr_node_add_next_sibling, 1);
|
|
646
|
+
rb_define_method(mkr_mHtmlNodeMethods, "after", mkr_node_add_next_sibling, 1);
|
|
647
|
+
rb_define_method(mkr_mHtmlNodeMethods, "remove", mkr_node_remove, 0);
|
|
648
|
+
rb_define_method(mkr_mHtmlNodeMethods, "unlink", mkr_node_remove, 0);
|
|
649
|
+
rb_define_method(mkr_mHtmlNodeMethods, "replace", mkr_node_replace, 1);
|
|
650
|
+
|
|
651
|
+
rb_define_method(mkr_mHtmlNodeMethods, "inner_html=", mkr_node_set_inner_html, 1);
|
|
652
|
+
rb_define_method(mkr_mHtmlNodeMethods, "outer_html=", mkr_node_set_outer_html, 1);
|
|
653
|
+
|
|
654
|
+
rb_define_method(mkr_mHtmlNodeMethods, "[]=", mkr_node_aset, 2);
|
|
655
|
+
rb_define_method(mkr_mHtmlNodeMethods, "set_attribute_ns", mkr_node_set_attribute_ns, 3);
|
|
656
|
+
rb_define_method(mkr_mHtmlNodeMethods, "remove_attribute_ns", mkr_node_remove_attribute_ns, 2);
|
|
657
|
+
rb_define_method(mkr_mHtmlNodeMethods, "delete", mkr_node_delete, 1);
|
|
658
|
+
rb_define_method(mkr_mHtmlNodeMethods, "remove_attribute", mkr_node_delete, 1);
|
|
659
|
+
rb_define_method(mkr_mHtmlNodeMethods, "content=", mkr_node_set_content, 1);
|
|
660
|
+
rb_define_method(mkr_mHtmlNodeMethods, "name=", mkr_node_set_name, 1);
|
|
661
|
+
|
|
662
|
+
rb_define_method(mkr_cHtmlDocument, "create_element", mkr_doc_create_element, 1);
|
|
663
|
+
rb_define_method(mkr_cHtmlDocument, "create_text_node", mkr_doc_create_text_node, 1);
|
|
664
|
+
rb_define_method(mkr_cHtmlDocument, "create_comment", mkr_doc_create_comment, 1);
|
|
665
|
+
rb_define_method(mkr_cHtmlDocument, "create_processing_instruction",
|
|
470
666
|
mkr_doc_create_processing_instruction, 2);
|
|
471
|
-
rb_define_method(
|
|
667
|
+
rb_define_method(mkr_cHtmlDocument, "create_document_fragment",
|
|
472
668
|
mkr_doc_create_document_fragment, 0);
|
|
473
669
|
}
|