makiri 0.2.0 → 0.4.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/conformance.yml +22 -0
  3. data/.github/workflows/libfuzzer.yml +83 -0
  4. data/.github/workflows/release.yml +12 -7
  5. data/.github/workflows/security.yml +88 -3
  6. data/.github/workflows/valgrind.yml +135 -0
  7. data/CHANGELOG.md +152 -15
  8. data/README.md +183 -13
  9. data/Rakefile +294 -7
  10. data/ext/makiri/bridge/bridge.h +28 -0
  11. data/ext/makiri/bridge/ruby_string.c +282 -12
  12. data/ext/makiri/core/mkr_alloc.c +40 -3
  13. data/ext/makiri/core/mkr_alloc.h +28 -5
  14. data/ext/makiri/core/mkr_buf.c +47 -3
  15. data/ext/makiri/core/mkr_buf.h +112 -3
  16. data/ext/makiri/core/mkr_core.c +143 -0
  17. data/ext/makiri/core/mkr_core.h +11 -2
  18. data/ext/makiri/core/mkr_hash.h +1 -1
  19. data/ext/makiri/core/mkr_span.h +186 -0
  20. data/ext/makiri/core/mkr_text.h +8 -8
  21. data/ext/makiri/core/mkr_utf8.c +101 -0
  22. data/ext/makiri/core/mkr_utf8.h +88 -0
  23. data/ext/makiri/extconf.rb +123 -10
  24. data/ext/makiri/fuzz/Makefile +95 -0
  25. data/ext/makiri/fuzz/check_fuzzer.cc +4 -0
  26. data/ext/makiri/fuzz/xml_fuzz.c +24 -0
  27. data/ext/makiri/fuzz/xpath_fuzz.c +109 -0
  28. data/ext/makiri/glue/glue.h +55 -11
  29. data/ext/makiri/glue/ruby_doc.c +129 -59
  30. data/ext/makiri/glue/ruby_html_css.c +292 -0
  31. data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +248 -52
  32. data/ext/makiri/glue/ruby_html_node.c +859 -0
  33. data/ext/makiri/glue/ruby_html_serialize.c +154 -0
  34. data/ext/makiri/glue/ruby_node.c +74 -729
  35. data/ext/makiri/glue/ruby_node_set.c +167 -32
  36. data/ext/makiri/glue/ruby_xml.c +602 -0
  37. data/ext/makiri/glue/ruby_xml_node.c +1373 -0
  38. data/ext/makiri/glue/ruby_xpath.c +63 -30
  39. data/ext/makiri/glue/ruby_xpath.h +19 -0
  40. data/ext/makiri/lexbor_compat/compat.h +42 -9
  41. data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
  42. data/ext/makiri/lexbor_compat/dom_index.c +2 -2
  43. data/ext/makiri/lexbor_compat/post_parse.c +100 -10
  44. data/ext/makiri/lexbor_compat/source_loc.c +15 -13
  45. data/ext/makiri/lexbor_compat/text_index.c +14 -8
  46. data/ext/makiri/lexbor_compat/utf8_input.c +19 -33
  47. data/ext/makiri/makiri.c +184 -6
  48. data/ext/makiri/makiri.h +43 -2
  49. data/ext/makiri/xml/mkr_xml.h +125 -0
  50. data/ext/makiri/xml/mkr_xml_chars.c +195 -0
  51. data/ext/makiri/xml/mkr_xml_index.c +169 -0
  52. data/ext/makiri/xml/mkr_xml_index.h +48 -0
  53. data/ext/makiri/xml/mkr_xml_mutate.c +817 -0
  54. data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
  55. data/ext/makiri/xml/mkr_xml_node.c +399 -0
  56. data/ext/makiri/xml/mkr_xml_node.h +184 -0
  57. data/ext/makiri/xml/mkr_xml_tree.c +1515 -0
  58. data/ext/makiri/xpath/mkr_css.c +1023 -0
  59. data/ext/makiri/xpath/mkr_css.h +65 -0
  60. data/ext/makiri/xpath/mkr_xpath.c +96 -32
  61. data/ext/makiri/xpath/mkr_xpath.h +109 -4
  62. data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
  63. data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
  64. data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +551 -241
  65. data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +318 -276
  66. data/ext/makiri/xpath/mkr_xpath_internal.h +177 -206
  67. data/ext/makiri/xpath/mkr_xpath_lex.c +95 -125
  68. data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
  69. data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +145 -0
  70. data/ext/makiri/xpath/mkr_xpath_number.c +109 -0
  71. data/ext/makiri/xpath/mkr_xpath_parse.c +83 -94
  72. data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
  73. data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
  74. data/ext/makiri/xpath/mkr_xpath_shared.c +609 -0
  75. data/ext/makiri/xpath/mkr_xpath_value_body.h +801 -0
  76. data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
  77. data/lib/makiri/{attribute.rb → attr.rb} +7 -3
  78. data/lib/makiri/cdata_section.rb +19 -0
  79. data/lib/makiri/comment.rb +10 -0
  80. data/lib/makiri/compat_aliases.rb +30 -0
  81. data/lib/makiri/document.rb +9 -73
  82. data/lib/makiri/document_fragment.rb +14 -9
  83. data/lib/makiri/element.rb +4 -4
  84. data/lib/makiri/html/document.rb +106 -0
  85. data/lib/makiri/html/node_methods.rb +19 -0
  86. data/lib/makiri/html.rb +12 -0
  87. data/lib/makiri/node.rb +58 -15
  88. data/lib/makiri/node_set.rb +8 -0
  89. data/lib/makiri/processing_instruction.rb +10 -0
  90. data/lib/makiri/text.rb +1 -1
  91. data/lib/makiri/version.rb +1 -1
  92. data/lib/makiri/xml/builder.rb +263 -0
  93. data/lib/makiri/xml/document.rb +24 -0
  94. data/lib/makiri/xml/node_methods.rb +84 -0
  95. data/lib/makiri/xml.rb +10 -0
  96. data/lib/makiri/xpath_context.rb +1 -1
  97. data/lib/makiri.rb +24 -5
  98. data/script/build_native_gem.rb +2 -2
  99. data/script/check_alloc_failures.rb +266 -0
  100. data/script/check_c_safety.rb +77 -2
  101. data/script/check_c_safety_allowlist.yml +102 -0
  102. data/script/check_leaks.rb +64 -0
  103. data/script/leaks_harness.rb +64 -0
  104. data/vendor/lexbor/CMakeLists.txt +6 -0
  105. data/vendor/lexbor/README.md +12 -0
  106. data/vendor/lexbor/config.cmake +1 -1
  107. data/vendor/lexbor/source/lexbor/core/base.h +1 -1
  108. data/vendor/lexbor/source/lexbor/core/config.cmake +9 -1
  109. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +2 -3
  110. data/vendor/lexbor/source/lexbor/css/selectors/state.c +3 -0
  111. data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +21 -0
  112. data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +5 -0
  113. data/vendor/lexbor/source/lexbor/encoding/decode.c +33 -4
  114. data/vendor/lexbor/source/lexbor/html/base.h +1 -1
  115. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +4 -0
  116. data/vendor/lexbor/source/lexbor/html/serialize.c +545 -41
  117. data/vendor/lexbor/source/lexbor/html/serialize.h +2 -1
  118. data/vendor/lexbor/source/lexbor/html/tokenizer.h +2 -2
  119. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1 -1
  120. data/vendor/lexbor/source/lexbor/html/tree.c +6 -6
  121. data/vendor/lexbor/source/lexbor/selectors/selectors.c +12 -3
  122. data/vendor/lexbor/source/lexbor/url/base.h +1 -1
  123. data/vendor/lexbor/source/lexbor/url/url.c +5 -2
  124. data/vendor/lexbor/source/lexbor/url/url.h +9 -0
  125. data/vendor/lexbor/version +1 -1
  126. metadata +53 -9
  127. data/ext/makiri/glue/ruby_css.c +0 -185
  128. data/ext/makiri/glue/ruby_serialize.c +0 -92
  129. data/ext/makiri/xpath/mkr_xpath_value.c +0 -1286
  130. data/lib/makiri/cdata.rb +0 -6
@@ -1,6 +1,18 @@
1
1
  #include "glue.h"
2
2
 
3
3
  #include <lexbor/html/parser.h>
4
+ #include <lexbor/ns/ns.h>
5
+
6
+ /* Exported by lexbor but omitted from its public headers. lxb_ns_append interns
7
+ * a namespace URI in the document's ns table; lxb_dom_attr_set_name_ns names an
8
+ * attribute from (namespace, qualified name), splitting prefix/local and
9
+ * interning the namespace. */
10
+ extern const lxb_ns_data_t *
11
+ lxb_ns_append(lexbor_hash_t *hash, const lxb_char_t *link, size_t length);
12
+ extern lxb_status_t
13
+ lxb_dom_attr_set_name_ns(lxb_dom_attr_t *attr, const lxb_char_t *link,
14
+ size_t link_length, const lxb_char_t *name,
15
+ size_t name_length, bool to_lowercase);
4
16
 
5
17
  /*
6
18
  * DOM mutation (v0.2). Thin wrappers over Lexbor's insert/remove/create
@@ -27,13 +39,14 @@ mkr_invalidate_index(VALUE node)
27
39
  mkr_parsed_text_index_invalidate(p);
28
40
  }
29
41
 
42
+ /* An HTML node argument for a tree mutation. Routes through mkr_html_node_unwrap so
43
+ * an XML node is rejected before its mkr_xml_node_t* reaches Lexbor (the
44
+ * same-document/cycle checks below read lxb fields, and the insert hands the
45
+ * pointer to Lexbor). */
30
46
  static lxb_dom_node_t *
31
47
  mkr_arg_node(VALUE v)
32
48
  {
33
- if (!rb_obj_is_kind_of(v, mkr_cNode)) {
34
- rb_raise(rb_eTypeError, "expected a Makiri::Node");
35
- }
36
- return mkr_node_unwrap(v);
49
+ return mkr_html_node_unwrap(v);
37
50
  }
38
51
 
39
52
  /* Validate that `incoming` may be placed relative to `ref` and detach it from
@@ -70,12 +83,22 @@ mkr_is_fragment(const lxb_dom_node_t *n)
70
83
  /* tree mutation */
71
84
  /* ------------------------------------------------------------------ */
72
85
 
86
+ /* Every tree / attribute mutation unwraps `self` through here first: a node the
87
+ * caller has frozen (Ruby's Object#freeze) is immutable, so raise FrozenError
88
+ * rather than silently editing it. Read accessors use mkr_html_node_unwrap (XML rejected at the type boundary). */
89
+ static lxb_dom_node_t *
90
+ mkr_node_unwrap_mutable(VALUE self)
91
+ {
92
+ rb_check_frozen(self);
93
+ return mkr_html_node_unwrap(self);
94
+ }
95
+
73
96
  /* node.add_child(child) -> child. Appends child as the last child. A document
74
97
  * fragment contributes its children rather than itself. */
75
98
  static VALUE
76
99
  mkr_node_add_child(VALUE self, VALUE rb_child)
77
100
  {
78
- lxb_dom_node_t *parent = mkr_node_unwrap(self);
101
+ lxb_dom_node_t *parent = mkr_node_unwrap_mutable(self);
79
102
  lxb_dom_node_t *child = mkr_arg_node(rb_child);
80
103
  mkr_prepare_insert(parent, child);
81
104
  if (mkr_is_fragment(child)) {
@@ -102,7 +125,7 @@ mkr_node_append(VALUE self, VALUE rb_child)
102
125
  static VALUE
103
126
  mkr_node_add_previous_sibling(VALUE self, VALUE rb_node)
104
127
  {
105
- lxb_dom_node_t *ref = mkr_node_unwrap(self);
128
+ lxb_dom_node_t *ref = mkr_node_unwrap_mutable(self);
106
129
  lxb_dom_node_t *node = mkr_arg_node(rb_node);
107
130
  if (ref->parent == NULL) {
108
131
  rb_raise(mkr_eError, "cannot add a sibling to a node with no parent");
@@ -124,7 +147,7 @@ mkr_node_add_previous_sibling(VALUE self, VALUE rb_node)
124
147
  static VALUE
125
148
  mkr_node_add_next_sibling(VALUE self, VALUE rb_node)
126
149
  {
127
- lxb_dom_node_t *ref = mkr_node_unwrap(self);
150
+ lxb_dom_node_t *ref = mkr_node_unwrap_mutable(self);
128
151
  lxb_dom_node_t *node = mkr_arg_node(rb_node);
129
152
  if (ref->parent == NULL) {
130
153
  rb_raise(mkr_eError, "cannot add a sibling to a node with no parent");
@@ -148,7 +171,7 @@ mkr_node_add_next_sibling(VALUE self, VALUE rb_node)
148
171
  static VALUE
149
172
  mkr_node_remove(VALUE self)
150
173
  {
151
- lxb_dom_node_t *node = mkr_node_unwrap(self);
174
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
152
175
  if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
153
176
  rb_raise(mkr_eError, "use delete(name) to remove an attribute");
154
177
  }
@@ -163,7 +186,7 @@ mkr_node_remove(VALUE self)
163
186
  static VALUE
164
187
  mkr_node_replace(VALUE self, VALUE rb_other)
165
188
  {
166
- lxb_dom_node_t *ref = mkr_node_unwrap(self);
189
+ lxb_dom_node_t *ref = mkr_node_unwrap_mutable(self);
167
190
  lxb_dom_node_t *other = mkr_arg_node(rb_other);
168
191
  if (ref->parent == NULL) {
169
192
  rb_raise(mkr_eError, "cannot replace a node with no parent");
@@ -191,7 +214,7 @@ mkr_node_replace(VALUE self, VALUE rb_other)
191
214
  static VALUE
192
215
  mkr_node_aset(VALUE self, VALUE rb_name, VALUE rb_value)
193
216
  {
194
- lxb_dom_node_t *node = mkr_node_unwrap(self);
217
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
195
218
  if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
196
219
  rb_raise(mkr_eError, "cannot set an attribute on a non-element node");
197
220
  }
@@ -210,13 +233,180 @@ mkr_node_aset(VALUE self, VALUE rb_name, VALUE rb_value)
210
233
  return rb_value;
211
234
  }
212
235
 
236
+ /* An attribute's OWN namespace id: the one recorded by set_attribute_ns (which
237
+ * differs from the owner element's), else the null namespace - a normally-set or
238
+ * parsed attribute inherits the element's ns, which for matching purposes is the
239
+ * null namespace (an unprefixed attribute is namespaceless). */
240
+ static lxb_ns_id_t
241
+ mkr_attr_own_ns(const lxb_dom_attr_t *at)
242
+ {
243
+ if (at->owner != NULL && at->node.ns != at->owner->node.ns) {
244
+ return at->node.ns;
245
+ }
246
+ return LXB_NS__UNDEF;
247
+ }
248
+
249
+ /* Find the attribute on `el` matching (ns_id, local_name) case-sensitively - the
250
+ * DOM keys attributes on (namespace, local name), so two with the same qualified
251
+ * name but different namespaces coexist (unlike Lexbor's by-qualified-name,
252
+ * case-insensitive-for-HTML lookup). */
253
+ static lxb_dom_attr_t *
254
+ mkr_attr_find_ns(lxb_dom_element_t *el, lxb_ns_id_t ns_id,
255
+ const lxb_char_t *local, size_t local_len)
256
+ {
257
+ for (lxb_dom_attr_t *at = el->first_attr; at != NULL; at = at->next) {
258
+ if (mkr_attr_own_ns(at) != ns_id) {
259
+ continue;
260
+ }
261
+ /* Compare the case-preserved local name (the suffix of the qualified
262
+ * name): Lexbor lower-cases the stored local_name even when the
263
+ * qualified name keeps its case, but setAttributeNS is case-sensitive. */
264
+ size_t qlen = 0, llen = 0;
265
+ const lxb_char_t *q = lxb_dom_attr_qualified_name(at, &qlen);
266
+ (void) lxb_dom_attr_local_name(at, &llen);
267
+ if (q != NULL && qlen >= llen
268
+ && mkr_bytes_eq(q + (qlen - llen), llen, local, local_len)) {
269
+ return at;
270
+ }
271
+ }
272
+ return NULL;
273
+ }
274
+
275
+ /* element.set_attribute_ns(namespace_or_nil, qualified_name, value) -> value.
276
+ *
277
+ * Stores the attribute under its qualified name (case-preserved - setAttributeNS
278
+ * is case-sensitive, unlike the HTML setAttribute family) and records its OWN
279
+ * namespace on the attr node, so namespaceURI / getAttributeNS resolve it. The
280
+ * namespace URI is interned in the document's ns table; nil/"" stores the null
281
+ * namespace (LXB_NS__UNDEF). */
282
+ static VALUE
283
+ mkr_node_set_attribute_ns(VALUE self, VALUE rb_ns, VALUE rb_qname, VALUE rb_value)
284
+ {
285
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
286
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
287
+ rb_raise(mkr_eError, "cannot set an attribute on a non-element node");
288
+ }
289
+ lxb_dom_element_t *el = lxb_dom_interface_element(node);
290
+
291
+ mkr_ruby_borrowed_text_t qv = mkr_ruby_verified_text(rb_qname, "attribute qualified name");
292
+ mkr_ruby_borrowed_text_t vv = mkr_ruby_verified_text(rb_value, "attribute value");
293
+
294
+ mkr_ruby_borrowed_text_t nv = {0};
295
+ bool have_ns = false;
296
+ if (!NIL_P(rb_ns)) {
297
+ nv = mkr_ruby_verified_text(rb_ns, "namespace");
298
+ have_ns = nv.len > 0;
299
+ }
300
+
301
+ /* Intern the wanted namespace (null/"" => LXB_NS__UNDEF) so the existing
302
+ * attribute is matched on (namespace, local name) - the DOM key - rather than
303
+ * the qualified name. */
304
+ lxb_ns_id_t want_ns = LXB_NS__UNDEF;
305
+ if (have_ns && node->owner_document != NULL && node->owner_document->ns != NULL) {
306
+ const lxb_ns_data_t *d = lxb_ns_append(node->owner_document->ns,
307
+ (const lxb_char_t *)nv.ptr, nv.len);
308
+ if (d != NULL) {
309
+ want_ns = d->ns_id;
310
+ }
311
+ }
312
+
313
+ const lxb_char_t *qn = (const lxb_char_t *)qv.ptr;
314
+ mkr_span_t qspan = mkr_span((const char *)qn, qv.len);
315
+ size_t colon_off;
316
+ bool has_colon = mkr_span_find(&qspan, ':', &colon_off);
317
+ const lxb_char_t *local = has_colon ? qn + colon_off + 1 : qn;
318
+ size_t local_len = has_colon ? qv.len - colon_off - 1 : qv.len;
319
+
320
+ /* A match keeps its qualified name (so re-setting with a different prefix
321
+ * leaves the prefix unchanged); only the value updates. A miss appends a new
322
+ * attribute, even when its qualified name collides with an existing one in a
323
+ * different namespace - the namespace-aware setter splits prefix/local and
324
+ * records the namespace; a null namespace just sets the bare name. */
325
+ lxb_dom_attr_t *attr = mkr_attr_find_ns(el, want_ns, local, local_len);
326
+ if (attr != NULL) {
327
+ if (lxb_dom_attr_set_value(attr, (const lxb_char_t *)vv.ptr, vv.len) != LXB_STATUS_OK) {
328
+ rb_raise(mkr_eError, "failed to set attribute value");
329
+ }
330
+ }
331
+ else {
332
+ attr = lxb_dom_attr_interface_create(node->owner_document);
333
+ if (attr == NULL) {
334
+ rb_raise(mkr_eError, "failed to create attribute");
335
+ }
336
+ /* A fresh attr is calloc'd, so node.ns is already LXB_NS__UNDEF for the
337
+ * null-namespace case; only the namespaced setter changes it. */
338
+ lxb_status_t st;
339
+ if (have_ns) {
340
+ st = lxb_dom_attr_set_name_ns(attr, (const lxb_char_t *)nv.ptr, nv.len,
341
+ (const lxb_char_t *)qv.ptr, qv.len, false);
342
+ }
343
+ else {
344
+ st = lxb_dom_attr_set_name(attr, (const lxb_char_t *)qv.ptr, qv.len, false);
345
+ }
346
+ if (st != LXB_STATUS_OK
347
+ || lxb_dom_attr_set_value(attr, (const lxb_char_t *)vv.ptr, vv.len) != LXB_STATUS_OK) {
348
+ /* Leave the un-appended attr for the document arena to free wholesale
349
+ * (the module's "never destroy a detached node" convention). */
350
+ rb_raise(mkr_eError, "failed to set namespaced attribute");
351
+ }
352
+ lxb_dom_element_attr_append(el, attr);
353
+ }
354
+
355
+ RB_GC_GUARD(qv.value);
356
+ RB_GC_GUARD(vv.value);
357
+ RB_GC_GUARD(nv.value);
358
+ mkr_invalidate_index(self);
359
+ return rb_value;
360
+ }
361
+
362
+ /* element.remove_attribute_ns(namespace_or_nil, local_name) -> nil. Removes the
363
+ * attribute matching (namespace, local name) - the DOM key - so a namespaced
364
+ * attribute is removed without disturbing a same-qualified-name one in another
365
+ * namespace (which removal by qualified name, case-insensitive for HTML, would). */
366
+ static VALUE
367
+ mkr_node_remove_attribute_ns(VALUE self, VALUE rb_ns, VALUE rb_local)
368
+ {
369
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
370
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
371
+ return Qnil;
372
+ }
373
+ lxb_dom_element_t *el = lxb_dom_interface_element(node);
374
+
375
+ mkr_ruby_borrowed_text_t lv = mkr_ruby_verified_text(rb_local, "attribute local name");
376
+
377
+ lxb_ns_id_t want_ns = LXB_NS__UNDEF;
378
+ VALUE ns_guard = Qnil;
379
+ if (!NIL_P(rb_ns)) {
380
+ mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_ns, "namespace");
381
+ ns_guard = nv.value;
382
+ if (nv.len > 0 && node->owner_document != NULL && node->owner_document->ns != NULL) {
383
+ const lxb_ns_data_t *d = lxb_ns_append(node->owner_document->ns,
384
+ (const lxb_char_t *)nv.ptr, nv.len);
385
+ if (d != NULL) {
386
+ want_ns = d->ns_id;
387
+ }
388
+ }
389
+ }
390
+
391
+ lxb_dom_attr_t *attr = mkr_attr_find_ns(el, want_ns,
392
+ (const lxb_char_t *)lv.ptr, lv.len);
393
+ if (attr != NULL) {
394
+ lxb_dom_element_attr_remove(el, attr);
395
+ mkr_invalidate_index(self);
396
+ }
397
+
398
+ RB_GC_GUARD(lv.value);
399
+ RB_GC_GUARD(ns_guard);
400
+ return Qnil;
401
+ }
402
+
213
403
  /* element.name = new_name -> new_name. Renames the element in place (identity
214
404
  * preserved): create a throwaway element with the new name so the document
215
405
  * interns it, copy its name fields onto this node, then discard it. */
216
406
  static VALUE
217
407
  mkr_node_set_name(VALUE self, VALUE rb_name)
218
408
  {
219
- lxb_dom_node_t *node = mkr_node_unwrap(self);
409
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
220
410
  if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
221
411
  rb_raise(mkr_eError, "name= is only supported on elements");
222
412
  }
@@ -245,7 +435,7 @@ mkr_node_set_name(VALUE self, VALUE rb_name)
245
435
  static VALUE
246
436
  mkr_node_set_content(VALUE self, VALUE rb_text)
247
437
  {
248
- lxb_dom_node_t *node = mkr_node_unwrap(self);
438
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
249
439
  mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_text, "node content");
250
440
  lxb_status_t st = lxb_dom_node_text_content_set(
251
441
  node, (const lxb_char_t *)tv.ptr, tv.len);
@@ -261,7 +451,7 @@ mkr_node_set_content(VALUE self, VALUE rb_text)
261
451
  static VALUE
262
452
  mkr_node_delete(VALUE self, VALUE rb_name)
263
453
  {
264
- lxb_dom_node_t *node = mkr_node_unwrap(self);
454
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
265
455
  if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
266
456
  return self;
267
457
  }
@@ -318,7 +508,11 @@ mkr_parse_fragment_into(lxb_dom_node_t *context_el, VALUE rb_html,
318
508
 
319
509
  mkr_import_fragment_children(doc, frag, emit, u);
320
510
 
321
- /* Frees the transient fragment document; our imported copies live on. */
511
+ /* lxb_html_parse_fragment built the fragment in a TRANSIENT document that
512
+ * destroying the parser does NOT free (measured: one document leaked per
513
+ * inner_html=/outer_html= call); our imported copies live in `doc`, so the
514
+ * transient document is destroyed explicitly. */
515
+ lxb_html_document_destroy(lxb_html_interface_document(frag->owner_document));
322
516
  lxb_html_parser_destroy(parser);
323
517
  RB_GC_GUARD(html);
324
518
  }
@@ -327,7 +521,7 @@ mkr_parse_fragment_into(lxb_dom_node_t *context_el, VALUE rb_html,
327
521
  static VALUE
328
522
  mkr_node_set_inner_html(VALUE self, VALUE rb_html)
329
523
  {
330
- lxb_dom_node_t *node = mkr_node_unwrap(self);
524
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
331
525
  if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
332
526
  rb_raise(mkr_eError, "inner_html= requires an element");
333
527
  }
@@ -348,7 +542,7 @@ mkr_node_set_inner_html(VALUE self, VALUE rb_html)
348
542
  static VALUE
349
543
  mkr_node_set_outer_html(VALUE self, VALUE rb_html)
350
544
  {
351
- lxb_dom_node_t *node = mkr_node_unwrap(self);
545
+ lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
352
546
  lxb_dom_node_t *parent = node->parent;
353
547
  if (parent == NULL || parent->type != LXB_DOM_NODE_TYPE_ELEMENT) {
354
548
  rb_raise(mkr_eError, "outer_html= requires a node with a parent element");
@@ -369,7 +563,7 @@ mkr_node_set_outer_html(VALUE self, VALUE rb_html)
369
563
  static VALUE
370
564
  mkr_doc_create_element(VALUE self, VALUE rb_name)
371
565
  {
372
- lxb_dom_document_t *doc = mkr_doc_unwrap(self);
566
+ lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
373
567
  mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_name, "element name");
374
568
  lxb_dom_element_t *el = lxb_dom_document_create_element(
375
569
  doc, (const lxb_char_t *)nv.ptr, nv.len, NULL);
@@ -377,13 +571,13 @@ mkr_doc_create_element(VALUE self, VALUE rb_name)
377
571
  if (el == NULL) {
378
572
  rb_raise(mkr_eError, "failed to create element");
379
573
  }
380
- return mkr_wrap_node(lxb_dom_interface_node(el), self);
574
+ return mkr_wrap_html_node(lxb_dom_interface_node(el), self);
381
575
  }
382
576
 
383
577
  static VALUE
384
578
  mkr_doc_create_text_node(VALUE self, VALUE rb_text)
385
579
  {
386
- lxb_dom_document_t *doc = mkr_doc_unwrap(self);
580
+ lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
387
581
  mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_text, "text content");
388
582
  lxb_dom_text_t *t = lxb_dom_document_create_text_node(
389
583
  doc, (const lxb_char_t *)tv.ptr, tv.len);
@@ -391,13 +585,13 @@ mkr_doc_create_text_node(VALUE self, VALUE rb_text)
391
585
  if (t == NULL) {
392
586
  rb_raise(mkr_eError, "failed to create text node");
393
587
  }
394
- return mkr_wrap_node(lxb_dom_interface_node(t), self);
588
+ return mkr_wrap_html_node(lxb_dom_interface_node(t), self);
395
589
  }
396
590
 
397
591
  static VALUE
398
592
  mkr_doc_create_comment(VALUE self, VALUE rb_text)
399
593
  {
400
- lxb_dom_document_t *doc = mkr_doc_unwrap(self);
594
+ lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
401
595
  mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_text, "comment content");
402
596
  lxb_dom_comment_t *c = lxb_dom_document_create_comment(
403
597
  doc, (const lxb_char_t *)tv.ptr, tv.len);
@@ -405,16 +599,16 @@ mkr_doc_create_comment(VALUE self, VALUE rb_text)
405
599
  if (c == NULL) {
406
600
  rb_raise(mkr_eError, "failed to create comment");
407
601
  }
408
- return mkr_wrap_node(lxb_dom_interface_node(c), self);
602
+ return mkr_wrap_html_node(lxb_dom_interface_node(c), self);
409
603
  }
410
604
 
411
- /* Document#create_processing_instruction(target, data) DOM
605
+ /* Document#create_processing_instruction(target, data) - DOM
412
606
  * createProcessingInstruction: a detached ProcessingInstruction owned by this
413
607
  * document. Lexbor validates the target, so an invalid one fails closed. */
414
608
  static VALUE
415
609
  mkr_doc_create_processing_instruction(VALUE self, VALUE rb_target, VALUE rb_data)
416
610
  {
417
- lxb_dom_document_t *doc = mkr_doc_unwrap(self);
611
+ lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
418
612
  mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_target, "processing instruction target");
419
613
  mkr_ruby_borrowed_text_t dv = mkr_ruby_verified_text(rb_data, "processing instruction data");
420
614
  lxb_dom_processing_instruction_t *pi = lxb_dom_document_create_processing_instruction(
@@ -424,50 +618,52 @@ mkr_doc_create_processing_instruction(VALUE self, VALUE rb_target, VALUE rb_data
424
618
  if (pi == NULL) {
425
619
  rb_raise(mkr_eError, "failed to create processing instruction");
426
620
  }
427
- return mkr_wrap_node(lxb_dom_interface_node(pi), self);
621
+ return mkr_wrap_html_node(lxb_dom_interface_node(pi), self);
428
622
  }
429
623
 
430
- /* Document#create_document_fragment DOM createDocumentFragment: an empty
624
+ /* Document#create_document_fragment - DOM createDocumentFragment: an empty
431
625
  * DocumentFragment owned by this document (unlike #fragment / DocumentFragment.parse,
432
626
  * which parse HTML; this makes an empty one to build up programmatically). */
433
627
  static VALUE
434
628
  mkr_doc_create_document_fragment(VALUE self)
435
629
  {
436
- lxb_dom_document_t *doc = mkr_doc_unwrap(self);
630
+ lxb_dom_document_t *doc = mkr_html_doc_unwrap(self);
437
631
  lxb_dom_document_fragment_t *f = lxb_dom_document_create_document_fragment(doc);
438
632
  if (f == NULL) {
439
633
  rb_raise(mkr_eError, "failed to create document fragment");
440
634
  }
441
- return mkr_wrap_node(lxb_dom_interface_node(f), self);
635
+ return mkr_wrap_html_node(lxb_dom_interface_node(f), self);
442
636
  }
443
637
 
444
638
  void
445
639
  mkr_init_mutate(void)
446
640
  {
447
- rb_define_method(mkr_cNode, "add_child", mkr_node_add_child, 1);
448
- rb_define_method(mkr_cNode, "<<", mkr_node_append, 1);
449
- rb_define_method(mkr_cNode, "add_previous_sibling", mkr_node_add_previous_sibling, 1);
450
- rb_define_method(mkr_cNode, "before", mkr_node_add_previous_sibling, 1);
451
- rb_define_method(mkr_cNode, "add_next_sibling", mkr_node_add_next_sibling, 1);
452
- rb_define_method(mkr_cNode, "after", mkr_node_add_next_sibling, 1);
453
- rb_define_method(mkr_cNode, "remove", mkr_node_remove, 0);
454
- rb_define_method(mkr_cNode, "unlink", mkr_node_remove, 0);
455
- rb_define_method(mkr_cNode, "replace", mkr_node_replace, 1);
456
-
457
- rb_define_method(mkr_cNode, "inner_html=", mkr_node_set_inner_html, 1);
458
- rb_define_method(mkr_cNode, "outer_html=", mkr_node_set_outer_html, 1);
459
-
460
- rb_define_method(mkr_cNode, "[]=", mkr_node_aset, 2);
461
- rb_define_method(mkr_cNode, "delete", mkr_node_delete, 1);
462
- rb_define_method(mkr_cNode, "remove_attribute", mkr_node_delete, 1);
463
- rb_define_method(mkr_cNode, "content=", mkr_node_set_content, 1);
464
- rb_define_method(mkr_cNode, "name=", mkr_node_set_name, 1);
465
-
466
- rb_define_method(mkr_cDocument, "create_element", mkr_doc_create_element, 1);
467
- rb_define_method(mkr_cDocument, "create_text_node", mkr_doc_create_text_node, 1);
468
- rb_define_method(mkr_cDocument, "create_comment", mkr_doc_create_comment, 1);
469
- rb_define_method(mkr_cDocument, "create_processing_instruction",
641
+ rb_define_method(mkr_mHtmlNodeMethods, "add_child", mkr_node_add_child, 1);
642
+ rb_define_method(mkr_mHtmlNodeMethods, "<<", mkr_node_append, 1);
643
+ rb_define_method(mkr_mHtmlNodeMethods, "add_previous_sibling", mkr_node_add_previous_sibling, 1);
644
+ rb_define_method(mkr_mHtmlNodeMethods, "before", mkr_node_add_previous_sibling, 1);
645
+ rb_define_method(mkr_mHtmlNodeMethods, "add_next_sibling", mkr_node_add_next_sibling, 1);
646
+ rb_define_method(mkr_mHtmlNodeMethods, "after", mkr_node_add_next_sibling, 1);
647
+ rb_define_method(mkr_mHtmlNodeMethods, "remove", mkr_node_remove, 0);
648
+ rb_define_method(mkr_mHtmlNodeMethods, "unlink", mkr_node_remove, 0);
649
+ rb_define_method(mkr_mHtmlNodeMethods, "replace", mkr_node_replace, 1);
650
+
651
+ rb_define_method(mkr_mHtmlNodeMethods, "inner_html=", mkr_node_set_inner_html, 1);
652
+ rb_define_method(mkr_mHtmlNodeMethods, "outer_html=", mkr_node_set_outer_html, 1);
653
+
654
+ rb_define_method(mkr_mHtmlNodeMethods, "[]=", mkr_node_aset, 2);
655
+ rb_define_method(mkr_mHtmlNodeMethods, "set_attribute_ns", mkr_node_set_attribute_ns, 3);
656
+ rb_define_method(mkr_mHtmlNodeMethods, "remove_attribute_ns", mkr_node_remove_attribute_ns, 2);
657
+ rb_define_method(mkr_mHtmlNodeMethods, "delete", mkr_node_delete, 1);
658
+ rb_define_method(mkr_mHtmlNodeMethods, "remove_attribute", mkr_node_delete, 1);
659
+ rb_define_method(mkr_mHtmlNodeMethods, "content=", mkr_node_set_content, 1);
660
+ rb_define_method(mkr_mHtmlNodeMethods, "name=", mkr_node_set_name, 1);
661
+
662
+ rb_define_method(mkr_cHtmlDocument, "create_element", mkr_doc_create_element, 1);
663
+ rb_define_method(mkr_cHtmlDocument, "create_text_node", mkr_doc_create_text_node, 1);
664
+ rb_define_method(mkr_cHtmlDocument, "create_comment", mkr_doc_create_comment, 1);
665
+ rb_define_method(mkr_cHtmlDocument, "create_processing_instruction",
470
666
  mkr_doc_create_processing_instruction, 2);
471
- rb_define_method(mkr_cDocument, "create_document_fragment",
667
+ rb_define_method(mkr_cHtmlDocument, "create_document_fragment",
472
668
  mkr_doc_create_document_fragment, 0);
473
669
  }