makiri 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +12 -7
  3. data/CHANGELOG.md +93 -14
  4. data/README.md +173 -7
  5. data/Rakefile +103 -7
  6. data/ext/makiri/bridge/bridge.h +28 -0
  7. data/ext/makiri/bridge/ruby_string.c +217 -0
  8. data/ext/makiri/core/mkr_alloc.h +1 -1
  9. data/ext/makiri/core/mkr_buf.c +35 -1
  10. data/ext/makiri/core/mkr_buf.h +37 -3
  11. data/ext/makiri/core/mkr_core.h +1 -1
  12. data/ext/makiri/core/mkr_hash.h +1 -1
  13. data/ext/makiri/core/mkr_text.h +8 -8
  14. data/ext/makiri/extconf.rb +20 -2
  15. data/ext/makiri/glue/glue.h +47 -11
  16. data/ext/makiri/glue/ruby_doc.c +117 -43
  17. data/ext/makiri/glue/ruby_html_css.c +246 -0
  18. data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +242 -51
  19. data/ext/makiri/glue/ruby_html_node.c +888 -0
  20. data/ext/makiri/glue/ruby_html_serialize.c +154 -0
  21. data/ext/makiri/glue/ruby_node.c +54 -748
  22. data/ext/makiri/glue/ruby_node_set.c +167 -32
  23. data/ext/makiri/glue/ruby_xml.c +420 -0
  24. data/ext/makiri/glue/ruby_xml_node.c +1386 -0
  25. data/ext/makiri/glue/ruby_xpath.c +59 -26
  26. data/ext/makiri/glue/ruby_xpath.h +19 -0
  27. data/ext/makiri/lexbor_compat/compat.h +42 -9
  28. data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
  29. data/ext/makiri/lexbor_compat/dom_index.c +2 -2
  30. data/ext/makiri/lexbor_compat/post_parse.c +100 -10
  31. data/ext/makiri/lexbor_compat/source_loc.c +13 -9
  32. data/ext/makiri/lexbor_compat/text_index.c +14 -8
  33. data/ext/makiri/lexbor_compat/utf8_input.c +85 -26
  34. data/ext/makiri/makiri.c +139 -6
  35. data/ext/makiri/makiri.h +43 -2
  36. data/ext/makiri/xml/mkr_xml.h +126 -0
  37. data/ext/makiri/xml/mkr_xml_chars.c +225 -0
  38. data/ext/makiri/xml/mkr_xml_mutate.c +875 -0
  39. data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
  40. data/ext/makiri/xml/mkr_xml_node.c +267 -0
  41. data/ext/makiri/xml/mkr_xml_node.h +119 -0
  42. data/ext/makiri/xml/mkr_xml_tree.c +1479 -0
  43. data/ext/makiri/xpath/mkr_xpath.c +59 -32
  44. data/ext/makiri/xpath/mkr_xpath.h +96 -4
  45. data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
  46. data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
  47. data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +202 -175
  48. data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +110 -86
  49. data/ext/makiri/xpath/mkr_xpath_internal.h +91 -200
  50. data/ext/makiri/xpath/mkr_xpath_lex.c +2 -2
  51. data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
  52. data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +142 -0
  53. data/ext/makiri/xpath/mkr_xpath_parse.c +5 -5
  54. data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
  55. data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
  56. data/ext/makiri/xpath/mkr_xpath_shared.c +593 -0
  57. data/ext/makiri/xpath/{mkr_xpath_value.c → mkr_xpath_value_body.h} +145 -656
  58. data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
  59. data/lib/makiri/{attribute.rb → attr.rb} +7 -3
  60. data/lib/makiri/cdata_section.rb +21 -0
  61. data/lib/makiri/comment.rb +12 -0
  62. data/lib/makiri/compat_aliases.rb +30 -0
  63. data/lib/makiri/document.rb +4 -76
  64. data/lib/makiri/document_fragment.rb +14 -9
  65. data/lib/makiri/element.rb +5 -3
  66. data/lib/makiri/html/document.rb +106 -0
  67. data/lib/makiri/html/node_methods.rb +19 -0
  68. data/lib/makiri/html.rb +12 -0
  69. data/lib/makiri/node.rb +58 -15
  70. data/lib/makiri/node_set.rb +8 -0
  71. data/lib/makiri/processing_instruction.rb +12 -0
  72. data/lib/makiri/text.rb +2 -0
  73. data/lib/makiri/version.rb +1 -1
  74. data/lib/makiri/xml/document.rb +24 -0
  75. data/lib/makiri/xml/node_methods.rb +37 -0
  76. data/lib/makiri/xml.rb +10 -0
  77. data/lib/makiri/xpath_context.rb +1 -1
  78. data/lib/makiri.rb +23 -5
  79. data/script/build_native_gem.rb +2 -2
  80. data/script/check_c_safety.rb +32 -0
  81. data/script/check_c_safety_allowlist.yml +83 -0
  82. metadata +35 -9
  83. data/ext/makiri/glue/ruby_css.c +0 -185
  84. data/ext/makiri/glue/ruby_serialize.c +0 -92
  85. data/lib/makiri/cdata.rb +0 -6
@@ -0,0 +1,888 @@
1
+ /* ruby_html_node.c - the HTML (Lexbor) node representation: wrapping an
2
+ * lxb_dom_node_t into a Makiri::HTML::* leaf, the HTML node-pointer accessor, and
3
+ * all the HTML node reader methods. The XML counterpart is ruby_xml_node.c; the
4
+ * shared, representation-neutral node type system (the TypedData types plus the
5
+ * kind-agnostic mkr_node_raw / mkr_node_id / mkr_node_document accessors) lives in
6
+ * ruby_node.c. */
7
+ #include "glue.h"
8
+
9
+ #include <lexbor/ns/ns.h> /* lxb_ns_by_id, LXB_NS__UNDEF (namespaceURI) */
10
+
11
+ /* ------------------------------------------------------------------ */
12
+ /* wrap / unwrap */
13
+ /* ------------------------------------------------------------------ */
14
+
15
+ VALUE
16
+ mkr_wrap_html_node(lxb_dom_node_t *node, VALUE document)
17
+ {
18
+ if (node == NULL) {
19
+ return Qnil;
20
+ }
21
+
22
+ /* The document node maps back onto the Ruby Document object. */
23
+ if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT) {
24
+ return document;
25
+ }
26
+
27
+ /* An HTML (lxb_dom) node wraps to a Makiri::HTML::* leaf; the leaf carries the
28
+ * lxb_dom reader methods via the included mkr_mHtmlNodeMethods module. XML
29
+ * nodes get their own wrap path (Makiri::XML::* leaves) in step 2. An uncommon
30
+ * DOM node type with no specific leaf (entity/notation - Lexbor's HTML parser
31
+ * does not produce these) falls back to the generic Makiri::HTML::Node rather
32
+ * than being misclassified as an Element. */
33
+ VALUE klass;
34
+ switch (node->type) {
35
+ case LXB_DOM_NODE_TYPE_ELEMENT: klass = mkr_cHtmlElement; break;
36
+ case LXB_DOM_NODE_TYPE_ATTRIBUTE: klass = mkr_cHtmlAttr; break;
37
+ case LXB_DOM_NODE_TYPE_TEXT: klass = mkr_cHtmlText; break;
38
+ case LXB_DOM_NODE_TYPE_COMMENT: klass = mkr_cHtmlComment; break;
39
+ case LXB_DOM_NODE_TYPE_CDATA_SECTION: klass = mkr_cHtmlCDATASection; break;
40
+ case LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION:
41
+ klass = mkr_cHtmlProcessingInstruction; break;
42
+ case LXB_DOM_NODE_TYPE_DOCUMENT_TYPE: klass = mkr_cHtmlDocumentType; break;
43
+ case LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT:
44
+ klass = mkr_cHtmlDocumentFragment; break;
45
+ default: klass = mkr_cHtmlNode; break;
46
+ }
47
+
48
+ mkr_node_data_t *nd;
49
+ VALUE obj = TypedData_Make_Struct(klass, mkr_node_data_t, &mkr_html_node_type, nd);
50
+ nd->node = (mkr_raw_node_t *)node;
51
+ nd->document = document;
52
+ return obj;
53
+ }
54
+
55
+ /* The HTML node-pointer accessor: returns the lxb_dom_node_t for an HTML node or
56
+ * HTML Document, and RAISES TypeError for an XML node/Document (TypedData_Get_Struct
57
+ * checks mkr_html_node_type, which an XML node - wrapped under mkr_xml_node_type -
58
+ * does not satisfy). Every HTML-glue site that dereferences a node or hands its
59
+ * pointer to Lexbor MUST use this, for `self` and for arguments alike. */
60
+ lxb_dom_node_t *
61
+ mkr_html_node_unwrap(VALUE rb_node)
62
+ {
63
+ if (rb_obj_is_kind_of(rb_node, mkr_cDocument)) {
64
+ if (rb_obj_is_kind_of(rb_node, mkr_cXmlDocument)) {
65
+ rb_raise(rb_eTypeError, "expected an HTML node, got a Makiri::XML::Document");
66
+ }
67
+ return (lxb_dom_node_t *)mkr_html_doc_unwrap(rb_node);
68
+ }
69
+ mkr_node_data_t *nd;
70
+ TypedData_Get_Struct(rb_node, mkr_node_data_t, &mkr_html_node_type, nd);
71
+ return (lxb_dom_node_t *)nd->node;
72
+ }
73
+
74
+ /* mkr_node_raw / mkr_node_id / mkr_node_document (the kind-agnostic accessors) and
75
+ * the TypedData types live in ruby_node.c (the shared node core). */
76
+
77
+ /* ------------------------------------------------------------------ */
78
+ /* name / type / content */
79
+ /* ------------------------------------------------------------------ */
80
+
81
+ /*
82
+ * Node name. Matches Nokogiri: lowercase tag name for HTML elements
83
+ * (Lexbor lowercases during tokenization), and the un-prefixed DOM names
84
+ * "text"/"comment"/"#cdata-section"/"document" for the other kinds.
85
+ */
86
+ static VALUE
87
+ mkr_node_name(VALUE self)
88
+ {
89
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
90
+ size_t len = 0;
91
+ const lxb_char_t *name;
92
+
93
+ switch (node->type) {
94
+ case LXB_DOM_NODE_TYPE_ELEMENT:
95
+ name = lxb_dom_element_qualified_name(lxb_dom_interface_element(node), &len);
96
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
97
+ case LXB_DOM_NODE_TYPE_ATTRIBUTE:
98
+ name = lxb_dom_attr_qualified_name(lxb_dom_interface_attr(node), &len);
99
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
100
+ case LXB_DOM_NODE_TYPE_TEXT:
101
+ return rb_utf8_str_new_cstr("text");
102
+ case LXB_DOM_NODE_TYPE_COMMENT:
103
+ return rb_utf8_str_new_cstr("comment");
104
+ case LXB_DOM_NODE_TYPE_CDATA_SECTION:
105
+ return rb_utf8_str_new_cstr("#cdata-section");
106
+ case LXB_DOM_NODE_TYPE_DOCUMENT:
107
+ return rb_utf8_str_new_cstr("document");
108
+ default:
109
+ name = lxb_dom_node_name(node, &len);
110
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
111
+ }
112
+ }
113
+
114
+ /* ------------------------------------------------------------------ */
115
+ /* namespace (WHATWG DOM Element/Attr: namespaceURI/prefix/localName) */
116
+ /* ------------------------------------------------------------------ */
117
+
118
+ /*
119
+ * Local name (DOM `localName`): the name without any prefix - "div" for
120
+ * <div>, "path" for an SVG <path>, "href" for an xlink:href attribute.
121
+ * Defined on Element and Attribute only; nil for the other node kinds (the DOM
122
+ * gives a Text/Comment/Document no localName).
123
+ */
124
+ static VALUE
125
+ mkr_node_local_name(VALUE self)
126
+ {
127
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
128
+ size_t len = 0;
129
+ const lxb_char_t *name;
130
+
131
+ switch (node->type) {
132
+ case LXB_DOM_NODE_TYPE_ELEMENT:
133
+ name = lxb_dom_element_local_name(lxb_dom_interface_element(node), &len);
134
+ break;
135
+ case LXB_DOM_NODE_TYPE_ATTRIBUTE: {
136
+ /* The case-preserved local name is the suffix of the qualified name;
137
+ * Lexbor's stored local_name is lower-cased even when the qualified name
138
+ * keeps its case (set_attribute_ns is case-sensitive). */
139
+ lxb_dom_attr_t *at = lxb_dom_interface_attr(node);
140
+ size_t qlen = 0, llen = 0;
141
+ const lxb_char_t *q = lxb_dom_attr_qualified_name(at, &qlen);
142
+ (void) lxb_dom_attr_local_name(at, &llen);
143
+ if (q != NULL && qlen >= llen) {
144
+ name = q + (qlen - llen);
145
+ len = llen;
146
+ }
147
+ else {
148
+ name = lxb_dom_attr_local_name(at, &len);
149
+ }
150
+ break;
151
+ }
152
+ default:
153
+ return Qnil;
154
+ }
155
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
156
+ }
157
+
158
+ /*
159
+ * Namespace prefix (DOM `prefix`): nil unless the qualified name is
160
+ * `prefix:local` - typically nil for HTML5-parsed content. Derived from the
161
+ * qualified-vs-local length (qualified == prefix ":" local), so a colon inside
162
+ * a local name can't be mistaken for a separator. Element/Attribute only.
163
+ */
164
+ static VALUE
165
+ mkr_node_prefix(VALUE self)
166
+ {
167
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
168
+ const lxb_char_t *q = NULL;
169
+ size_t qlen = 0, llen = 0;
170
+
171
+ switch (node->type) {
172
+ case LXB_DOM_NODE_TYPE_ELEMENT: {
173
+ lxb_dom_element_t *el = lxb_dom_interface_element(node);
174
+ q = lxb_dom_element_qualified_name(el, &qlen);
175
+ (void) lxb_dom_element_local_name(el, &llen);
176
+ break;
177
+ }
178
+ case LXB_DOM_NODE_TYPE_ATTRIBUTE: {
179
+ lxb_dom_attr_t *at = lxb_dom_interface_attr(node);
180
+ q = lxb_dom_attr_qualified_name(at, &qlen);
181
+ (void) lxb_dom_attr_local_name(at, &llen);
182
+ break;
183
+ }
184
+ default:
185
+ return Qnil;
186
+ }
187
+ if (q == NULL || qlen <= llen + 1) { /* no "prefix:" segment */
188
+ return Qnil;
189
+ }
190
+ return mkr_ruby_str_from_borrowed(
191
+ mkr_borrowed_text((const char *)q, qlen - llen - 1));
192
+ }
193
+
/*
 * Map an attribute prefix to the fixed namespace the HTML parser assigns to
 * foreign-content attributes (the "adjust foreign attributes" step).
 *
 * p / n: the prefix bytes and their count (p need not be NUL-terminated).
 * Returns the namespace URI for exactly "xlink", "xml" or "xmlns", and NULL
 * (=> DOM null) for any other prefix. The match is exact in length and case.
 */
static const char *
mkr_attr_ns_for_prefix(const char *p, size_t n)
{
    static const struct {
        const char *prefix;
        size_t      len;
        const char *uri;
    } known[] = {
        { "xlink", 5, "http://www.w3.org/1999/xlink" },
        { "xml",   3, "http://www.w3.org/XML/1998/namespace" },
        { "xmlns", 5, "http://www.w3.org/2000/xmlns/" },
    };

    for (size_t i = 0; i < sizeof known / sizeof known[0]; i++) {
        /* The length check comes first so memcmp never reads past n bytes of p. */
        if (n == known[i].len && memcmp(p, known[i].prefix, n) == 0) {
            return known[i].uri;
        }
    }
    return NULL;
}
209
+
210
+ /*
211
+ * Namespace URI (DOM `namespaceURI`).
212
+ *
213
+ * Element: resolved from node->ns, so - DOM-faithfully - an HTML element is in
214
+ * the XHTML namespace ("http://www.w3.org/1999/xhtml"), not nil (an HTML
215
+ * element is never namespaceless; this is what browsers' DOM and `namespace-uri()`
216
+ * return). SVG/MathML elements get their own URI; nil only when truly
217
+ * unnamespaced (LXB_NS__UNDEF).
218
+ *
219
+ * Attribute: nil for an unprefixed attribute (class, id, ...); for a prefixed
220
+ * one, the parser-assigned foreign-content namespace keyed on the prefix
221
+ * (xlink/xml/xmlns), else nil.
222
+ *
223
+ * Other node kinds: nil.
224
+ */
225
+ static VALUE
226
+ mkr_node_namespace_uri(VALUE self)
227
+ {
228
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
229
+
230
+ if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
231
+ if (node->ns == LXB_NS__UNDEF) {
232
+ return Qnil;
233
+ }
234
+ lxb_dom_document_t *doc = node->owner_document;
235
+ if (doc == NULL || doc->ns == NULL) {
236
+ return Qnil;
237
+ }
238
+ size_t len = 0;
239
+ const lxb_char_t *uri = lxb_ns_by_id(doc->ns, node->ns, &len);
240
+ if (uri == NULL || len == 0) {
241
+ return Qnil;
242
+ }
243
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)uri, len));
244
+ }
245
+
246
+ if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
247
+ lxb_dom_attr_t *at = lxb_dom_interface_attr(node);
248
+
249
+ /* An attribute set via set_attribute_ns records its OWN namespace on the
250
+ * attr node - distinguishable because it differs from the owner element's
251
+ * ns (a normally-set/parsed attr inherits the element's). Resolve it from
252
+ * the interned id; LXB_NS__UNDEF (set by set_attribute_ns(nil, ...)) is
253
+ * the null namespace. */
254
+ if (at->owner != NULL && node->ns != at->owner->node.ns) {
255
+ if (node->ns == LXB_NS__UNDEF) {
256
+ return Qnil;
257
+ }
258
+ lxb_dom_document_t *doc = node->owner_document;
259
+ if (doc != NULL && doc->ns != NULL) {
260
+ size_t len = 0;
261
+ const lxb_char_t *uri = lxb_ns_by_id(doc->ns, node->ns, &len);
262
+ if (uri != NULL && len != 0) {
263
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)uri, len));
264
+ }
265
+ }
266
+ return Qnil;
267
+ }
268
+
269
+ size_t qlen = 0, llen = 0;
270
+ const lxb_char_t *q = lxb_dom_attr_qualified_name(at, &qlen);
271
+ (void) lxb_dom_attr_local_name(at, &llen);
272
+ if (q == NULL || qlen <= llen + 1) {
273
+ return Qnil; /* unprefixed attribute => no namespace */
274
+ }
275
+ const char *uri = mkr_attr_ns_for_prefix((const char *)q, qlen - llen - 1);
276
+ return uri ? rb_utf8_str_new_cstr(uri) : Qnil;
277
+ }
278
+
279
+ return Qnil;
280
+ }
281
+
282
+ /*
283
+ * Element#tag_name (DOM `tagName`): the qualified name, uppercased for an HTML
284
+ * element in an HTML document ("DIV"), as the DOM specifies - unlike #name,
285
+ * which is the lowercase qualified name. SVG/MathML elements keep their case.
286
+ * nil for non-element nodes.
287
+ */
288
+ static VALUE
289
+ mkr_node_tag_name(VALUE self)
290
+ {
291
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
292
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
293
+ return Qnil;
294
+ }
295
+ size_t len = 0;
296
+ const lxb_char_t *name =
297
+ lxb_dom_element_tag_name(lxb_dom_interface_element(node), &len);
298
+ if (name == NULL) {
299
+ return Qnil;
300
+ }
301
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
302
+ }
303
+
304
+ /*
305
+ * ProcessingInstruction#target (DOM `target`): the PI's target name
306
+ * (the "xml" in <?xml ...?>). nil for non-PI nodes. The PI's data is read via
307
+ * #content / #text like any character-data node.
308
+ */
309
+ static VALUE
310
+ mkr_node_pi_target(VALUE self)
311
+ {
312
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
313
+ if (node->type != LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) {
314
+ return Qnil;
315
+ }
316
+ size_t len = 0;
317
+ const lxb_char_t *t = lxb_dom_processing_instruction_target(
318
+ lxb_dom_interface_processing_instruction(node), &len);
319
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)t, len));
320
+ }
321
+
322
+ /* Numeric DOM node type (LXB_DOM_NODE_TYPE_*). */
323
+ static VALUE
324
+ mkr_node_get_type(VALUE self)
325
+ {
326
+ return INT2NUM((int)mkr_html_node_unwrap(self)->type);
327
+ }
328
+
329
+ /*
330
+ * DocumentType public / system identifiers (WHATWG DOM `publicId`/`systemId`).
331
+ * Returns the String, or nil when the doctype carries no such identifier.
332
+ * Lexbor represents a missing id inconsistently (NULL after `SYSTEM`, but an
333
+ * empty string for a bare `<!DOCTYPE html>`), so we treat empty as absent and
334
+ * return nil for both - matching Nokogiri (which also reports nil for an empty
335
+ * or missing id). Defined only on Makiri::DocumentType, so the receiver is
336
+ * always a doctype node; the guard is belt-and-suspenders.
337
+ */
338
+ static VALUE
339
+ mkr_doctype_id(VALUE self, int system)
340
+ {
341
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
342
+ if (node->type != LXB_DOM_NODE_TYPE_DOCUMENT_TYPE) {
343
+ return Qnil;
344
+ }
345
+ lxb_dom_document_type_t *dt = lxb_dom_interface_document_type(node);
346
+ size_t len = 0;
347
+ const lxb_char_t *id = system ? lxb_dom_document_type_system_id(dt, &len)
348
+ : lxb_dom_document_type_public_id(dt, &len);
349
+ return (id == NULL || len == 0)
350
+ ? Qnil
351
+ : mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)id, len));
352
+ }
353
+
354
+ static VALUE
355
+ mkr_doctype_public_id(VALUE self)
356
+ {
357
+ return mkr_doctype_id(self, 0);
358
+ }
359
+
360
+ static VALUE
361
+ mkr_doctype_system_id(VALUE self)
362
+ {
363
+ return mkr_doctype_id(self, 1);
364
+ }
365
+
366
+ /*
367
+ * A <template> element's "template contents" - the separate DocumentFragment
368
+ * the HTML parser fills instead of making the parsed nodes children of the
369
+ * <template> (WHATWG DOM `HTMLTemplateElement.content`; browsers behave the
370
+ * same: template.children is empty, template.content holds the nodes). Lexbor
371
+ * stores it on the template interface; we surface it as a Makiri::DocumentFragment
372
+ * so it can be traversed/queried (`tpl.content_fragment.css("p")`).
373
+ *
374
+ * Returns nil for any node that is not an HTML <template>. Note: CSS/XPath over
375
+ * the *template element itself* deliberately do NOT descend into the content
376
+ * (matching the DOM, and unavoidable for CSS since it runs Lexbor's selector
377
+ * engine over the real tree) - query the fragment instead.
378
+ */
379
+ static VALUE
380
+ mkr_node_content_fragment(VALUE self)
381
+ {
382
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
383
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT
384
+ || node->local_name != LXB_TAG_TEMPLATE
385
+ || node->ns != LXB_NS_HTML) {
386
+ return Qnil;
387
+ }
388
+ lxb_dom_document_fragment_t *content = lxb_html_interface_template(node)->content;
389
+ if (content == NULL) {
390
+ return Qnil;
391
+ }
392
+ return mkr_wrap_html_node((lxb_dom_node_t *)content, mkr_node_document(self));
393
+ }
394
+
395
+ /* Concatenated text content of this node and its descendants. The DOM spec
396
+ * makes a Document's textContent null; we instead return the text of the root
397
+ * element (matching the intuitive, Nokogiri-like Document#text). */
398
+ static VALUE
399
+ mkr_node_content(VALUE self)
400
+ {
401
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
402
+ if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT) {
403
+ node = lxb_dom_document_root((lxb_dom_document_t *)node);
404
+ if (node == NULL) {
405
+ return rb_utf8_str_new("", 0);
406
+ }
407
+ }
408
+
409
+ /* Fast path for elements / fragments (the common case, incl. document text).
410
+ *
411
+ * Preferred: the per-document text index (lexbor_compat/text_index.c) maps
412
+ * this node to the contiguous, document-order run of its descendants' text
413
+ * slices, so we serve a single pre-sized memcpy run with no per-extraction
414
+ * tree walk - the walk is otherwise the dominant, cache-bound cost. Built
415
+ * lazily on first use and dropped on any mutation, so a slice can never
416
+ * point at reallocated/detached storage.
417
+ *
418
+ * Fallback (index unavailable - node outside the indexed tree, e.g. a
419
+ * fragment, or a build OOM): stream each descendant text/CDATA node's data
420
+ * straight into the Ruby string via an iterative pre-order walk (stack-safe;
421
+ * skips Lexbor's intermediate arena buffer + copy). */
422
+ if (node->type == LXB_DOM_NODE_TYPE_ELEMENT
423
+ || node->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
424
+ mkr_parsed_t *parsed = mkr_doc_parsed(mkr_node_document(self));
425
+ const mkr_borrowed_text_t *slices;
426
+ size_t nslices, total;
427
+ if (parsed != NULL
428
+ && mkr_parsed_text_slices(parsed, node, &slices, &nslices, &total)) {
429
+ return mkr_ruby_str_from_slices(slices, nslices, total);
430
+ }
431
+
432
+ VALUE str = rb_utf8_str_new(NULL, 0);
433
+ for (lxb_dom_node_t *n = node->first_child; n != NULL;) {
434
+ if (n->type == LXB_DOM_NODE_TYPE_TEXT
435
+ || n->type == LXB_DOM_NODE_TYPE_CDATA_SECTION) {
436
+ const lexbor_str_t *d = &lxb_dom_interface_character_data(n)->data;
437
+ if (d->data != NULL && d->length != 0) {
438
+ rb_str_cat(str, (const char *)d->data, (long)d->length);
439
+ }
440
+ }
441
+ if (n->first_child != NULL) { n = n->first_child; continue; }
442
+ while (n != node && n->next == NULL) { n = n->parent; }
443
+ if (n == node) { break; }
444
+ n = n->next;
445
+ }
446
+ return str;
447
+ }
448
+
449
+ /* Character-data and other node kinds keep the general (proven) path. */
450
+ size_t len = 0;
451
+ lxb_char_t *text = lxb_dom_node_text_content(node, &len);
452
+ if (text == NULL) {
453
+ return rb_utf8_str_new("", 0);
454
+ }
455
+ VALUE str = rb_utf8_str_new((const char *)text, len);
456
+ lxb_dom_document_destroy_text(node->owner_document, text);
457
+ return str;
458
+ }
459
+
460
+ /* ------------------------------------------------------------------ */
461
+ /* tree navigation */
462
+ /* ------------------------------------------------------------------ */
463
+
464
+ static VALUE
465
+ mkr_node_get_document(VALUE self)
466
+ {
467
+ return mkr_node_document(self);
468
+ }
469
+
470
+ static VALUE
471
+ mkr_node_parent(VALUE self)
472
+ {
473
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
474
+ VALUE document = mkr_node_document(self);
475
+
476
+ /* Lexbor never links an attribute back to its element, so node->parent is
477
+ * NULL for attributes. Resolve via the compat attr->owner index. */
478
+ if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
479
+ lxb_dom_node_t *owner =
480
+ mkr_parsed_attr_owner(mkr_doc_parsed(document),
481
+ lxb_dom_interface_attr(node));
482
+ return mkr_wrap_html_node(owner, document);
483
+ }
484
+
485
+ return mkr_wrap_html_node(node->parent, document);
486
+ }
487
+
488
+ static VALUE
489
+ mkr_node_next(VALUE self)
490
+ {
491
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
492
+ return mkr_wrap_html_node(node->next, mkr_node_document(self));
493
+ }
494
+
495
+ static VALUE
496
+ mkr_node_previous(VALUE self)
497
+ {
498
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
499
+ return mkr_wrap_html_node(node->prev, mkr_node_document(self));
500
+ }
501
+
502
+ static VALUE
503
+ mkr_node_next_element(VALUE self)
504
+ {
505
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self)->next;
506
+ while (node != NULL && node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
507
+ node = node->next;
508
+ }
509
+ return mkr_wrap_html_node(node, mkr_node_document(self));
510
+ }
511
+
512
+ static VALUE
513
+ mkr_node_previous_element(VALUE self)
514
+ {
515
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self)->prev;
516
+ while (node != NULL && node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
517
+ node = node->prev;
518
+ }
519
+ return mkr_wrap_html_node(node, mkr_node_document(self));
520
+ }
521
+
522
+ /* First child node (any type), or nil. */
523
+ static VALUE
524
+ mkr_node_child(VALUE self)
525
+ {
526
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
527
+ return mkr_wrap_html_node(node->first_child, mkr_node_document(self));
528
+ }
529
+
530
+ /* All child nodes as a NodeSet. */
531
+ static VALUE
532
+ mkr_node_children(VALUE self)
533
+ {
534
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
535
+ VALUE document = mkr_node_document(self);
536
+ VALUE set = mkr_node_set_new(document);
537
+ for (lxb_dom_node_t *c = node->first_child; c != NULL; c = c->next) {
538
+ mkr_node_set_push(set, (mkr_raw_node_t *)c);
539
+ }
540
+ return set;
541
+ }
542
+
543
+ /* Child elements only, as a NodeSet. */
544
+ static VALUE
545
+ mkr_node_element_children(VALUE self)
546
+ {
547
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
548
+ VALUE document = mkr_node_document(self);
549
+ VALUE set = mkr_node_set_new(document);
550
+ for (lxb_dom_node_t *c = node->first_child; c != NULL; c = c->next) {
551
+ if (c->type == LXB_DOM_NODE_TYPE_ELEMENT) {
552
+ mkr_node_set_push(set, (mkr_raw_node_t *)c);
553
+ }
554
+ }
555
+ return set;
556
+ }
557
+
558
+ /* Ancestor elements, nearest first (parent, grandparent, ... root). */
559
+ static VALUE
560
+ mkr_node_ancestors(VALUE self)
561
+ {
562
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
563
+ VALUE document = mkr_node_document(self);
564
+ VALUE set = mkr_node_set_new(document);
565
+ for (lxb_dom_node_t *p = node->parent; p != NULL; p = p->parent) {
566
+ if (p->type == LXB_DOM_NODE_TYPE_ELEMENT) {
567
+ mkr_node_set_push(set, (mkr_raw_node_t *)p);
568
+ }
569
+ }
570
+ return set;
571
+ }
572
+
573
+ static VALUE
574
+ mkr_node_first_element_child(VALUE self)
575
+ {
576
+ lxb_dom_node_t *c = mkr_html_node_unwrap(self)->first_child;
577
+ while (c != NULL && c->type != LXB_DOM_NODE_TYPE_ELEMENT) {
578
+ c = c->next;
579
+ }
580
+ return mkr_wrap_html_node(c, mkr_node_document(self));
581
+ }
582
+
583
+ static VALUE
584
+ mkr_node_last_element_child(VALUE self)
585
+ {
586
+ lxb_dom_node_t *c = mkr_html_node_unwrap(self)->last_child;
587
+ while (c != NULL && c->type != LXB_DOM_NODE_TYPE_ELEMENT) {
588
+ c = c->prev;
589
+ }
590
+ return mkr_wrap_html_node(c, mkr_node_document(self));
591
+ }
592
+
593
+ /* ------------------------------------------------------------------ */
594
+ /* attributes (read-only) */
595
+ /* ------------------------------------------------------------------ */
596
+
597
+ /* node[name] -> String or nil (nil when not an element or absent). */
598
+ static VALUE
599
+ mkr_node_aref(VALUE self, VALUE rb_name)
600
+ {
601
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
602
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
603
+ return Qnil;
604
+ }
605
+
606
+ mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_name, "attribute name");
607
+ const lxb_char_t *nm = (const lxb_char_t *)nv.ptr;
608
+ size_t nlen = nv.len;
609
+
610
+ lxb_dom_element_t *el = lxb_dom_interface_element(node);
611
+ if (!lxb_dom_element_has_attribute(el, nm, nlen)) {
612
+ return Qnil;
613
+ }
614
+
615
+ size_t vlen = 0;
616
+ const lxb_char_t *val = lxb_dom_element_get_attribute(el, nm, nlen, &vlen);
617
+ RB_GC_GUARD(nv.value);
618
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)val, vlen));
619
+ }
620
+
621
+ /* node.key?(name) -> true/false */
622
+ static VALUE
623
+ mkr_node_has_key(VALUE self, VALUE rb_name)
624
+ {
625
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
626
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
627
+ return Qfalse;
628
+ }
629
+ mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_name, "attribute name");
630
+ lxb_dom_element_t *el = lxb_dom_interface_element(node);
631
+ bool has = lxb_dom_element_has_attribute(el, (const lxb_char_t *)nv.ptr, nv.len);
632
+ RB_GC_GUARD(nv.value);
633
+ return has ? Qtrue : Qfalse;
634
+ }
635
+
636
+ /* node.keys -> [String, ...] of attribute names (document order). */
637
+ static VALUE
638
+ mkr_node_keys(VALUE self)
639
+ {
640
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
641
+ VALUE ary = rb_ary_new();
642
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
643
+ return ary;
644
+ }
645
+ lxb_dom_attr_t *attr =
646
+ lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
647
+ while (attr != NULL) {
648
+ size_t len = 0;
649
+ const lxb_char_t *name = lxb_dom_attr_qualified_name(attr, &len);
650
+ rb_ary_push(ary, mkr_ruby_str_from_borrowed(
651
+ mkr_borrowed_text((const char *)name, len)));
652
+ attr = lxb_dom_element_next_attribute(attr);
653
+ }
654
+ return ary;
655
+ }
656
+
657
+ /* node.values -> [String, ...] of attribute values (document order). */
658
+ static VALUE
659
+ mkr_node_values(VALUE self)
660
+ {
661
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
662
+ VALUE ary = rb_ary_new();
663
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
664
+ return ary;
665
+ }
666
+ lxb_dom_attr_t *attr =
667
+ lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
668
+ while (attr != NULL) {
669
+ size_t len = 0;
670
+ const lxb_char_t *val = lxb_dom_attr_value(attr, &len);
671
+ rb_ary_push(ary, mkr_ruby_str_from_borrowed(
672
+ mkr_borrowed_text((const char *)val, len)));
673
+ attr = lxb_dom_element_next_attribute(attr);
674
+ }
675
+ return ary;
676
+ }
677
+
678
+ /* element.attribute_nodes -> NodeSet of Attribute nodes (document order).
679
+ * Empty for non-elements. These wrap the bare lxb_dom_attr_t; navigating back
680
+ * with Attribute#parent goes through the compat attr->owner index. */
681
+ static VALUE
682
+ mkr_node_attribute_nodes(VALUE self)
683
+ {
684
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
685
+ VALUE document = mkr_node_document(self);
686
+ VALUE set = mkr_node_set_new(document);
687
+ if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
688
+ return set;
689
+ }
690
+ lxb_dom_attr_t *attr =
691
+ lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
692
+ while (attr != NULL) {
693
+ mkr_node_set_push(set, (mkr_raw_node_t *)lxb_dom_interface_node(attr));
694
+ attr = lxb_dom_element_next_attribute(attr);
695
+ }
696
+ return set;
697
+ }
698
+
699
+ /* attr.value -> the attribute's value String. For non-attribute nodes, falls
700
+ * back to text content (matching the loose Nokogiri-ish meaning of #value). */
701
+ static VALUE
702
+ mkr_node_value(VALUE self)
703
+ {
704
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
705
+ if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
706
+ size_t len = 0;
707
+ const lxb_char_t *val =
708
+ lxb_dom_attr_value(lxb_dom_interface_attr(node), &len);
709
+ return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)val, len));
710
+ }
711
+ return mkr_node_content(self);
712
+ }
713
+
714
+ /* node.line -> 1-based source line, or nil when unknown.
715
+ *
716
+ * The line comes from the byte offset stamped onto the node at parse time
717
+ * (source-location tracking) resolved against the document's line table.
718
+ * Returns nil for nodes the tracker could not place (e.g. parser-inserted
719
+ * implicit <html>/<head>/<body>, or any node when tracking was disabled). */
720
+ static VALUE
721
+ mkr_node_line(VALUE self)
722
+ {
723
+ lxb_dom_node_t *node = mkr_html_node_unwrap(self);
724
+ mkr_parsed_t *p = mkr_doc_parsed(mkr_node_document(self));
725
+ size_t line = mkr_parsed_node_line(p, node);
726
+ return line == 0 ? Qnil : ULONG2NUM(line);
727
+ }
728
+
729
+ /* ------------------------------------------------------------------ */
730
+ /* identity */
731
+ /* ------------------------------------------------------------------ */
732
+
733
+ /* Pointer identity: equal iff both wrap the same lxb_dom_node_t. */
734
+ static VALUE
735
+ mkr_node_equals(VALUE self, VALUE other)
736
+ {
737
+ if (!rb_obj_is_kind_of(other, mkr_cNode)) {
738
+ return Qfalse;
739
+ }
740
+ /* Identity by pointer, kind-agnostic (an HTML node is simply never equal to an
741
+ * XML node) - mkr_node_id never dereferences, so comparing across
742
+ * representations is safe. */
743
+ return mkr_node_id(self) == mkr_node_id(other) ? Qtrue : Qfalse;
744
+ }
745
+
746
+ /* Distance from `n` to the root (a node with no parent). */
747
+ static size_t
748
+ mkr_node_depth(const lxb_dom_node_t *n)
749
+ {
750
+ size_t d = 0;
751
+ for (const lxb_dom_node_t *p = n->parent; p != NULL; p = p->parent) {
752
+ d++;
753
+ }
754
+ return d;
755
+ }
756
+
757
+ /*
758
+ * Node#<=> : document (pre-order) position, so an array of nodes can be sorted.
759
+ * Returns -1 / 0 / 1, or nil when the nodes are not comparable: a non-node,
760
+ * different documents or detached subtrees (no common root), or an attribute
761
+ * node (attributes are not in the first_child/next chain, so their order is not
762
+ * defined here). Included via Comparable, which gives <, >, between?, etc.
763
+ */
764
+ static VALUE
765
+ mkr_node_spaceship(VALUE self, VALUE other)
766
+ {
767
+ if (!rb_obj_is_kind_of(other, mkr_cNode)
768
+ || rb_obj_is_kind_of(mkr_node_document(other), mkr_cXmlDocument)) {
769
+ return Qnil; /* not a node, or an XML node - never order-comparable to HTML */
770
+ }
771
+ lxb_dom_node_t *a = mkr_html_node_unwrap(self);
772
+ lxb_dom_node_t *b = mkr_html_node_unwrap(other);
773
+ if (a == b) {
774
+ return INT2FIX(0);
775
+ }
776
+ if (a->type == LXB_DOM_NODE_TYPE_ATTRIBUTE
777
+ || b->type == LXB_DOM_NODE_TYPE_ATTRIBUTE
778
+ || a->owner_document != b->owner_document) {
779
+ return Qnil;
780
+ }
781
+
782
+ size_t da = mkr_node_depth(a), db = mkr_node_depth(b);
783
+ lxb_dom_node_t *pa = a, *pb = b;
784
+
785
+ /* Raise the deeper node to the other's depth; if it lands on the other,
786
+ * that other is an ancestor and so comes first in pre-order. */
787
+ if (da > db) {
788
+ for (size_t k = 0; k < da - db; k++) pa = pa->parent;
789
+ if (pa == b) return INT2FIX(1); /* b is an ancestor of a */
790
+ } else if (db > da) {
791
+ for (size_t k = 0; k < db - da; k++) pb = pb->parent;
792
+ if (pb == a) return INT2FIX(-1); /* a is an ancestor of b */
793
+ }
794
+
795
+ /* Climb both until they share a parent (the lowest common ancestor). */
796
+ while (pa->parent != pb->parent) {
797
+ if (pa->parent == NULL || pb->parent == NULL) {
798
+ return Qnil; /* different trees */
799
+ }
800
+ pa = pa->parent;
801
+ pb = pb->parent;
802
+ }
803
+ if (pa->parent == NULL) {
804
+ return Qnil; /* two distinct roots */
805
+ }
806
+
807
+ /* pa and pb are distinct siblings: earlier in the child list comes first. */
808
+ for (lxb_dom_node_t *c = pa->parent->first_child; c != NULL; c = c->next) {
809
+ if (c == pa) return INT2FIX(-1);
810
+ if (c == pb) return INT2FIX(1);
811
+ }
812
+ return Qnil; /* unreachable for a well-formed tree */
813
+ }
814
+
815
+ /* Nokogiri-compatible identity: the underlying lxb_dom_node_t pointer as an
816
+ * Integer. Stable for the node's lifetime and unique among currently-live
817
+ * nodes; a freed-then-reallocated node may reuse an address (same caveat as
818
+ * Nokogiri::XML::Node#pointer_id). a.pointer_id == b.pointer_id iff a.eql?(b). */
819
+ static VALUE
820
+ mkr_node_pointer_id(VALUE self)
821
+ {
822
+ return ULL2NUM((unsigned long long)mkr_node_id(self));
823
+ }
824
+
825
+ /* Stable hash derived from the node pointer, so a == b implies a.hash ==
826
+ * b.hash even across separately-created wrappers. Shares the pointer value
827
+ * with #pointer_id. */
828
+ static VALUE
829
+ mkr_node_hash(VALUE self)
830
+ {
831
+ return mkr_node_pointer_id(self);
832
+ }
833
+
834
+ void
835
+ mkr_init_node(void)
836
+ {
837
+ rb_define_method(mkr_mHtmlNodeMethods, "name", mkr_node_name, 0);
838
+ rb_define_method(mkr_mHtmlNodeMethods, "namespace_uri", mkr_node_namespace_uri, 0);
839
+ rb_define_method(mkr_mHtmlNodeMethods, "prefix", mkr_node_prefix, 0);
840
+ rb_define_method(mkr_mHtmlNodeMethods, "local_name", mkr_node_local_name, 0);
841
+ rb_define_method(mkr_mHtmlNodeMethods, "tag_name", mkr_node_tag_name, 0);
842
+ rb_define_method(mkr_mHtmlNodeMethods, "target", mkr_node_pi_target, 0);
843
+ rb_define_method(mkr_mHtmlNodeMethods, "node_type", mkr_node_get_type, 0);
844
+ rb_define_method(mkr_mHtmlNodeMethods, "content", mkr_node_content, 0);
845
+ rb_define_method(mkr_mHtmlNodeMethods, "text", mkr_node_content, 0);
846
+ rb_define_method(mkr_mHtmlNodeMethods, "inner_text", mkr_node_content, 0);
847
+
848
+ rb_define_method(mkr_mHtmlNodeMethods, "document", mkr_node_get_document, 0);
849
+ rb_define_method(mkr_mHtmlNodeMethods, "parent", mkr_node_parent, 0);
850
+ rb_define_method(mkr_mHtmlNodeMethods, "next", mkr_node_next, 0);
851
+ rb_define_method(mkr_mHtmlNodeMethods, "next_sibling", mkr_node_next, 0);
852
+ rb_define_method(mkr_mHtmlNodeMethods, "previous", mkr_node_previous, 0);
853
+ rb_define_method(mkr_mHtmlNodeMethods, "previous_sibling", mkr_node_previous, 0);
854
+ rb_define_method(mkr_mHtmlNodeMethods, "next_element", mkr_node_next_element, 0);
855
+ rb_define_method(mkr_mHtmlNodeMethods, "previous_element", mkr_node_previous_element, 0);
856
+
857
+ rb_define_method(mkr_mHtmlNodeMethods, "child", mkr_node_child, 0);
858
+ rb_define_method(mkr_mHtmlNodeMethods, "children", mkr_node_children, 0);
859
+ rb_define_method(mkr_mHtmlNodeMethods, "element_children", mkr_node_element_children, 0);
860
+ rb_define_method(mkr_mHtmlNodeMethods, "elements", mkr_node_element_children, 0);
861
+ rb_define_method(mkr_mHtmlNodeMethods, "first_element_child", mkr_node_first_element_child, 0);
862
+ rb_define_method(mkr_mHtmlNodeMethods, "last_element_child", mkr_node_last_element_child, 0);
863
+ rb_define_method(mkr_mHtmlNodeMethods, "ancestors", mkr_node_ancestors, 0);
864
+
865
+ rb_define_method(mkr_mHtmlNodeMethods, "[]", mkr_node_aref, 1);
866
+ rb_define_method(mkr_mHtmlNodeMethods, "key?", mkr_node_has_key, 1);
867
+ rb_define_method(mkr_mHtmlNodeMethods, "keys", mkr_node_keys, 0);
868
+ rb_define_method(mkr_mHtmlNodeMethods, "values", mkr_node_values, 0);
869
+ rb_define_method(mkr_mHtmlNodeMethods, "attribute_nodes", mkr_node_attribute_nodes, 0);
870
+ rb_define_method(mkr_mHtmlNodeMethods, "value", mkr_node_value, 0);
871
+ rb_define_method(mkr_mHtmlNodeMethods, "line", mkr_node_line, 0);
872
+
873
+ rb_define_method(mkr_mHtmlNodeMethods, "==", mkr_node_equals, 1);
874
+ rb_define_method(mkr_mHtmlNodeMethods, "eql?", mkr_node_equals, 1);
875
+ rb_define_method(mkr_mHtmlNodeMethods, "<=>", mkr_node_spaceship, 1);
876
+ rb_define_method(mkr_mHtmlNodeMethods, "hash", mkr_node_hash, 0);
877
+ rb_define_method(mkr_mHtmlNodeMethods, "pointer_id", mkr_node_pointer_id, 0);
878
+ rb_define_method(mkr_mHtmlNodeMethods, "clone_node", mkr_node_clone_node, -1);
879
+
880
+ /* DocumentType identifiers (WHATWG DOM names; external_id is the
881
+ * Nokogiri-compatible alias for public_id). */
882
+ rb_define_method(mkr_cHtmlDocumentType, "public_id", mkr_doctype_public_id, 0);
883
+ rb_define_method(mkr_cHtmlDocumentType, "external_id", mkr_doctype_public_id, 0);
884
+ rb_define_method(mkr_cHtmlDocumentType, "system_id", mkr_doctype_system_id, 0);
885
+
886
+ /* <template> contents (WHATWG DOM HTMLTemplateElement.content). */
887
+ rb_define_method(mkr_cHtmlElement, "content_fragment", mkr_node_content_fragment, 0);
888
+ }