makiri 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +12 -7
- data/CHANGELOG.md +93 -14
- data/README.md +173 -7
- data/Rakefile +103 -7
- data/ext/makiri/bridge/bridge.h +28 -0
- data/ext/makiri/bridge/ruby_string.c +217 -0
- data/ext/makiri/core/mkr_alloc.h +1 -1
- data/ext/makiri/core/mkr_buf.c +35 -1
- data/ext/makiri/core/mkr_buf.h +37 -3
- data/ext/makiri/core/mkr_core.h +1 -1
- data/ext/makiri/core/mkr_hash.h +1 -1
- data/ext/makiri/core/mkr_text.h +8 -8
- data/ext/makiri/extconf.rb +20 -2
- data/ext/makiri/glue/glue.h +47 -11
- data/ext/makiri/glue/ruby_doc.c +117 -43
- data/ext/makiri/glue/ruby_html_css.c +246 -0
- data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +242 -51
- data/ext/makiri/glue/ruby_html_node.c +888 -0
- data/ext/makiri/glue/ruby_html_serialize.c +154 -0
- data/ext/makiri/glue/ruby_node.c +54 -748
- data/ext/makiri/glue/ruby_node_set.c +167 -32
- data/ext/makiri/glue/ruby_xml.c +420 -0
- data/ext/makiri/glue/ruby_xml_node.c +1386 -0
- data/ext/makiri/glue/ruby_xpath.c +59 -26
- data/ext/makiri/glue/ruby_xpath.h +19 -0
- data/ext/makiri/lexbor_compat/compat.h +42 -9
- data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
- data/ext/makiri/lexbor_compat/dom_index.c +2 -2
- data/ext/makiri/lexbor_compat/post_parse.c +100 -10
- data/ext/makiri/lexbor_compat/source_loc.c +13 -9
- data/ext/makiri/lexbor_compat/text_index.c +14 -8
- data/ext/makiri/lexbor_compat/utf8_input.c +85 -26
- data/ext/makiri/makiri.c +139 -6
- data/ext/makiri/makiri.h +43 -2
- data/ext/makiri/xml/mkr_xml.h +126 -0
- data/ext/makiri/xml/mkr_xml_chars.c +225 -0
- data/ext/makiri/xml/mkr_xml_mutate.c +875 -0
- data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
- data/ext/makiri/xml/mkr_xml_node.c +267 -0
- data/ext/makiri/xml/mkr_xml_node.h +119 -0
- data/ext/makiri/xml/mkr_xml_tree.c +1479 -0
- data/ext/makiri/xpath/mkr_xpath.c +59 -32
- data/ext/makiri/xpath/mkr_xpath.h +96 -4
- data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
- data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
- data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +202 -175
- data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +110 -86
- data/ext/makiri/xpath/mkr_xpath_internal.h +91 -200
- data/ext/makiri/xpath/mkr_xpath_lex.c +2 -2
- data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
- data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +142 -0
- data/ext/makiri/xpath/mkr_xpath_parse.c +5 -5
- data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
- data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
- data/ext/makiri/xpath/mkr_xpath_shared.c +593 -0
- data/ext/makiri/xpath/{mkr_xpath_value.c → mkr_xpath_value_body.h} +145 -656
- data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
- data/lib/makiri/{attribute.rb → attr.rb} +7 -3
- data/lib/makiri/cdata_section.rb +21 -0
- data/lib/makiri/comment.rb +12 -0
- data/lib/makiri/compat_aliases.rb +30 -0
- data/lib/makiri/document.rb +4 -76
- data/lib/makiri/document_fragment.rb +14 -9
- data/lib/makiri/element.rb +5 -3
- data/lib/makiri/html/document.rb +106 -0
- data/lib/makiri/html/node_methods.rb +19 -0
- data/lib/makiri/html.rb +12 -0
- data/lib/makiri/node.rb +58 -15
- data/lib/makiri/node_set.rb +8 -0
- data/lib/makiri/processing_instruction.rb +12 -0
- data/lib/makiri/text.rb +2 -0
- data/lib/makiri/version.rb +1 -1
- data/lib/makiri/xml/document.rb +24 -0
- data/lib/makiri/xml/node_methods.rb +37 -0
- data/lib/makiri/xml.rb +10 -0
- data/lib/makiri/xpath_context.rb +1 -1
- data/lib/makiri.rb +23 -5
- data/script/build_native_gem.rb +2 -2
- data/script/check_c_safety.rb +32 -0
- data/script/check_c_safety_allowlist.yml +83 -0
- metadata +35 -9
- data/ext/makiri/glue/ruby_css.c +0 -185
- data/ext/makiri/glue/ruby_serialize.c +0 -92
- data/lib/makiri/cdata.rb +0 -6
data/ext/makiri/glue/ruby_node.c
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
|
+
/* ruby_node.c - the shared, representation-neutral node core.
|
|
2
|
+
*
|
|
3
|
+
* HTML (Lexbor) nodes and XML (custom-arena) nodes are two representations of the
|
|
4
|
+
* same Ruby-facing Node abstraction. This file owns what is common to BOTH: the
|
|
5
|
+
* TypedData types that distinguish the two wrappers (so a representation-specific
|
|
6
|
+
* accessor rejects the wrong kind via Ruby's own type machinery), the shared GC
|
|
7
|
+
* functions, and the kind-agnostic accessors used for identity and document
|
|
8
|
+
* lookup. The HTML node implementation (wrap/unwrap + reader methods) lives in
|
|
9
|
+
* ruby_html_node.c, the XML one in ruby_xml_node.c. */
|
|
1
10
|
#include "glue.h"
|
|
2
|
-
|
|
3
|
-
#include <lexbor/ns/ns.h> /* lxb_ns_by_id, LXB_NS__UNDEF (namespaceURI) */
|
|
11
|
+
#include "../xml/mkr_xml_node.h" /* mkr_xml_doc_t::doc_node, for the kind-aware mkr_node_raw */
|
|
4
12
|
|
|
5
13
|
/* ------------------------------------------------------------------ */
|
|
6
|
-
/*
|
|
14
|
+
/* GC + TypedData types */
|
|
7
15
|
/* ------------------------------------------------------------------ */
|
|
8
16
|
|
|
9
17
|
static void
|
|
@@ -16,7 +24,7 @@ mkr_node_gc_mark(void *ptr)
|
|
|
16
24
|
static void
|
|
17
25
|
mkr_node_gc_free(void *ptr)
|
|
18
26
|
{
|
|
19
|
-
/* The
|
|
27
|
+
/* Only the mkr_node_data_t wrapper is released below; the node it points to is owned by the document arena (HTML or XML) and is never freed here. */
|
|
20
28
|
xfree(ptr);
|
|
21
29
|
}
|
|
22
30
|
|
|
@@ -27,61 +35,65 @@ mkr_node_memsize(const void *ptr)
|
|
|
27
35
|
return sizeof(mkr_node_data_t);
|
|
28
36
|
}
|
|
29
37
|
|
|
38
|
+
/* HTML and XML nodes share the mkr_node_data_t layout (node pointer + keepalive
|
|
39
|
+
* Document) and the same GC functions, but are wrapped under DISTINCT TypedData
|
|
40
|
+
* types so the representation is checked by Ruby's own type machinery: an HTML
|
|
41
|
+
* accessor (mkr_html_node_unwrap, via mkr_html_node_type) raises TypeError when
|
|
42
|
+
* handed an XML node and vice versa - it is structurally impossible to read one
|
|
43
|
+
* representation's pointer as the other's. mkr_node_type is the shared base (both
|
|
44
|
+
* derive from it via .parent), so the kind-agnostic identity accessors accept
|
|
45
|
+
* either. This is the single source of HTML/XML node-pointer safety; there is no
|
|
46
|
+
* ambiguous "return an lxb_dom_node_t for any node" unwrap. */
|
|
30
47
|
const rb_data_type_t mkr_node_type = {
|
|
31
48
|
"Makiri::Node",
|
|
32
49
|
{ mkr_node_gc_mark, mkr_node_gc_free, mkr_node_memsize, },
|
|
33
50
|
0, 0, RUBY_TYPED_FREE_IMMEDIATELY,
|
|
34
51
|
};
|
|
52
|
+
const rb_data_type_t mkr_html_node_type = {
|
|
53
|
+
"Makiri::HTML::Node",
|
|
54
|
+
{ mkr_node_gc_mark, mkr_node_gc_free, mkr_node_memsize, },
|
|
55
|
+
&mkr_node_type, 0, RUBY_TYPED_FREE_IMMEDIATELY,
|
|
56
|
+
};
|
|
57
|
+
const rb_data_type_t mkr_xml_node_type = {
|
|
58
|
+
"Makiri::XML::Node",
|
|
59
|
+
{ mkr_node_gc_mark, mkr_node_gc_free, mkr_node_memsize, },
|
|
60
|
+
&mkr_node_type, 0, RUBY_TYPED_FREE_IMMEDIATELY,
|
|
61
|
+
};
|
|
35
62
|
|
|
36
63
|
/* ------------------------------------------------------------------ */
|
|
37
|
-
/*
|
|
64
|
+
/* kind-agnostic accessors (identity / document) */
|
|
38
65
|
/* ------------------------------------------------------------------ */
|
|
39
66
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT) {
|
|
49
|
-
return document;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
VALUE klass;
|
|
53
|
-
switch (node->type) {
|
|
54
|
-
case LXB_DOM_NODE_TYPE_ELEMENT: klass = mkr_cElement; break;
|
|
55
|
-
case LXB_DOM_NODE_TYPE_ATTRIBUTE: klass = mkr_cAttribute; break;
|
|
56
|
-
case LXB_DOM_NODE_TYPE_TEXT: klass = mkr_cText; break;
|
|
57
|
-
case LXB_DOM_NODE_TYPE_COMMENT: klass = mkr_cComment; break;
|
|
58
|
-
case LXB_DOM_NODE_TYPE_CDATA_SECTION: klass = mkr_cCData; break;
|
|
59
|
-
case LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION:
|
|
60
|
-
klass = mkr_cProcessingInstruction; break;
|
|
61
|
-
case LXB_DOM_NODE_TYPE_DOCUMENT_TYPE: klass = mkr_cDocumentType; break;
|
|
62
|
-
case LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT:
|
|
63
|
-
klass = mkr_cDocumentFragment; break;
|
|
64
|
-
default: klass = mkr_cNode; break;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
mkr_node_data_t *nd;
|
|
68
|
-
VALUE obj = TypedData_Make_Struct(klass, mkr_node_data_t, &mkr_node_type, nd);
|
|
69
|
-
nd->node = node;
|
|
70
|
-
nd->document = document;
|
|
71
|
-
return obj;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
lxb_dom_node_t *
|
|
75
|
-
mkr_node_unwrap(VALUE rb_node)
|
|
67
|
+
/* The kind-AGNOSTIC raw node pointer (base mkr_node_type, accepts HTML or XML),
|
|
68
|
+
* as an opaque void* - dereferencing it requires an explicit cast, so it cannot be
|
|
69
|
+
* mistaken for a typed pointer. Only for the few sites where the representation is
|
|
70
|
+
* either irrelevant (identity comparison) or already guaranteed by an external
|
|
71
|
+
* same-document/kind check (the XPath context node). The Document branch is
|
|
72
|
+
* kind-aware (XML Document -> its arena document node, HTML -> the Lexbor one). */
|
|
73
|
+
void *
|
|
74
|
+
mkr_node_raw(VALUE rb_node)
|
|
76
75
|
{
|
|
77
76
|
if (rb_obj_is_kind_of(rb_node, mkr_cDocument)) {
|
|
78
|
-
|
|
77
|
+
mkr_parsed_t *parsed = mkr_doc_parsed(rb_node);
|
|
78
|
+
if (mkr_parsed_kind(parsed) == MKR_DOC_XML) {
|
|
79
|
+
mkr_xml_doc_t *xdoc = mkr_parsed_xml_doc(parsed);
|
|
80
|
+
return xdoc ? (void *)xdoc->doc_node : NULL;
|
|
81
|
+
}
|
|
82
|
+
return (void *)mkr_html_doc_unwrap(rb_node);
|
|
79
83
|
}
|
|
80
84
|
mkr_node_data_t *nd;
|
|
81
85
|
TypedData_Get_Struct(rb_node, mkr_node_data_t, &mkr_node_type, nd);
|
|
82
86
|
return nd->node;
|
|
83
87
|
}
|
|
84
88
|
|
|
89
|
+
/* Node identity as an integer, for #==/#eql?/#hash/#pointer_id - kind-agnostic and
|
|
90
|
+
* never dereferenced. */
|
|
91
|
+
uintptr_t
|
|
92
|
+
mkr_node_id(VALUE rb_node)
|
|
93
|
+
{
|
|
94
|
+
return (uintptr_t)mkr_node_raw(rb_node);
|
|
95
|
+
}
|
|
96
|
+
|
|
85
97
|
VALUE
|
|
86
98
|
mkr_node_document(VALUE rb_node)
|
|
87
99
|
{
|
|
@@ -92,709 +104,3 @@ mkr_node_document(VALUE rb_node)
|
|
|
92
104
|
TypedData_Get_Struct(rb_node, mkr_node_data_t, &mkr_node_type, nd);
|
|
93
105
|
return nd->document;
|
|
94
106
|
}
|
|
95
|
-
|
|
96
|
-
/* ------------------------------------------------------------------ */
|
|
97
|
-
/* name / type / content */
|
|
98
|
-
/* ------------------------------------------------------------------ */
|
|
99
|
-
|
|
100
|
-
/*
|
|
101
|
-
* Node name. Matches Nokogiri: lowercase tag name for HTML elements
|
|
102
|
-
* (Lexbor lowercases during tokenization), and the un-prefixed DOM names
|
|
103
|
-
* "text"/"comment"/"#cdata-section"/"document" for the other kinds.
|
|
104
|
-
*/
|
|
105
|
-
static VALUE
|
|
106
|
-
mkr_node_name(VALUE self)
|
|
107
|
-
{
|
|
108
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
109
|
-
size_t len = 0;
|
|
110
|
-
const lxb_char_t *name;
|
|
111
|
-
|
|
112
|
-
switch (node->type) {
|
|
113
|
-
case LXB_DOM_NODE_TYPE_ELEMENT:
|
|
114
|
-
name = lxb_dom_element_qualified_name(lxb_dom_interface_element(node), &len);
|
|
115
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
|
|
116
|
-
case LXB_DOM_NODE_TYPE_ATTRIBUTE:
|
|
117
|
-
name = lxb_dom_attr_qualified_name(lxb_dom_interface_attr(node), &len);
|
|
118
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
|
|
119
|
-
case LXB_DOM_NODE_TYPE_TEXT:
|
|
120
|
-
return rb_utf8_str_new_cstr("text");
|
|
121
|
-
case LXB_DOM_NODE_TYPE_COMMENT:
|
|
122
|
-
return rb_utf8_str_new_cstr("comment");
|
|
123
|
-
case LXB_DOM_NODE_TYPE_CDATA_SECTION:
|
|
124
|
-
return rb_utf8_str_new_cstr("#cdata-section");
|
|
125
|
-
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
|
126
|
-
return rb_utf8_str_new_cstr("document");
|
|
127
|
-
default:
|
|
128
|
-
name = lxb_dom_node_name(node, &len);
|
|
129
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
/* ------------------------------------------------------------------ */
|
|
134
|
-
/* namespace (WHATWG DOM Element/Attr: namespaceURI/prefix/localName) */
|
|
135
|
-
/* ------------------------------------------------------------------ */
|
|
136
|
-
|
|
137
|
-
/*
|
|
138
|
-
* Local name (DOM `localName`): the name without any prefix — "div" for
|
|
139
|
-
* <div>, "path" for an SVG <path>, "href" for an xlink:href attribute.
|
|
140
|
-
* Defined on Element and Attribute only; nil for the other node kinds (the DOM
|
|
141
|
-
* gives a Text/Comment/Document no localName).
|
|
142
|
-
*/
|
|
143
|
-
static VALUE
|
|
144
|
-
mkr_node_local_name(VALUE self)
|
|
145
|
-
{
|
|
146
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
147
|
-
size_t len = 0;
|
|
148
|
-
const lxb_char_t *name;
|
|
149
|
-
|
|
150
|
-
switch (node->type) {
|
|
151
|
-
case LXB_DOM_NODE_TYPE_ELEMENT:
|
|
152
|
-
name = lxb_dom_element_local_name(lxb_dom_interface_element(node), &len);
|
|
153
|
-
break;
|
|
154
|
-
case LXB_DOM_NODE_TYPE_ATTRIBUTE:
|
|
155
|
-
name = lxb_dom_attr_local_name(lxb_dom_interface_attr(node), &len);
|
|
156
|
-
break;
|
|
157
|
-
default:
|
|
158
|
-
return Qnil;
|
|
159
|
-
}
|
|
160
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
/*
|
|
164
|
-
* Namespace prefix (DOM `prefix`): nil unless the qualified name is
|
|
165
|
-
* `prefix:local` — typically nil for HTML5-parsed content. Derived from the
|
|
166
|
-
* qualified-vs-local length (qualified == prefix ":" local), so a colon inside
|
|
167
|
-
* a local name can't be mistaken for a separator. Element/Attribute only.
|
|
168
|
-
*/
|
|
169
|
-
static VALUE
|
|
170
|
-
mkr_node_prefix(VALUE self)
|
|
171
|
-
{
|
|
172
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
173
|
-
const lxb_char_t *q = NULL;
|
|
174
|
-
size_t qlen = 0, llen = 0;
|
|
175
|
-
|
|
176
|
-
switch (node->type) {
|
|
177
|
-
case LXB_DOM_NODE_TYPE_ELEMENT: {
|
|
178
|
-
lxb_dom_element_t *el = lxb_dom_interface_element(node);
|
|
179
|
-
q = lxb_dom_element_qualified_name(el, &qlen);
|
|
180
|
-
(void) lxb_dom_element_local_name(el, &llen);
|
|
181
|
-
break;
|
|
182
|
-
}
|
|
183
|
-
case LXB_DOM_NODE_TYPE_ATTRIBUTE: {
|
|
184
|
-
lxb_dom_attr_t *at = lxb_dom_interface_attr(node);
|
|
185
|
-
q = lxb_dom_attr_qualified_name(at, &qlen);
|
|
186
|
-
(void) lxb_dom_attr_local_name(at, &llen);
|
|
187
|
-
break;
|
|
188
|
-
}
|
|
189
|
-
default:
|
|
190
|
-
return Qnil;
|
|
191
|
-
}
|
|
192
|
-
if (q == NULL || qlen <= llen + 1) { /* no "prefix:" segment */
|
|
193
|
-
return Qnil;
|
|
194
|
-
}
|
|
195
|
-
return mkr_ruby_str_from_borrowed(
|
|
196
|
-
mkr_borrowed_text((const char *)q, qlen - llen - 1));
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
/*
|
|
200
|
-
* The fixed namespaces the HTML parser assigns to foreign-content attributes by
|
|
201
|
-
* prefix (the "adjust foreign attributes" step). Lexbor tags an attribute node
|
|
202
|
-
* with its *element's* ns rather than the attribute's own, so an attribute's
|
|
203
|
-
* namespaceURI is resolved from its prefix here, not from node->ns. Returns
|
|
204
|
-
* NULL (=> DOM null) for any other prefix.
|
|
205
|
-
*/
|
|
206
|
-
static const char *
|
|
207
|
-
mkr_attr_ns_for_prefix(const char *p, size_t n)
|
|
208
|
-
{
|
|
209
|
-
if (n == 5 && memcmp(p, "xlink", 5) == 0) return "http://www.w3.org/1999/xlink";
|
|
210
|
-
if (n == 3 && memcmp(p, "xml", 3) == 0) return "http://www.w3.org/XML/1998/namespace";
|
|
211
|
-
if (n == 5 && memcmp(p, "xmlns", 5) == 0) return "http://www.w3.org/2000/xmlns/";
|
|
212
|
-
return NULL;
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
/*
|
|
216
|
-
* Namespace URI (DOM `namespaceURI`).
|
|
217
|
-
*
|
|
218
|
-
* Element: resolved from node->ns, so — DOM-faithfully — an HTML element is in
|
|
219
|
-
* the XHTML namespace ("http://www.w3.org/1999/xhtml"), not nil (an HTML
|
|
220
|
-
* element is never namespaceless; this is what browsers' DOM and `namespace-uri()`
|
|
221
|
-
* return). SVG/MathML elements get their own URI; nil only when truly
|
|
222
|
-
* unnamespaced (LXB_NS__UNDEF).
|
|
223
|
-
*
|
|
224
|
-
* Attribute: nil for an unprefixed attribute (class, id, ...); for a prefixed
|
|
225
|
-
* one, the parser-assigned foreign-content namespace keyed on the prefix
|
|
226
|
-
* (xlink/xml/xmlns), else nil.
|
|
227
|
-
*
|
|
228
|
-
* Other node kinds: nil.
|
|
229
|
-
*/
|
|
230
|
-
static VALUE
|
|
231
|
-
mkr_node_namespace_uri(VALUE self)
|
|
232
|
-
{
|
|
233
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
234
|
-
|
|
235
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
236
|
-
if (node->ns == LXB_NS__UNDEF) {
|
|
237
|
-
return Qnil;
|
|
238
|
-
}
|
|
239
|
-
lxb_dom_document_t *doc = node->owner_document;
|
|
240
|
-
if (doc == NULL || doc->ns == NULL) {
|
|
241
|
-
return Qnil;
|
|
242
|
-
}
|
|
243
|
-
size_t len = 0;
|
|
244
|
-
const lxb_char_t *uri = lxb_ns_by_id(doc->ns, node->ns, &len);
|
|
245
|
-
if (uri == NULL || len == 0) {
|
|
246
|
-
return Qnil;
|
|
247
|
-
}
|
|
248
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)uri, len));
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
|
|
252
|
-
lxb_dom_attr_t *at = lxb_dom_interface_attr(node);
|
|
253
|
-
size_t qlen = 0, llen = 0;
|
|
254
|
-
const lxb_char_t *q = lxb_dom_attr_qualified_name(at, &qlen);
|
|
255
|
-
(void) lxb_dom_attr_local_name(at, &llen);
|
|
256
|
-
if (q == NULL || qlen <= llen + 1) {
|
|
257
|
-
return Qnil; /* unprefixed attribute => no namespace */
|
|
258
|
-
}
|
|
259
|
-
const char *uri = mkr_attr_ns_for_prefix((const char *)q, qlen - llen - 1);
|
|
260
|
-
return uri ? rb_utf8_str_new_cstr(uri) : Qnil;
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
return Qnil;
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
/*
|
|
267
|
-
* Element#tag_name (DOM `tagName`): the qualified name, uppercased for an HTML
|
|
268
|
-
* element in an HTML document ("DIV"), as the DOM specifies — unlike #name,
|
|
269
|
-
* which is the lowercase qualified name. SVG/MathML elements keep their case.
|
|
270
|
-
* nil for non-element nodes.
|
|
271
|
-
*/
|
|
272
|
-
static VALUE
|
|
273
|
-
mkr_node_tag_name(VALUE self)
|
|
274
|
-
{
|
|
275
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
276
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
277
|
-
return Qnil;
|
|
278
|
-
}
|
|
279
|
-
size_t len = 0;
|
|
280
|
-
const lxb_char_t *name =
|
|
281
|
-
lxb_dom_element_tag_name(lxb_dom_interface_element(node), &len);
|
|
282
|
-
if (name == NULL) {
|
|
283
|
-
return Qnil;
|
|
284
|
-
}
|
|
285
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)name, len));
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
/*
|
|
289
|
-
* ProcessingInstruction#target (DOM `target`): the PI's target name
|
|
290
|
-
* (the "xml" in <?xml ...?>). nil for non-PI nodes. The PI's data is read via
|
|
291
|
-
* #content / #text like any character-data node.
|
|
292
|
-
*/
|
|
293
|
-
static VALUE
|
|
294
|
-
mkr_node_pi_target(VALUE self)
|
|
295
|
-
{
|
|
296
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
297
|
-
if (node->type != LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) {
|
|
298
|
-
return Qnil;
|
|
299
|
-
}
|
|
300
|
-
size_t len = 0;
|
|
301
|
-
const lxb_char_t *t = lxb_dom_processing_instruction_target(
|
|
302
|
-
lxb_dom_interface_processing_instruction(node), &len);
|
|
303
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)t, len));
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
/* Numeric DOM node type (LXB_DOM_NODE_TYPE_*). */
|
|
307
|
-
static VALUE
|
|
308
|
-
mkr_node_get_type(VALUE self)
|
|
309
|
-
{
|
|
310
|
-
return INT2NUM((int)mkr_node_unwrap(self)->type);
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
/*
|
|
314
|
-
* DocumentType public / system identifiers (WHATWG DOM `publicId`/`systemId`).
|
|
315
|
-
* Returns the String, or nil when the doctype carries no such identifier.
|
|
316
|
-
* Lexbor represents a missing id inconsistently (NULL after `SYSTEM`, but an
|
|
317
|
-
* empty string for a bare `<!DOCTYPE html>`), so we treat empty as absent and
|
|
318
|
-
* return nil for both — matching Nokogiri (which also reports nil for an empty
|
|
319
|
-
* or missing id). Defined only on Makiri::DocumentType, so the receiver is
|
|
320
|
-
* always a doctype node; the guard is belt-and-suspenders.
|
|
321
|
-
*/
|
|
322
|
-
static VALUE
|
|
323
|
-
mkr_doctype_id(VALUE self, int system)
|
|
324
|
-
{
|
|
325
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
326
|
-
if (node->type != LXB_DOM_NODE_TYPE_DOCUMENT_TYPE) {
|
|
327
|
-
return Qnil;
|
|
328
|
-
}
|
|
329
|
-
lxb_dom_document_type_t *dt = lxb_dom_interface_document_type(node);
|
|
330
|
-
size_t len = 0;
|
|
331
|
-
const lxb_char_t *id = system ? lxb_dom_document_type_system_id(dt, &len)
|
|
332
|
-
: lxb_dom_document_type_public_id(dt, &len);
|
|
333
|
-
return (id == NULL || len == 0)
|
|
334
|
-
? Qnil
|
|
335
|
-
: mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)id, len));
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
static VALUE
|
|
339
|
-
mkr_doctype_public_id(VALUE self)
|
|
340
|
-
{
|
|
341
|
-
return mkr_doctype_id(self, 0);
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
static VALUE
|
|
345
|
-
mkr_doctype_system_id(VALUE self)
|
|
346
|
-
{
|
|
347
|
-
return mkr_doctype_id(self, 1);
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
/*
|
|
351
|
-
* A <template> element's "template contents" — the separate DocumentFragment
|
|
352
|
-
* the HTML parser fills instead of making the parsed nodes children of the
|
|
353
|
-
* <template> (WHATWG DOM `HTMLTemplateElement.content`; browsers behave the
|
|
354
|
-
* same: template.children is empty, template.content holds the nodes). Lexbor
|
|
355
|
-
* stores it on the template interface; we surface it as a Makiri::DocumentFragment
|
|
356
|
-
* so it can be traversed/queried (`tpl.content_fragment.css("p")`).
|
|
357
|
-
*
|
|
358
|
-
* Returns nil for any node that is not an HTML <template>. Note: CSS/XPath over
|
|
359
|
-
* the *template element itself* deliberately do NOT descend into the content
|
|
360
|
-
* (matching the DOM, and unavoidable for CSS since it runs Lexbor's selector
|
|
361
|
-
* engine over the real tree) — query the fragment instead.
|
|
362
|
-
*/
|
|
363
|
-
static VALUE
|
|
364
|
-
mkr_node_content_fragment(VALUE self)
|
|
365
|
-
{
|
|
366
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
367
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT
|
|
368
|
-
|| node->local_name != LXB_TAG_TEMPLATE
|
|
369
|
-
|| node->ns != LXB_NS_HTML) {
|
|
370
|
-
return Qnil;
|
|
371
|
-
}
|
|
372
|
-
lxb_dom_document_fragment_t *content = lxb_html_interface_template(node)->content;
|
|
373
|
-
if (content == NULL) {
|
|
374
|
-
return Qnil;
|
|
375
|
-
}
|
|
376
|
-
return mkr_wrap_node((lxb_dom_node_t *)content, mkr_node_document(self));
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
/* Concatenated text content of this node and its descendants. The DOM spec
|
|
380
|
-
* makes a Document's textContent null; we instead return the text of the root
|
|
381
|
-
* element (matching the intuitive, Nokogiri-like Document#text). */
|
|
382
|
-
static VALUE
|
|
383
|
-
mkr_node_content(VALUE self)
|
|
384
|
-
{
|
|
385
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
386
|
-
if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT) {
|
|
387
|
-
node = lxb_dom_document_root((lxb_dom_document_t *)node);
|
|
388
|
-
if (node == NULL) {
|
|
389
|
-
return rb_utf8_str_new("", 0);
|
|
390
|
-
}
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
/* Fast path for elements / fragments (the common case, incl. document text).
|
|
394
|
-
*
|
|
395
|
-
* Preferred: the per-document text index (lexbor_compat/text_index.c) maps
|
|
396
|
-
* this node to the contiguous, document-order run of its descendants' text
|
|
397
|
-
* slices, so we serve a single pre-sized memcpy run with no per-extraction
|
|
398
|
-
* tree walk — the walk is otherwise the dominant, cache-bound cost. Built
|
|
399
|
-
* lazily on first use and dropped on any mutation, so a slice can never
|
|
400
|
-
* point at reallocated/detached storage.
|
|
401
|
-
*
|
|
402
|
-
* Fallback (index unavailable — node outside the indexed tree, e.g. a
|
|
403
|
-
* fragment, or a build OOM): stream each descendant text/CDATA node's data
|
|
404
|
-
* straight into the Ruby string via an iterative pre-order walk (stack-safe;
|
|
405
|
-
* skips Lexbor's intermediate arena buffer + copy). */
|
|
406
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT
|
|
407
|
-
|| node->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
|
|
408
|
-
mkr_parsed_t *parsed = mkr_doc_parsed(mkr_node_document(self));
|
|
409
|
-
const mkr_borrowed_text_t *slices;
|
|
410
|
-
size_t nslices, total;
|
|
411
|
-
if (parsed != NULL
|
|
412
|
-
&& mkr_parsed_text_slices(parsed, node, &slices, &nslices, &total)) {
|
|
413
|
-
return mkr_ruby_str_from_slices(slices, nslices, total);
|
|
414
|
-
}
|
|
415
|
-
|
|
416
|
-
VALUE str = rb_utf8_str_new(NULL, 0);
|
|
417
|
-
for (lxb_dom_node_t *n = node->first_child; n != NULL;) {
|
|
418
|
-
if (n->type == LXB_DOM_NODE_TYPE_TEXT
|
|
419
|
-
|| n->type == LXB_DOM_NODE_TYPE_CDATA_SECTION) {
|
|
420
|
-
const lexbor_str_t *d = &lxb_dom_interface_character_data(n)->data;
|
|
421
|
-
if (d->data != NULL && d->length != 0) {
|
|
422
|
-
rb_str_cat(str, (const char *)d->data, (long)d->length);
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
|
-
if (n->first_child != NULL) { n = n->first_child; continue; }
|
|
426
|
-
while (n != node && n->next == NULL) { n = n->parent; }
|
|
427
|
-
if (n == node) { break; }
|
|
428
|
-
n = n->next;
|
|
429
|
-
}
|
|
430
|
-
return str;
|
|
431
|
-
}
|
|
432
|
-
|
|
433
|
-
/* Character-data and other node kinds keep the general (proven) path. */
|
|
434
|
-
size_t len = 0;
|
|
435
|
-
lxb_char_t *text = lxb_dom_node_text_content(node, &len);
|
|
436
|
-
if (text == NULL) {
|
|
437
|
-
return rb_utf8_str_new("", 0);
|
|
438
|
-
}
|
|
439
|
-
VALUE str = rb_utf8_str_new((const char *)text, len);
|
|
440
|
-
lxb_dom_document_destroy_text(node->owner_document, text);
|
|
441
|
-
return str;
|
|
442
|
-
}
|
|
443
|
-
|
|
444
|
-
/* ------------------------------------------------------------------ */
|
|
445
|
-
/* tree navigation */
|
|
446
|
-
/* ------------------------------------------------------------------ */
|
|
447
|
-
|
|
448
|
-
static VALUE
|
|
449
|
-
mkr_node_get_document(VALUE self)
|
|
450
|
-
{
|
|
451
|
-
return mkr_node_document(self);
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
static VALUE
|
|
455
|
-
mkr_node_parent(VALUE self)
|
|
456
|
-
{
|
|
457
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
458
|
-
VALUE document = mkr_node_document(self);
|
|
459
|
-
|
|
460
|
-
/* Lexbor never links an attribute back to its element, so node->parent is
|
|
461
|
-
* NULL for attributes. Resolve via the compat attr->owner index. */
|
|
462
|
-
if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
|
|
463
|
-
lxb_dom_node_t *owner =
|
|
464
|
-
mkr_parsed_attr_owner(mkr_doc_parsed(document),
|
|
465
|
-
lxb_dom_interface_attr(node));
|
|
466
|
-
return mkr_wrap_node(owner, document);
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
return mkr_wrap_node(node->parent, document);
|
|
470
|
-
}
|
|
471
|
-
|
|
472
|
-
static VALUE
|
|
473
|
-
mkr_node_next(VALUE self)
|
|
474
|
-
{
|
|
475
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
476
|
-
return mkr_wrap_node(node->next, mkr_node_document(self));
|
|
477
|
-
}
|
|
478
|
-
|
|
479
|
-
static VALUE
|
|
480
|
-
mkr_node_previous(VALUE self)
|
|
481
|
-
{
|
|
482
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
483
|
-
return mkr_wrap_node(node->prev, mkr_node_document(self));
|
|
484
|
-
}
|
|
485
|
-
|
|
486
|
-
static VALUE
|
|
487
|
-
mkr_node_next_element(VALUE self)
|
|
488
|
-
{
|
|
489
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self)->next;
|
|
490
|
-
while (node != NULL && node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
491
|
-
node = node->next;
|
|
492
|
-
}
|
|
493
|
-
return mkr_wrap_node(node, mkr_node_document(self));
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
static VALUE
|
|
497
|
-
mkr_node_previous_element(VALUE self)
|
|
498
|
-
{
|
|
499
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self)->prev;
|
|
500
|
-
while (node != NULL && node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
501
|
-
node = node->prev;
|
|
502
|
-
}
|
|
503
|
-
return mkr_wrap_node(node, mkr_node_document(self));
|
|
504
|
-
}
|
|
505
|
-
|
|
506
|
-
/* First child node (any type), or nil. */
|
|
507
|
-
static VALUE
|
|
508
|
-
mkr_node_child(VALUE self)
|
|
509
|
-
{
|
|
510
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
511
|
-
return mkr_wrap_node(node->first_child, mkr_node_document(self));
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
/* All child nodes as a NodeSet. */
|
|
515
|
-
static VALUE
|
|
516
|
-
mkr_node_children(VALUE self)
|
|
517
|
-
{
|
|
518
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
519
|
-
VALUE document = mkr_node_document(self);
|
|
520
|
-
VALUE set = mkr_node_set_new(document);
|
|
521
|
-
for (lxb_dom_node_t *c = node->first_child; c != NULL; c = c->next) {
|
|
522
|
-
mkr_node_set_push(set, c);
|
|
523
|
-
}
|
|
524
|
-
return set;
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
/* Child elements only, as a NodeSet. */
|
|
528
|
-
static VALUE
|
|
529
|
-
mkr_node_element_children(VALUE self)
|
|
530
|
-
{
|
|
531
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
532
|
-
VALUE document = mkr_node_document(self);
|
|
533
|
-
VALUE set = mkr_node_set_new(document);
|
|
534
|
-
for (lxb_dom_node_t *c = node->first_child; c != NULL; c = c->next) {
|
|
535
|
-
if (c->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
536
|
-
mkr_node_set_push(set, c);
|
|
537
|
-
}
|
|
538
|
-
}
|
|
539
|
-
return set;
|
|
540
|
-
}
|
|
541
|
-
|
|
542
|
-
/* Ancestor elements, nearest first (parent, grandparent, ... root). */
|
|
543
|
-
static VALUE
|
|
544
|
-
mkr_node_ancestors(VALUE self)
|
|
545
|
-
{
|
|
546
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
547
|
-
VALUE document = mkr_node_document(self);
|
|
548
|
-
VALUE set = mkr_node_set_new(document);
|
|
549
|
-
for (lxb_dom_node_t *p = node->parent; p != NULL; p = p->parent) {
|
|
550
|
-
if (p->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
551
|
-
mkr_node_set_push(set, p);
|
|
552
|
-
}
|
|
553
|
-
}
|
|
554
|
-
return set;
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
static VALUE
|
|
558
|
-
mkr_node_first_element_child(VALUE self)
|
|
559
|
-
{
|
|
560
|
-
lxb_dom_node_t *c = mkr_node_unwrap(self)->first_child;
|
|
561
|
-
while (c != NULL && c->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
562
|
-
c = c->next;
|
|
563
|
-
}
|
|
564
|
-
return mkr_wrap_node(c, mkr_node_document(self));
|
|
565
|
-
}
|
|
566
|
-
|
|
567
|
-
static VALUE
|
|
568
|
-
mkr_node_last_element_child(VALUE self)
|
|
569
|
-
{
|
|
570
|
-
lxb_dom_node_t *c = mkr_node_unwrap(self)->last_child;
|
|
571
|
-
while (c != NULL && c->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
572
|
-
c = c->prev;
|
|
573
|
-
}
|
|
574
|
-
return mkr_wrap_node(c, mkr_node_document(self));
|
|
575
|
-
}
|
|
576
|
-
|
|
577
|
-
/* ------------------------------------------------------------------ */
|
|
578
|
-
/* attributes (read-only) */
|
|
579
|
-
/* ------------------------------------------------------------------ */
|
|
580
|
-
|
|
581
|
-
/* node[name] -> String or nil (nil when not an element or absent). */
|
|
582
|
-
static VALUE
|
|
583
|
-
mkr_node_aref(VALUE self, VALUE rb_name)
|
|
584
|
-
{
|
|
585
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
586
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
587
|
-
return Qnil;
|
|
588
|
-
}
|
|
589
|
-
|
|
590
|
-
mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_name, "attribute name");
|
|
591
|
-
const lxb_char_t *nm = (const lxb_char_t *)nv.ptr;
|
|
592
|
-
size_t nlen = nv.len;
|
|
593
|
-
|
|
594
|
-
lxb_dom_element_t *el = lxb_dom_interface_element(node);
|
|
595
|
-
if (!lxb_dom_element_has_attribute(el, nm, nlen)) {
|
|
596
|
-
return Qnil;
|
|
597
|
-
}
|
|
598
|
-
|
|
599
|
-
size_t vlen = 0;
|
|
600
|
-
const lxb_char_t *val = lxb_dom_element_get_attribute(el, nm, nlen, &vlen);
|
|
601
|
-
RB_GC_GUARD(nv.value);
|
|
602
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)val, vlen));
|
|
603
|
-
}
|
|
604
|
-
|
|
605
|
-
/* node.key?(name) -> true/false */
|
|
606
|
-
static VALUE
|
|
607
|
-
mkr_node_has_key(VALUE self, VALUE rb_name)
|
|
608
|
-
{
|
|
609
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
610
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
611
|
-
return Qfalse;
|
|
612
|
-
}
|
|
613
|
-
mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_name, "attribute name");
|
|
614
|
-
lxb_dom_element_t *el = lxb_dom_interface_element(node);
|
|
615
|
-
bool has = lxb_dom_element_has_attribute(el, (const lxb_char_t *)nv.ptr, nv.len);
|
|
616
|
-
RB_GC_GUARD(nv.value);
|
|
617
|
-
return has ? Qtrue : Qfalse;
|
|
618
|
-
}
|
|
619
|
-
|
|
620
|
-
/* node.keys -> [String, ...] of attribute names (document order). */
|
|
621
|
-
static VALUE
|
|
622
|
-
mkr_node_keys(VALUE self)
|
|
623
|
-
{
|
|
624
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
625
|
-
VALUE ary = rb_ary_new();
|
|
626
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
627
|
-
return ary;
|
|
628
|
-
}
|
|
629
|
-
lxb_dom_attr_t *attr =
|
|
630
|
-
lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
|
631
|
-
while (attr != NULL) {
|
|
632
|
-
size_t len = 0;
|
|
633
|
-
const lxb_char_t *name = lxb_dom_attr_qualified_name(attr, &len);
|
|
634
|
-
rb_ary_push(ary, mkr_ruby_str_from_borrowed(
|
|
635
|
-
mkr_borrowed_text((const char *)name, len)));
|
|
636
|
-
attr = lxb_dom_element_next_attribute(attr);
|
|
637
|
-
}
|
|
638
|
-
return ary;
|
|
639
|
-
}
|
|
640
|
-
|
|
641
|
-
/* node.values -> [String, ...] of attribute values (document order). */
|
|
642
|
-
static VALUE
|
|
643
|
-
mkr_node_values(VALUE self)
|
|
644
|
-
{
|
|
645
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
646
|
-
VALUE ary = rb_ary_new();
|
|
647
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
648
|
-
return ary;
|
|
649
|
-
}
|
|
650
|
-
lxb_dom_attr_t *attr =
|
|
651
|
-
lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
|
652
|
-
while (attr != NULL) {
|
|
653
|
-
size_t len = 0;
|
|
654
|
-
const lxb_char_t *val = lxb_dom_attr_value(attr, &len);
|
|
655
|
-
rb_ary_push(ary, mkr_ruby_str_from_borrowed(
|
|
656
|
-
mkr_borrowed_text((const char *)val, len)));
|
|
657
|
-
attr = lxb_dom_element_next_attribute(attr);
|
|
658
|
-
}
|
|
659
|
-
return ary;
|
|
660
|
-
}
|
|
661
|
-
|
|
662
|
-
/* element.attribute_nodes -> NodeSet of Attribute nodes (document order).
|
|
663
|
-
* Empty for non-elements. These wrap the bare lxb_dom_attr_t; navigating back
|
|
664
|
-
* with Attribute#parent goes through the compat attr->owner index. */
|
|
665
|
-
static VALUE
|
|
666
|
-
mkr_node_attribute_nodes(VALUE self)
|
|
667
|
-
{
|
|
668
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
669
|
-
VALUE document = mkr_node_document(self);
|
|
670
|
-
VALUE set = mkr_node_set_new(document);
|
|
671
|
-
if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
|
|
672
|
-
return set;
|
|
673
|
-
}
|
|
674
|
-
lxb_dom_attr_t *attr =
|
|
675
|
-
lxb_dom_element_first_attribute(lxb_dom_interface_element(node));
|
|
676
|
-
while (attr != NULL) {
|
|
677
|
-
mkr_node_set_push(set, lxb_dom_interface_node(attr));
|
|
678
|
-
attr = lxb_dom_element_next_attribute(attr);
|
|
679
|
-
}
|
|
680
|
-
return set;
|
|
681
|
-
}
|
|
682
|
-
|
|
683
|
-
/* attr.value -> the attribute's value String. For non-attribute nodes, falls
|
|
684
|
-
* back to text content (matching the loose Nokogiri-ish meaning of #value). */
|
|
685
|
-
static VALUE
|
|
686
|
-
mkr_node_value(VALUE self)
|
|
687
|
-
{
|
|
688
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
689
|
-
if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
|
|
690
|
-
size_t len = 0;
|
|
691
|
-
const lxb_char_t *val =
|
|
692
|
-
lxb_dom_attr_value(lxb_dom_interface_attr(node), &len);
|
|
693
|
-
return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)val, len));
|
|
694
|
-
}
|
|
695
|
-
return mkr_node_content(self);
|
|
696
|
-
}
|
|
697
|
-
|
|
698
|
-
/* node.line -> 1-based source line, or nil when unknown.
|
|
699
|
-
*
|
|
700
|
-
* The line comes from the byte offset stamped onto the node at parse time
|
|
701
|
-
* (source-location tracking) resolved against the document's line table.
|
|
702
|
-
* Returns nil for nodes the tracker could not place (e.g. parser-inserted
|
|
703
|
-
* implicit <html>/<head>/<body>, or any node when tracking was disabled). */
|
|
704
|
-
static VALUE
|
|
705
|
-
mkr_node_line(VALUE self)
|
|
706
|
-
{
|
|
707
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
708
|
-
mkr_parsed_t *p = mkr_doc_parsed(mkr_node_document(self));
|
|
709
|
-
size_t line = mkr_parsed_node_line(p, node);
|
|
710
|
-
return line == 0 ? Qnil : ULONG2NUM(line);
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
/* ------------------------------------------------------------------ */
|
|
714
|
-
/* identity */
|
|
715
|
-
/* ------------------------------------------------------------------ */
|
|
716
|
-
|
|
717
|
-
/* Pointer identity: equal iff both wrap the same lxb_dom_node_t. */
|
|
718
|
-
static VALUE
|
|
719
|
-
mkr_node_equals(VALUE self, VALUE other)
|
|
720
|
-
{
|
|
721
|
-
if (!rb_obj_is_kind_of(other, mkr_cNode)) {
|
|
722
|
-
return Qfalse;
|
|
723
|
-
}
|
|
724
|
-
return mkr_node_unwrap(self) == mkr_node_unwrap(other) ? Qtrue : Qfalse;
|
|
725
|
-
}
|
|
726
|
-
|
|
727
|
-
/* Nokogiri-compatible identity: the underlying lxb_dom_node_t pointer as an
|
|
728
|
-
* Integer. Stable for the node's lifetime and unique among currently-live
|
|
729
|
-
* nodes; a freed-then-reallocated node may reuse an address (same caveat as
|
|
730
|
-
* Nokogiri::XML::Node#pointer_id). a.pointer_id == b.pointer_id iff a.eql?(b). */
|
|
731
|
-
static VALUE
|
|
732
|
-
mkr_node_pointer_id(VALUE self)
|
|
733
|
-
{
|
|
734
|
-
lxb_dom_node_t *node = mkr_node_unwrap(self);
|
|
735
|
-
return ULL2NUM((unsigned long long)(uintptr_t)node);
|
|
736
|
-
}
|
|
737
|
-
|
|
738
|
-
/* Stable hash derived from the node pointer, so a == b implies a.hash ==
|
|
739
|
-
* b.hash even across separately-created wrappers. Shares the pointer value
|
|
740
|
-
* with #pointer_id. */
|
|
741
|
-
static VALUE
|
|
742
|
-
mkr_node_hash(VALUE self)
|
|
743
|
-
{
|
|
744
|
-
return mkr_node_pointer_id(self);
|
|
745
|
-
}
|
|
746
|
-
|
|
747
|
-
void
|
|
748
|
-
mkr_init_node(void)
|
|
749
|
-
{
|
|
750
|
-
rb_define_method(mkr_cNode, "name", mkr_node_name, 0);
|
|
751
|
-
rb_define_method(mkr_cNode, "namespace_uri", mkr_node_namespace_uri, 0);
|
|
752
|
-
rb_define_method(mkr_cNode, "prefix", mkr_node_prefix, 0);
|
|
753
|
-
rb_define_method(mkr_cNode, "local_name", mkr_node_local_name, 0);
|
|
754
|
-
rb_define_method(mkr_cNode, "tag_name", mkr_node_tag_name, 0);
|
|
755
|
-
rb_define_method(mkr_cNode, "target", mkr_node_pi_target, 0);
|
|
756
|
-
rb_define_method(mkr_cNode, "node_type", mkr_node_get_type, 0);
|
|
757
|
-
rb_define_method(mkr_cNode, "content", mkr_node_content, 0);
|
|
758
|
-
rb_define_method(mkr_cNode, "text", mkr_node_content, 0);
|
|
759
|
-
rb_define_method(mkr_cNode, "inner_text", mkr_node_content, 0);
|
|
760
|
-
|
|
761
|
-
rb_define_method(mkr_cNode, "document", mkr_node_get_document, 0);
|
|
762
|
-
rb_define_method(mkr_cNode, "parent", mkr_node_parent, 0);
|
|
763
|
-
rb_define_method(mkr_cNode, "next", mkr_node_next, 0);
|
|
764
|
-
rb_define_method(mkr_cNode, "next_sibling", mkr_node_next, 0);
|
|
765
|
-
rb_define_method(mkr_cNode, "previous", mkr_node_previous, 0);
|
|
766
|
-
rb_define_method(mkr_cNode, "previous_sibling", mkr_node_previous, 0);
|
|
767
|
-
rb_define_method(mkr_cNode, "next_element", mkr_node_next_element, 0);
|
|
768
|
-
rb_define_method(mkr_cNode, "previous_element", mkr_node_previous_element, 0);
|
|
769
|
-
|
|
770
|
-
rb_define_method(mkr_cNode, "child", mkr_node_child, 0);
|
|
771
|
-
rb_define_method(mkr_cNode, "children", mkr_node_children, 0);
|
|
772
|
-
rb_define_method(mkr_cNode, "element_children", mkr_node_element_children, 0);
|
|
773
|
-
rb_define_method(mkr_cNode, "elements", mkr_node_element_children, 0);
|
|
774
|
-
rb_define_method(mkr_cNode, "first_element_child", mkr_node_first_element_child, 0);
|
|
775
|
-
rb_define_method(mkr_cNode, "last_element_child", mkr_node_last_element_child, 0);
|
|
776
|
-
rb_define_method(mkr_cNode, "ancestors", mkr_node_ancestors, 0);
|
|
777
|
-
|
|
778
|
-
rb_define_method(mkr_cNode, "[]", mkr_node_aref, 1);
|
|
779
|
-
rb_define_method(mkr_cNode, "key?", mkr_node_has_key, 1);
|
|
780
|
-
rb_define_method(mkr_cNode, "keys", mkr_node_keys, 0);
|
|
781
|
-
rb_define_method(mkr_cNode, "values", mkr_node_values, 0);
|
|
782
|
-
rb_define_method(mkr_cNode, "attribute_nodes", mkr_node_attribute_nodes, 0);
|
|
783
|
-
rb_define_method(mkr_cNode, "value", mkr_node_value, 0);
|
|
784
|
-
rb_define_method(mkr_cNode, "line", mkr_node_line, 0);
|
|
785
|
-
|
|
786
|
-
rb_define_method(mkr_cNode, "==", mkr_node_equals, 1);
|
|
787
|
-
rb_define_method(mkr_cNode, "eql?", mkr_node_equals, 1);
|
|
788
|
-
rb_define_method(mkr_cNode, "hash", mkr_node_hash, 0);
|
|
789
|
-
rb_define_method(mkr_cNode, "pointer_id", mkr_node_pointer_id, 0);
|
|
790
|
-
rb_define_method(mkr_cNode, "clone_node", mkr_node_clone_node, -1);
|
|
791
|
-
|
|
792
|
-
/* DocumentType identifiers (WHATWG DOM names; external_id is the
|
|
793
|
-
* Nokogiri-compatible alias for public_id). */
|
|
794
|
-
rb_define_method(mkr_cDocumentType, "public_id", mkr_doctype_public_id, 0);
|
|
795
|
-
rb_define_method(mkr_cDocumentType, "external_id", mkr_doctype_public_id, 0);
|
|
796
|
-
rb_define_method(mkr_cDocumentType, "system_id", mkr_doctype_system_id, 0);
|
|
797
|
-
|
|
798
|
-
/* <template> contents (WHATWG DOM HTMLTemplateElement.content). */
|
|
799
|
-
rb_define_method(mkr_cElement, "content_fragment", mkr_node_content_fragment, 0);
|
|
800
|
-
}
|