makiri 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/conformance.yml +22 -0
- data/.github/workflows/libfuzzer.yml +83 -0
- data/.github/workflows/release.yml +12 -7
- data/.github/workflows/security.yml +88 -3
- data/.github/workflows/valgrind.yml +135 -0
- data/CHANGELOG.md +152 -15
- data/README.md +183 -13
- data/Rakefile +294 -7
- data/ext/makiri/bridge/bridge.h +28 -0
- data/ext/makiri/bridge/ruby_string.c +282 -12
- data/ext/makiri/core/mkr_alloc.c +40 -3
- data/ext/makiri/core/mkr_alloc.h +28 -5
- data/ext/makiri/core/mkr_buf.c +47 -3
- data/ext/makiri/core/mkr_buf.h +112 -3
- data/ext/makiri/core/mkr_core.c +143 -0
- data/ext/makiri/core/mkr_core.h +11 -2
- data/ext/makiri/core/mkr_hash.h +1 -1
- data/ext/makiri/core/mkr_span.h +186 -0
- data/ext/makiri/core/mkr_text.h +8 -8
- data/ext/makiri/core/mkr_utf8.c +101 -0
- data/ext/makiri/core/mkr_utf8.h +88 -0
- data/ext/makiri/extconf.rb +123 -10
- data/ext/makiri/fuzz/Makefile +95 -0
- data/ext/makiri/fuzz/check_fuzzer.cc +4 -0
- data/ext/makiri/fuzz/xml_fuzz.c +24 -0
- data/ext/makiri/fuzz/xpath_fuzz.c +109 -0
- data/ext/makiri/glue/glue.h +55 -11
- data/ext/makiri/glue/ruby_doc.c +129 -59
- data/ext/makiri/glue/ruby_html_css.c +292 -0
- data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +248 -52
- data/ext/makiri/glue/ruby_html_node.c +859 -0
- data/ext/makiri/glue/ruby_html_serialize.c +154 -0
- data/ext/makiri/glue/ruby_node.c +74 -729
- data/ext/makiri/glue/ruby_node_set.c +167 -32
- data/ext/makiri/glue/ruby_xml.c +602 -0
- data/ext/makiri/glue/ruby_xml_node.c +1373 -0
- data/ext/makiri/glue/ruby_xpath.c +63 -30
- data/ext/makiri/glue/ruby_xpath.h +19 -0
- data/ext/makiri/lexbor_compat/compat.h +42 -9
- data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
- data/ext/makiri/lexbor_compat/dom_index.c +2 -2
- data/ext/makiri/lexbor_compat/post_parse.c +100 -10
- data/ext/makiri/lexbor_compat/source_loc.c +15 -13
- data/ext/makiri/lexbor_compat/text_index.c +14 -8
- data/ext/makiri/lexbor_compat/utf8_input.c +19 -33
- data/ext/makiri/makiri.c +184 -6
- data/ext/makiri/makiri.h +43 -2
- data/ext/makiri/xml/mkr_xml.h +125 -0
- data/ext/makiri/xml/mkr_xml_chars.c +195 -0
- data/ext/makiri/xml/mkr_xml_index.c +169 -0
- data/ext/makiri/xml/mkr_xml_index.h +48 -0
- data/ext/makiri/xml/mkr_xml_mutate.c +817 -0
- data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
- data/ext/makiri/xml/mkr_xml_node.c +399 -0
- data/ext/makiri/xml/mkr_xml_node.h +184 -0
- data/ext/makiri/xml/mkr_xml_tree.c +1515 -0
- data/ext/makiri/xpath/mkr_css.c +1023 -0
- data/ext/makiri/xpath/mkr_css.h +65 -0
- data/ext/makiri/xpath/mkr_xpath.c +96 -32
- data/ext/makiri/xpath/mkr_xpath.h +109 -4
- data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
- data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
- data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +551 -241
- data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +318 -276
- data/ext/makiri/xpath/mkr_xpath_internal.h +177 -206
- data/ext/makiri/xpath/mkr_xpath_lex.c +95 -125
- data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
- data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +145 -0
- data/ext/makiri/xpath/mkr_xpath_number.c +109 -0
- data/ext/makiri/xpath/mkr_xpath_parse.c +83 -94
- data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
- data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
- data/ext/makiri/xpath/mkr_xpath_shared.c +609 -0
- data/ext/makiri/xpath/mkr_xpath_value_body.h +801 -0
- data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
- data/lib/makiri/{attribute.rb → attr.rb} +7 -3
- data/lib/makiri/cdata_section.rb +19 -0
- data/lib/makiri/comment.rb +10 -0
- data/lib/makiri/compat_aliases.rb +30 -0
- data/lib/makiri/document.rb +9 -73
- data/lib/makiri/document_fragment.rb +14 -9
- data/lib/makiri/element.rb +4 -4
- data/lib/makiri/html/document.rb +106 -0
- data/lib/makiri/html/node_methods.rb +19 -0
- data/lib/makiri/html.rb +12 -0
- data/lib/makiri/node.rb +58 -15
- data/lib/makiri/node_set.rb +8 -0
- data/lib/makiri/processing_instruction.rb +10 -0
- data/lib/makiri/text.rb +1 -1
- data/lib/makiri/version.rb +1 -1
- data/lib/makiri/xml/builder.rb +263 -0
- data/lib/makiri/xml/document.rb +24 -0
- data/lib/makiri/xml/node_methods.rb +84 -0
- data/lib/makiri/xml.rb +10 -0
- data/lib/makiri/xpath_context.rb +1 -1
- data/lib/makiri.rb +24 -5
- data/script/build_native_gem.rb +2 -2
- data/script/check_alloc_failures.rb +266 -0
- data/script/check_c_safety.rb +77 -2
- data/script/check_c_safety_allowlist.yml +102 -0
- data/script/check_leaks.rb +64 -0
- data/script/leaks_harness.rb +64 -0
- data/vendor/lexbor/CMakeLists.txt +6 -0
- data/vendor/lexbor/README.md +12 -0
- data/vendor/lexbor/config.cmake +1 -1
- data/vendor/lexbor/source/lexbor/core/base.h +1 -1
- data/vendor/lexbor/source/lexbor/core/config.cmake +9 -1
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +2 -3
- data/vendor/lexbor/source/lexbor/css/selectors/state.c +3 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +21 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +5 -0
- data/vendor/lexbor/source/lexbor/encoding/decode.c +33 -4
- data/vendor/lexbor/source/lexbor/html/base.h +1 -1
- data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +4 -0
- data/vendor/lexbor/source/lexbor/html/serialize.c +545 -41
- data/vendor/lexbor/source/lexbor/html/serialize.h +2 -1
- data/vendor/lexbor/source/lexbor/html/tokenizer.h +2 -2
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1 -1
- data/vendor/lexbor/source/lexbor/html/tree.c +6 -6
- data/vendor/lexbor/source/lexbor/selectors/selectors.c +12 -3
- data/vendor/lexbor/source/lexbor/url/base.h +1 -1
- data/vendor/lexbor/source/lexbor/url/url.c +5 -2
- data/vendor/lexbor/source/lexbor/url/url.h +9 -0
- data/vendor/lexbor/version +1 -1
- metadata +53 -9
- data/ext/makiri/glue/ruby_css.c +0 -185
- data/ext/makiri/glue/ruby_serialize.c +0 -92
- data/ext/makiri/xpath/mkr_xpath_value.c +0 -1286
- data/lib/makiri/cdata.rb +0 -6
|
@@ -0,0 +1,1023 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* CSS selector front-end: lowers a Lexbor-parsed selector list into the native
|
|
3
|
+
* XPath engine's AST (see mkr_css.h). No new evaluator opcodes - every selector
|
|
4
|
+
* becomes existing PATH / step / predicate nodes, so the shared evaluator's
|
|
5
|
+
* budgets, document order, dedup and namespace resolution apply unchanged.
|
|
6
|
+
*
|
|
7
|
+
* This is a SHARED translation unit (compiled once, like mkr_xpath_parse.c): it
|
|
8
|
+
* includes mkr_xpath_internal.h with the default `void` MKR_DOM_NODE - it only
|
|
9
|
+
* BUILDS the representation-neutral AST, never dereferences a node.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#include "mkr_css.h"
|
|
13
|
+
#include "mkr_xpath_internal.h"
|
|
14
|
+
#include "../core/mkr_core.h"
|
|
15
|
+
|
|
16
|
+
#include <lexbor/css/css.h>
|
|
17
|
+
#include <lexbor/selectors/selectors.h>
|
|
18
|
+
|
|
19
|
+
#include <stdlib.h>
|
|
20
|
+
#include <string.h>
|
|
21
|
+
|
|
22
|
+
/* ------------------------------------------------------------------ *
|
|
23
|
+
* Process-global Lexbor CSS parser (selector parsing only - NOT the matcher).
|
|
24
|
+
* CSS compilation runs under the GVL (the glue holds it), so a single global is
|
|
25
|
+
* safe with no locking, like the HTML CSS engine. Created lazily; on failure the
|
|
26
|
+
* caller reports a syntax/engine error.
|
|
27
|
+
* ------------------------------------------------------------------ */
|
|
28
|
+
static lxb_css_memory_t *g_mem;
|
|
29
|
+
static lxb_css_parser_t *g_parser;
|
|
30
|
+
static lxb_css_selectors_t *g_sel;
|
|
31
|
+
static int g_ready;
|
|
32
|
+
|
|
33
|
+
static int
|
|
34
|
+
css_parser_ready(void)
|
|
35
|
+
{
|
|
36
|
+
if (g_ready) return 1;
|
|
37
|
+
|
|
38
|
+
lxb_css_memory_t *mem = lxb_css_memory_create();
|
|
39
|
+
lxb_css_parser_t *par = lxb_css_parser_create();
|
|
40
|
+
lxb_css_selectors_t *sel = lxb_css_selectors_create();
|
|
41
|
+
|
|
42
|
+
int ok = (mem != NULL && par != NULL && sel != NULL)
|
|
43
|
+
&& (lxb_css_memory_init(mem, 128) == LXB_STATUS_OK)
|
|
44
|
+
&& (lxb_css_parser_init(par, NULL) == LXB_STATUS_OK)
|
|
45
|
+
&& (lxb_css_selectors_init(sel) == LXB_STATUS_OK);
|
|
46
|
+
if (!ok) {
|
|
47
|
+
if (sel != NULL) lxb_css_selectors_destroy(sel, true);
|
|
48
|
+
if (par != NULL) lxb_css_parser_destroy(par, true);
|
|
49
|
+
if (mem != NULL) lxb_css_memory_destroy(mem, true);
|
|
50
|
+
return 0;
|
|
51
|
+
}
|
|
52
|
+
lxb_css_parser_memory_set(par, mem);
|
|
53
|
+
lxb_css_parser_selectors_set(par, sel);
|
|
54
|
+
g_mem = mem; g_parser = par; g_sel = sel; g_ready = 1;
|
|
55
|
+
return 1;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/* ------------------------------------------------------------------ *
|
|
59
|
+
* AST builders. Every allocation matches mkr_node_free's free contract: nodes
|
|
60
|
+
* via mkr_callocarray, owned text via mkr_owned_text_from_borrowed_copy, arrays
|
|
61
|
+
* via malloc/realloc. On any OOM/limit the builder sets *err and returns NULL/-1
|
|
62
|
+
* so the whole compile fails closed (a partial AST is freed by the caller).
|
|
63
|
+
* ------------------------------------------------------------------ */
|
|
64
|
+
|
|
65
|
+
typedef struct {
|
|
66
|
+
mkr_xpath_limits_t *limits;
|
|
67
|
+
mkr_xpath_error_t *err;
|
|
68
|
+
const mkr_css_ns_t *ns;
|
|
69
|
+
} css_build_t;
|
|
70
|
+
|
|
71
|
+
static mkr_node_t *
|
|
72
|
+
cb_node(css_build_t *B, mkr_nk_t kind)
|
|
73
|
+
{
|
|
74
|
+
return mkr_node_alloc(B->limits, B->err, kind); /* shared AST factory (co-located with mkr_node_free) */
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
static int
|
|
78
|
+
cb_set_text(css_build_t *B, mkr_owned_text_t *out, const char *s, size_t len)
|
|
79
|
+
{
|
|
80
|
+
return mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text(s, len), B->err, "css name");
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
static mkr_node_t *
|
|
84
|
+
cb_literal(css_build_t *B, const char *s, size_t len)
|
|
85
|
+
{
|
|
86
|
+
mkr_node_t *n = cb_node(B, MKR_NK_LITERAL_STR);
|
|
87
|
+
if (n == NULL) return NULL;
|
|
88
|
+
if (cb_set_text(B, &n->u.literal, s, len) != 0) { mkr_node_free(n); return NULL; }
|
|
89
|
+
return n;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
static mkr_node_t *
|
|
93
|
+
cb_num(css_build_t *B, double v)
|
|
94
|
+
{
|
|
95
|
+
mkr_node_t *n = cb_node(B, MKR_NK_LITERAL_NUM);
|
|
96
|
+
if (n == NULL) return NULL;
|
|
97
|
+
n->u.literal_num = v;
|
|
98
|
+
return n;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
static mkr_node_t *
|
|
102
|
+
cb_binop(css_build_t *B, mkr_op_t op, mkr_node_t *lhs, mkr_node_t *rhs)
|
|
103
|
+
{
|
|
104
|
+
if (lhs == NULL || rhs == NULL) { mkr_node_free(lhs); mkr_node_free(rhs); return NULL; }
|
|
105
|
+
mkr_node_t *n = cb_node(B, MKR_NK_BINOP);
|
|
106
|
+
if (n == NULL) { mkr_node_free(lhs); mkr_node_free(rhs); return NULL; }
|
|
107
|
+
n->u.binop.op = op; n->u.binop.lhs = lhs; n->u.binop.rhs = rhs;
|
|
108
|
+
return n;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/* A function call taking ownership of args[0..nargs). On failure frees args.
|
|
112
|
+
* The name is always an internal compile-time literal; the cb_fncall macro
|
|
113
|
+
* supplies its length so no strlen on a (possibly non-NUL-checked) pointer. */
|
|
114
|
+
static mkr_node_t *
|
|
115
|
+
cb_fncall_n(css_build_t *B, const char *name, size_t namelen, mkr_node_t **args, size_t nargs)
|
|
116
|
+
{
|
|
117
|
+
for (size_t i = 0; i < nargs; i++) {
|
|
118
|
+
if (args[i] == NULL) { for (size_t j = 0; j < nargs; j++) mkr_node_free(args[j]); free(args); return NULL; }
|
|
119
|
+
}
|
|
120
|
+
mkr_node_t *n = cb_node(B, MKR_NK_FNCALL);
|
|
121
|
+
if (n == NULL) { for (size_t j = 0; j < nargs; j++) mkr_node_free(args[j]); free(args); return NULL; }
|
|
122
|
+
if (cb_set_text(B, &n->u.fncall.name, name, namelen) != 0) {
|
|
123
|
+
for (size_t j = 0; j < nargs; j++) mkr_node_free(args[j]); free(args); mkr_node_free(n); return NULL;
|
|
124
|
+
}
|
|
125
|
+
n->u.fncall.args = args; n->u.fncall.nargs = nargs;
|
|
126
|
+
return n;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
/* Callers pass a string literal, so sizeof - 1 is its exact length. */
|
|
130
|
+
#define cb_fncall(B, name_lit, args, nargs) \
|
|
131
|
+
cb_fncall_n((B), (name_lit), sizeof(name_lit) - 1, (args), (nargs))
|
|
132
|
+
|
|
133
|
+
static mkr_node_t **
|
|
134
|
+
cb_args(size_t n)
|
|
135
|
+
{
|
|
136
|
+
return (mkr_node_t **)mkr_callocarray(n ? n : 1, sizeof(mkr_node_t *));
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/* A single-step relative PATH: axis::nodetest (no predicates). nt_local may be
|
|
140
|
+
* NULL for a wildcard. Used for @attr, preceding-sibling::*, child::node(), etc. */
|
|
141
|
+
static mkr_node_t *
|
|
142
|
+
cb_step_path(css_build_t *B, mkr_axis_t axis, mkr_nt_kind_t nt_kind,
|
|
143
|
+
const char *local, size_t local_len)
|
|
144
|
+
{
|
|
145
|
+
mkr_node_t *n = cb_node(B, MKR_NK_PATH);
|
|
146
|
+
if (n == NULL) return NULL;
|
|
147
|
+
mkr_step_t *steps = (mkr_step_t *)mkr_callocarray(1, sizeof(mkr_step_t));
|
|
148
|
+
if (steps == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "out of memory (css)"); mkr_node_free(n); return NULL; }
|
|
149
|
+
steps[0].axis = axis;
|
|
150
|
+
steps[0].test.kind = nt_kind;
|
|
151
|
+
if (nt_kind == MKR_NT_NAME && local != NULL) {
|
|
152
|
+
if (cb_set_text(B, &steps[0].test.local, local, local_len) != 0) {
|
|
153
|
+
free(steps); mkr_node_free(n); return NULL;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
n->u.path.absolute = 0;
|
|
157
|
+
n->u.path.steps = steps;
|
|
158
|
+
n->u.path.nsteps = 1;
|
|
159
|
+
return n;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
/* @prefix:name (or @name when prefix is NULL) as a relative attribute-axis path. */
|
|
163
|
+
static mkr_node_t *
|
|
164
|
+
cb_attr_ns(css_build_t *B, const char *prefix, size_t plen, const char *name, size_t len)
|
|
165
|
+
{
|
|
166
|
+
mkr_node_t *n = cb_node(B, MKR_NK_PATH);
|
|
167
|
+
if (n == NULL) return NULL;
|
|
168
|
+
mkr_step_t *steps = (mkr_step_t *)mkr_callocarray(1, sizeof(mkr_step_t));
|
|
169
|
+
if (steps == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "out of memory (css)"); mkr_node_free(n); return NULL; }
|
|
170
|
+
steps[0].axis = MKR_AXIS_ATTRIBUTE;
|
|
171
|
+
steps[0].test.kind = MKR_NT_NAME;
|
|
172
|
+
if (cb_set_text(B, &steps[0].test.local, name, len) != 0) { free(steps); mkr_node_free(n); return NULL; }
|
|
173
|
+
if (prefix != NULL && plen > 0) {
|
|
174
|
+
if (cb_set_text(B, &steps[0].test.prefix, prefix, plen) != 0) {
|
|
175
|
+
mkr_owned_text_clear(&steps[0].test.local); free(steps); mkr_node_free(n); return NULL;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
n->u.path.absolute = 0;
|
|
179
|
+
n->u.path.steps = steps;
|
|
180
|
+
n->u.path.nsteps = 1;
|
|
181
|
+
return n;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/* @name as a relative attribute-axis path (no namespace). */
|
|
185
|
+
static mkr_node_t *
|
|
186
|
+
cb_attr(css_build_t *B, const char *name, size_t len)
|
|
187
|
+
{
|
|
188
|
+
return cb_attr_ns(B, NULL, 0, name, len);
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/* normalize-space(@[prefix:]name) */
|
|
192
|
+
static mkr_node_t *
|
|
193
|
+
cb_norm_attr(css_build_t *B, const char *prefix, size_t plen, const char *name, size_t len)
|
|
194
|
+
{
|
|
195
|
+
mkr_node_t **a = cb_args(1);
|
|
196
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
197
|
+
a[0] = cb_attr_ns(B, prefix, plen, name, len);
|
|
198
|
+
return cb_fncall(B, "normalize-space", a, 1);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
/* concat(" ", normalize-space(@name), " ") - the whitespace-padded token list. */
|
|
202
|
+
static mkr_node_t *
|
|
203
|
+
cb_padded_tokens(css_build_t *B, const char *prefix, size_t plen, const char *name, size_t len)
|
|
204
|
+
{
|
|
205
|
+
mkr_node_t **a = cb_args(3);
|
|
206
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
207
|
+
a[0] = cb_literal(B, " ", 1);
|
|
208
|
+
a[1] = cb_norm_attr(B, prefix, plen, name, len);
|
|
209
|
+
a[2] = cb_literal(B, " ", 1);
|
|
210
|
+
return cb_fncall(B, "concat", a, 3);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/* contains(concat(' ',normalize-space(@name),' '), ' value ') - the [name~=value]
|
|
214
|
+
* / .class membership predicate. +value+ is wrapped with surrounding spaces. */
|
|
215
|
+
static mkr_node_t *
|
|
216
|
+
cb_token_match(css_build_t *B, const char *prefix, size_t plen,
|
|
217
|
+
const char *attr, size_t attrlen,
|
|
218
|
+
const char *value, size_t vlen)
|
|
219
|
+
{
|
|
220
|
+
/* build " value " literal */
|
|
221
|
+
size_t alen = vlen + 2;
|
|
222
|
+
char *padded = mkr_str_alloc(alen); /* alen content bytes + a preset NUL */
|
|
223
|
+
if (padded == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
224
|
+
padded[0] = ' ';
|
|
225
|
+
memcpy(padded + 1, value, vlen);
|
|
226
|
+
padded[1 + vlen] = ' ';
|
|
227
|
+
|
|
228
|
+
mkr_node_t **a = cb_args(2);
|
|
229
|
+
if (a == NULL) { free(padded); mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
230
|
+
a[0] = cb_padded_tokens(B, prefix, plen, attr, attrlen);
|
|
231
|
+
a[1] = cb_literal(B, padded, alen);
|
|
232
|
+
free(padded);
|
|
233
|
+
return cb_fncall(B, "contains", a, 2);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/* ------------------------------------------------------------------ *
|
|
237
|
+
* Step / predicate dynamic arrays.
|
|
238
|
+
* ------------------------------------------------------------------ */
|
|
239
|
+
|
|
240
|
+
typedef struct { mkr_step_t *v; size_t n, cap; } steps_arr_t;
|
|
241
|
+
typedef struct { mkr_node_t **v; size_t n, cap; } preds_arr_t;
|
|
242
|
+
|
|
243
|
+
static int
|
|
244
|
+
steps_push(css_build_t *B, steps_arr_t *a, mkr_step_t s)
|
|
245
|
+
{
|
|
246
|
+
if (mkr_grow_reserve((void **)&a->v, &a->cap, a->n + 1, sizeof(mkr_step_t)) != MKR_OK) {
|
|
247
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "out of memory growing step array");
|
|
248
|
+
return -1;
|
|
249
|
+
}
|
|
250
|
+
a->v[a->n++] = s;
|
|
251
|
+
return 0;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
static int
|
|
255
|
+
preds_push(css_build_t *B, preds_arr_t *a, mkr_node_t *p)
|
|
256
|
+
{
|
|
257
|
+
if (p == NULL) return -1;
|
|
258
|
+
if (mkr_grow_reserve((void **)&a->v, &a->cap, a->n + 1, sizeof(mkr_node_t *)) != MKR_OK) {
|
|
259
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "out of memory growing predicate array");
|
|
260
|
+
mkr_node_free(p);
|
|
261
|
+
return -1;
|
|
262
|
+
}
|
|
263
|
+
a->v[a->n++] = p;
|
|
264
|
+
return 0;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
/* ------------------------------------------------------------------ *
|
|
268
|
+
* Lowering a compound (run of CLOSE-linked simple selectors on one element).
|
|
269
|
+
* ------------------------------------------------------------------ */
|
|
270
|
+
|
|
271
|
+
/* Set the step's name test from a type selector, honouring CSS namespace rules
|
|
272
|
+
* and the Nokogiri default-namespace binding (see mkr_css.h). Returns 0 / -1. */
|
|
273
|
+
static int
|
|
274
|
+
lower_type(css_build_t *B, const lxb_css_selector_t *s, mkr_step_t *step,
|
|
275
|
+
preds_arr_t *preds)
|
|
276
|
+
{
|
|
277
|
+
const char *name = (const char *)s->name.data;
|
|
278
|
+
size_t nlen = s->name.length;
|
|
279
|
+
|
|
280
|
+
if (s->type == LXB_CSS_SELECTOR_TYPE_ANY) { /* * or ns|* */
|
|
281
|
+
step->test.kind = MKR_NT_WILDCARD;
|
|
282
|
+
return 0;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/* Namespace component (s->ns): NULL data = bare (no pipe); "*" = any; ""
|
|
286
|
+
* (length 0, non-NULL) = no namespace; else an explicit prefix. */
|
|
287
|
+
const char *nsd = (const char *)s->ns.data;
|
|
288
|
+
size_t nsl = s->ns.length;
|
|
289
|
+
|
|
290
|
+
if (nsd != NULL && nsl == 1 && nsd[0] == '*') {
|
|
291
|
+
/* *|el : any namespace, specific local name -> wildcard + local-name() pred */
|
|
292
|
+
step->test.kind = MKR_NT_WILDCARD;
|
|
293
|
+
mkr_node_t **a = cb_args(0);
|
|
294
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return -1; }
|
|
295
|
+
mkr_node_t *ln = cb_fncall(B, "local-name", a, 0);
|
|
296
|
+
mkr_node_t *lit = cb_literal(B, name, nlen);
|
|
297
|
+
return preds_push(B, preds, cb_binop(B, MKR_OP_EQ, ln, lit));
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
step->test.kind = MKR_NT_NAME;
|
|
301
|
+
if (cb_set_text(B, &step->test.local, name, nlen) != 0) return -1;
|
|
302
|
+
|
|
303
|
+
if (nsd != NULL) {
|
|
304
|
+
if (nsl > 0) { /* p|el */
|
|
305
|
+
if (cb_set_text(B, &step->test.prefix, nsd, nsl) != 0) return -1;
|
|
306
|
+
}
|
|
307
|
+
/* nsl == 0 (|el): no-namespace -> leave prefix NULL */
|
|
308
|
+
} else if (B->ns != NULL && B->ns->default_prefix != NULL) {
|
|
309
|
+
/* bare el with a document default namespace -> bind to the default prefix
|
|
310
|
+
* (always the MKR_CSS_DEFAULT_NS_PREFIX sentinel, so its length is known). */
|
|
311
|
+
if (cb_set_text(B, &step->test.prefix, B->ns->default_prefix,
|
|
312
|
+
sizeof(MKR_CSS_DEFAULT_NS_PREFIX) - 1) != 0) return -1;
|
|
313
|
+
}
|
|
314
|
+
return 0;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
/* [name op value] attribute predicate -> an expression. */
|
|
318
|
+
static mkr_node_t *
|
|
319
|
+
lower_attribute(css_build_t *B, const lxb_css_selector_t *s)
|
|
320
|
+
{
|
|
321
|
+
const char *nm = (const char *)s->name.data;
|
|
322
|
+
size_t nl = s->name.length;
|
|
323
|
+
const lxb_css_selector_attribute_t *at = &s->u.attribute;
|
|
324
|
+
|
|
325
|
+
if (at->modifier == LXB_CSS_SELECTOR_MODIFIER_I
|
|
326
|
+
|| at->modifier == LXB_CSS_SELECTOR_MODIFIER_S) {
|
|
327
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX,
|
|
328
|
+
"CSS attribute case modifier ([a=v i]) is not supported");
|
|
329
|
+
return NULL;
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
/* Attribute namespace (s->ns): NULL = bare (no namespace, the common case);
|
|
333
|
+
* "p" = prefixed (@p:name); "*" = any (not yet supported). Unprefixed CSS
|
|
334
|
+
* attribute selectors match the no-namespace attribute, per CSS/XPath. */
|
|
335
|
+
const char *px = NULL; size_t pxl = 0;
|
|
336
|
+
if (s->ns.data != NULL) {
|
|
337
|
+
if (s->ns.length == 1 && s->ns.data[0] == '*') {
|
|
338
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX,
|
|
339
|
+
"any-namespace attribute selectors ([*|a]) are not supported");
|
|
340
|
+
return NULL;
|
|
341
|
+
}
|
|
342
|
+
px = (const char *)s->ns.data; pxl = s->ns.length; /* length 0 (|a) -> no-ns */
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
if (at->value.data == NULL) { /* [name] - existence */
|
|
346
|
+
return cb_attr_ns(B, px, pxl, nm, nl);
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
const char *v = (const char *)at->value.data;
|
|
350
|
+
size_t vl = at->value.length;
|
|
351
|
+
|
|
352
|
+
switch (at->match) {
|
|
353
|
+
case LXB_CSS_SELECTOR_MATCH_EQUAL: /* [a=v] -> @a = 'v' */
|
|
354
|
+
return cb_binop(B, MKR_OP_EQ, cb_attr_ns(B, px, pxl, nm, nl), cb_literal(B, v, vl));
|
|
355
|
+
|
|
356
|
+
case LXB_CSS_SELECTOR_MATCH_INCLUDE: /* [a~=v] -> token match */
|
|
357
|
+
return cb_token_match(B, px, pxl, nm, nl, v, vl);
|
|
358
|
+
|
|
359
|
+
case LXB_CSS_SELECTOR_MATCH_PREFIX: { /* [a^=v] -> starts-with(@a,'v') */
|
|
360
|
+
mkr_node_t **a = cb_args(2);
|
|
361
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
362
|
+
a[0] = cb_attr_ns(B, px, pxl, nm, nl); a[1] = cb_literal(B, v, vl);
|
|
363
|
+
return cb_fncall(B, "starts-with", a, 2);
|
|
364
|
+
}
|
|
365
|
+
case LXB_CSS_SELECTOR_MATCH_SUBSTRING: { /* [a*=v] -> contains(@a,'v') */
|
|
366
|
+
mkr_node_t **a = cb_args(2);
|
|
367
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
368
|
+
a[0] = cb_attr_ns(B, px, pxl, nm, nl); a[1] = cb_literal(B, v, vl);
|
|
369
|
+
return cb_fncall(B, "contains", a, 2);
|
|
370
|
+
}
|
|
371
|
+
case LXB_CSS_SELECTOR_MATCH_SUFFIX: { /* [a$=v] -> substring(@a, string-length(@a)-len+1)='v' */
|
|
372
|
+
mkr_node_t **sla = cb_args(1);
|
|
373
|
+
if (sla == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
374
|
+
sla[0] = cb_attr_ns(B, px, pxl, nm, nl);
|
|
375
|
+
mkr_node_t *slen = cb_fncall(B, "string-length", sla, 1); /* string-length(@a) */
|
|
376
|
+
mkr_node_t *vlen = cb_num(B, (double)vl);
|
|
377
|
+
mkr_node_t *one = cb_num(B, 1.0);
|
|
378
|
+
mkr_node_t *start = cb_binop(B, MKR_OP_ADD,
|
|
379
|
+
cb_binop(B, MKR_OP_SUB, slen, vlen), one); /* len-vl+1 */
|
|
380
|
+
mkr_node_t **sa = cb_args(2);
|
|
381
|
+
if (sa == NULL) { mkr_node_free(start); mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
382
|
+
sa[0] = cb_attr_ns(B, px, pxl, nm, nl); sa[1] = start;
|
|
383
|
+
mkr_node_t *sub = cb_fncall(B, "substring", sa, 2); /* substring(@a, start) */
|
|
384
|
+
return cb_binop(B, MKR_OP_EQ, sub, cb_literal(B, v, vl));
|
|
385
|
+
}
|
|
386
|
+
case LXB_CSS_SELECTOR_MATCH_DASH: { /* [a|=v] -> @a='v' or starts-with(@a,'v-') */
|
|
387
|
+
mkr_node_t *eq = cb_binop(B, MKR_OP_EQ, cb_attr_ns(B, px, pxl, nm, nl), cb_literal(B, v, vl));
|
|
388
|
+
size_t dl = vl + 1;
|
|
389
|
+
char *dash = mkr_str_alloc(dl); /* dl content bytes + a preset NUL */
|
|
390
|
+
if (dash == NULL) { mkr_node_free(eq); mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
391
|
+
memcpy(dash, v, vl); dash[vl] = '-';
|
|
392
|
+
mkr_node_t **a = cb_args(2);
|
|
393
|
+
if (a == NULL) { free(dash); mkr_node_free(eq); mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
394
|
+
a[0] = cb_attr_ns(B, px, pxl, nm, nl); a[1] = cb_literal(B, dash, dl);
|
|
395
|
+
free(dash);
|
|
396
|
+
mkr_node_t *pre = cb_fncall(B, "starts-with", a, 2);
|
|
397
|
+
return cb_binop(B, MKR_OP_OR, eq, pre);
|
|
398
|
+
}
|
|
399
|
+
default:
|
|
400
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, "unsupported CSS attribute operator");
|
|
401
|
+
return NULL;
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
/* Forward declarations (mutual recursion: functional pseudos lower sub-selectors
|
|
406
|
+
* back through the compound/complex lowering). */
|
|
407
|
+
/* +relative_first+: when nonzero the first compound honours its own combinator
|
|
408
|
+
* relative to the context node (used by :has, where `:has(> a)` / `:has(+ a)` /
|
|
409
|
+
* `:has(~ a)` mean child / adjacent / general-sibling of self); the top-level
|
|
410
|
+
* query passes 0, making the first compound a plain descendant of the context. */
|
|
411
|
+
static mkr_node_t *lower_complex(css_build_t *B, const lxb_css_selector_t *first,
|
|
412
|
+
int relative_first);
|
|
413
|
+
/* Boolean self-test for a (possibly multi-compound) complex selector, used by
|
|
414
|
+
* :is()/:where()/:not(). Combinators are expressed with the reverse axes:
|
|
415
|
+
* a b -> self::b/ancestor::a a > b -> self::b/parent::a
|
|
416
|
+
* a + b -> self::b/preceding-sibling::*[1]/self::a a ~ b -> .../preceding-sibling::a
|
|
417
|
+
* so the path is non-empty (truthy) exactly when self matches the selector. */
|
|
418
|
+
static mkr_node_t *lower_complex_selftest(css_build_t *B, const lxb_css_selector_t *first);
|
|
419
|
+
|
|
420
|
+
/* Cap on compounds in one :is()/:not() argument (selector-complexity bound). */
|
|
421
|
+
#define MKR_CSS_MAX_COMPOUNDS 64
|
|
422
|
+
|
|
423
|
+
/* not(axis::*) - "no sibling/child on that axis". */
|
|
424
|
+
static mkr_node_t *
|
|
425
|
+
cb_not_axis(css_build_t *B, mkr_axis_t axis, mkr_nt_kind_t nt)
|
|
426
|
+
{
|
|
427
|
+
mkr_node_t **a = cb_args(1);
|
|
428
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
429
|
+
a[0] = cb_step_path(B, axis, nt, NULL, 0);
|
|
430
|
+
return cb_fncall(B, "not", a, 1);
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
/* not([prefix:]name on axis) - "no same-named sibling on that axis" (of-type). */
|
|
434
|
+
static mkr_node_t *
|
|
435
|
+
cb_not_named_axis(css_build_t *B, mkr_axis_t axis, const mkr_nodetest_t *t)
|
|
436
|
+
{
|
|
437
|
+
mkr_node_t **a = cb_args(1);
|
|
438
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
439
|
+
mkr_node_t *p = cb_node(B, MKR_NK_PATH);
|
|
440
|
+
if (p == NULL) { free(a); return NULL; }
|
|
441
|
+
mkr_step_t *st = (mkr_step_t *)mkr_callocarray(1, sizeof(mkr_step_t));
|
|
442
|
+
if (st == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); mkr_node_free(p); free(a); return NULL; }
|
|
443
|
+
st[0].axis = axis;
|
|
444
|
+
st[0].test.kind = MKR_NT_NAME;
|
|
445
|
+
if (cb_set_text(B, &st[0].test.local, (const char *)t->local.ptr, t->local.len) != 0) {
|
|
446
|
+
free(st); mkr_node_free(p); free(a); return NULL;
|
|
447
|
+
}
|
|
448
|
+
if (t->prefix.ptr != NULL && t->prefix.len > 0) {
|
|
449
|
+
if (cb_set_text(B, &st[0].test.prefix, (const char *)t->prefix.ptr, t->prefix.len) != 0) {
|
|
450
|
+
mkr_owned_text_clear(&st[0].test.local); free(st); mkr_node_free(p); free(a); return NULL;
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
p->u.path.absolute = 0; p->u.path.steps = st; p->u.path.nsteps = 1;
|
|
454
|
+
a[0] = p;
|
|
455
|
+
return cb_fncall(B, "not", a, 1);
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
/* count(axis::test) + 1 - 1-based position among the matched siblings. test is *
|
|
459
|
+
* (local==NULL) for nth-child or a copied name (for nth-of-type). */
|
|
460
|
+
static mkr_node_t *
|
|
461
|
+
cb_pos(css_build_t *B, mkr_axis_t axis, const mkr_nodetest_t *named)
|
|
462
|
+
{
|
|
463
|
+
mkr_node_t *path;
|
|
464
|
+
if (named == NULL) {
|
|
465
|
+
path = cb_step_path(B, axis, MKR_NT_WILDCARD, NULL, 0);
|
|
466
|
+
} else {
|
|
467
|
+
path = cb_node(B, MKR_NK_PATH);
|
|
468
|
+
if (path != NULL) {
|
|
469
|
+
mkr_step_t *st = (mkr_step_t *)mkr_callocarray(1, sizeof(mkr_step_t));
|
|
470
|
+
if (st == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); mkr_node_free(path); path = NULL; }
|
|
471
|
+
else {
|
|
472
|
+
st[0].axis = axis; st[0].test.kind = MKR_NT_NAME;
|
|
473
|
+
if (cb_set_text(B, &st[0].test.local, (const char *)named->local.ptr, named->local.len) != 0) {
|
|
474
|
+
free(st); mkr_node_free(path); path = NULL;
|
|
475
|
+
} else {
|
|
476
|
+
if (named->prefix.ptr != NULL && named->prefix.len > 0
|
|
477
|
+
&& cb_set_text(B, &st[0].test.prefix, (const char *)named->prefix.ptr, named->prefix.len) != 0) {
|
|
478
|
+
mkr_owned_text_clear(&st[0].test.local); free(st); mkr_node_free(path); path = NULL;
|
|
479
|
+
} else { path->u.path.absolute = 0; path->u.path.steps = st; path->u.path.nsteps = 1; }
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
mkr_node_t **a = cb_args(1);
|
|
485
|
+
if (a == NULL) { mkr_node_free(path); mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
486
|
+
a[0] = path;
|
|
487
|
+
return cb_binop(B, MKR_OP_ADD, cb_fncall(B, "count", a, 1), cb_num(B, 1.0));
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
/* The internal of-type position fn (1-based, among same-type siblings). +forward+
|
|
491
|
+
* counts from the start (preceding-sibling direction), else from the end. Used
|
|
492
|
+
* for an *untyped* of-type, whose "type" is the element's own expanded name - a
|
|
493
|
+
* self comparison pure XPath 1.0 cannot make. See MKR_FN_OF_TYPE_POS. */
|
|
494
|
+
static mkr_node_t *
|
|
495
|
+
cb_of_type_pos(css_build_t *B, int forward)
|
|
496
|
+
{
|
|
497
|
+
mkr_node_t **a = cb_args(0);
|
|
498
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
499
|
+
return forward ? cb_fncall(B, MKR_FN_OF_TYPE_POS, a, 0)
|
|
500
|
+
: cb_fncall(B, MKR_FN_OF_TYPE_POS_LAST, a, 0);
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
/* 1-based position expression for :nth-*: an untyped of-type uses the internal
|
|
504
|
+
* of-type-pos fn; otherwise count(axis::test)+1 (nth-child when named==NULL, a
|
|
505
|
+
* typed of-type when named). */
|
|
506
|
+
static mkr_node_t *
|
|
507
|
+
cb_pos_expr(css_build_t *B, mkr_axis_t axis, const mkr_nodetest_t *named, int oftype_untyped)
|
|
508
|
+
{
|
|
509
|
+
if (oftype_untyped) return cb_of_type_pos(B, axis == MKR_AXIS_PRECEDING_SIBLING);
|
|
510
|
+
return cb_pos(B, axis, named);
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
/* The :nth-*(an+b) match condition over the position expression on +axis+
|
|
514
|
+
* (preceding/following-sibling), optionally restricted to +named+ (typed
|
|
515
|
+
* of-type) or +oftype_untyped+ (of-type with no explicit type selector). */
|
|
516
|
+
static mkr_node_t *
|
|
517
|
+
cb_nth(css_build_t *B, mkr_axis_t axis, const mkr_nodetest_t *named,
|
|
518
|
+
int oftype_untyped, long a, long b)
|
|
519
|
+
{
|
|
520
|
+
if (a == 0) { /* position = b */
|
|
521
|
+
return cb_binop(B, MKR_OP_EQ, cb_pos_expr(B, axis, named, oftype_untyped), cb_num(B, (double)b));
|
|
522
|
+
}
|
|
523
|
+
/* (pos-b) mod a == 0 AND (pos-b) div a >= 0 (forward, valid index) */
|
|
524
|
+
mkr_node_t *d1 = cb_binop(B, MKR_OP_SUB, cb_pos_expr(B, axis, named, oftype_untyped), cb_num(B, (double)b));
|
|
525
|
+
mkr_node_t *modz = cb_binop(B, MKR_OP_EQ, cb_binop(B, MKR_OP_MOD, d1, cb_num(B, (double)a)), cb_num(B, 0.0));
|
|
526
|
+
mkr_node_t *d2 = cb_binop(B, MKR_OP_SUB, cb_pos_expr(B, axis, named, oftype_untyped), cb_num(B, (double)b));
|
|
527
|
+
mkr_node_t *qge = cb_binop(B, MKR_OP_GE, cb_binop(B, MKR_OP_DIV, d2, cb_num(B, (double)a)), cb_num(B, 0.0));
|
|
528
|
+
return cb_binop(B, MKR_OP_AND, modz, qge);
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
/* Simple (non-functional) structural pseudo-classes. +step+ supplies the element
|
|
532
|
+
* name for the of-type family. */
|
|
533
|
+
static mkr_node_t *
|
|
534
|
+
lower_pseudo_simple(css_build_t *B, const lxb_css_selector_t *s, const mkr_step_t *step)
|
|
535
|
+
{
|
|
536
|
+
switch (s->u.pseudo.type) {
|
|
537
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FIRST_CHILD:
|
|
538
|
+
return cb_not_axis(B, MKR_AXIS_PRECEDING_SIBLING, MKR_NT_WILDCARD);
|
|
539
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_LAST_CHILD:
|
|
540
|
+
return cb_not_axis(B, MKR_AXIS_FOLLOWING_SIBLING, MKR_NT_WILDCARD);
|
|
541
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_ONLY_CHILD:
|
|
542
|
+
return cb_binop(B, MKR_OP_AND,
|
|
543
|
+
cb_not_axis(B, MKR_AXIS_PRECEDING_SIBLING, MKR_NT_WILDCARD),
|
|
544
|
+
cb_not_axis(B, MKR_AXIS_FOLLOWING_SIBLING, MKR_NT_WILDCARD));
|
|
545
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_EMPTY: /* not(node()) */
|
|
546
|
+
return cb_not_axis(B, MKR_AXIS_CHILD, MKR_NT_NODE);
|
|
547
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_ROOT: /* not(parent::*) */
|
|
548
|
+
return cb_not_axis(B, MKR_AXIS_PARENT, MKR_NT_WILDCARD);
|
|
549
|
+
|
|
550
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FIRST_OF_TYPE:
|
|
551
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_LAST_OF_TYPE:
|
|
552
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_ONLY_OF_TYPE: {
|
|
553
|
+
/* Typed (a:first-of-type) -> not(preceding-sibling::a); untyped
|
|
554
|
+
* (:first-of-type) -> of-type-pos()=1 (the type is the element's own
|
|
555
|
+
* expanded name, compared at eval time - see cb_of_type_pos). */
|
|
556
|
+
unsigned pt = s->u.pseudo.type;
|
|
557
|
+
if (step->test.kind != MKR_NT_NAME) {
|
|
558
|
+
if (pt == LXB_CSS_SELECTOR_PSEUDO_CLASS_FIRST_OF_TYPE)
|
|
559
|
+
return cb_binop(B, MKR_OP_EQ, cb_of_type_pos(B, 1), cb_num(B, 1.0));
|
|
560
|
+
if (pt == LXB_CSS_SELECTOR_PSEUDO_CLASS_LAST_OF_TYPE)
|
|
561
|
+
return cb_binop(B, MKR_OP_EQ, cb_of_type_pos(B, 0), cb_num(B, 1.0));
|
|
562
|
+
return cb_binop(B, MKR_OP_AND,
|
|
563
|
+
cb_binop(B, MKR_OP_EQ, cb_of_type_pos(B, 1), cb_num(B, 1.0)),
|
|
564
|
+
cb_binop(B, MKR_OP_EQ, cb_of_type_pos(B, 0), cb_num(B, 1.0)));
|
|
565
|
+
}
|
|
566
|
+
if (pt == LXB_CSS_SELECTOR_PSEUDO_CLASS_FIRST_OF_TYPE)
|
|
567
|
+
return cb_not_named_axis(B, MKR_AXIS_PRECEDING_SIBLING, &step->test);
|
|
568
|
+
if (pt == LXB_CSS_SELECTOR_PSEUDO_CLASS_LAST_OF_TYPE)
|
|
569
|
+
return cb_not_named_axis(B, MKR_AXIS_FOLLOWING_SIBLING, &step->test);
|
|
570
|
+
return cb_binop(B, MKR_OP_AND,
|
|
571
|
+
cb_not_named_axis(B, MKR_AXIS_PRECEDING_SIBLING, &step->test),
|
|
572
|
+
cb_not_named_axis(B, MKR_AXIS_FOLLOWING_SIBLING, &step->test));
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
default:
|
|
576
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, "unsupported CSS pseudo-class");
|
|
577
|
+
return NULL;
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
/* OR of compound self-tests over each comma-arg of a selector list (:is / :not).
|
|
582
|
+
* Each arg must be a single compound (no combinators); returns NULL on error. */
|
|
583
|
+
static mkr_node_t *
|
|
584
|
+
lower_selector_list_selftest(css_build_t *B, const lxb_css_selector_list_t *list)
|
|
585
|
+
{
|
|
586
|
+
mkr_node_t *acc = NULL;
|
|
587
|
+
for (const lxb_css_selector_list_t *g = list; g != NULL; g = g->next) {
|
|
588
|
+
mkr_node_t *one = lower_complex_selftest(B, g->first);
|
|
589
|
+
if (one == NULL) { mkr_node_free(acc); return NULL; }
|
|
590
|
+
acc = (acc == NULL) ? one : cb_binop(B, MKR_OP_OR, acc, one);
|
|
591
|
+
if (acc == NULL) return NULL;
|
|
592
|
+
}
|
|
593
|
+
return acc;
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
/* child::text()[pred] - a relative path selecting the element's direct child
|
|
597
|
+
* text nodes that satisfy +pred+ (consumed). In predicate position a non-empty
|
|
598
|
+
* node-set is truthy, so this is "some direct child text node matches", which is
|
|
599
|
+
* exactly how Lexbor's :lexbor-contains matcher scans (immediate child TEXT
|
|
600
|
+
* nodes only, not the deep string-value). +pred+ is owned on success or freed on
|
|
601
|
+
* any failure. */
|
|
602
|
+
static mkr_node_t *
|
|
603
|
+
cb_child_text_pred(css_build_t *B, mkr_node_t *pred)
|
|
604
|
+
{
|
|
605
|
+
if (pred == NULL) return NULL;
|
|
606
|
+
mkr_node_t *n = cb_node(B, MKR_NK_PATH);
|
|
607
|
+
if (n == NULL) { mkr_node_free(pred); return NULL; }
|
|
608
|
+
mkr_step_t *steps = (mkr_step_t *)mkr_callocarray(1, sizeof(mkr_step_t));
|
|
609
|
+
if (steps == NULL) {
|
|
610
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "out of memory (css)");
|
|
611
|
+
mkr_node_free(pred); mkr_node_free(n); return NULL;
|
|
612
|
+
}
|
|
613
|
+
mkr_node_t **pv = (mkr_node_t **)mkr_callocarray(1, sizeof(mkr_node_t *));
|
|
614
|
+
if (pv == NULL) {
|
|
615
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "out of memory (css)");
|
|
616
|
+
free(steps); mkr_node_free(pred); mkr_node_free(n); return NULL;
|
|
617
|
+
}
|
|
618
|
+
pv[0] = pred;
|
|
619
|
+
steps[0].axis = MKR_AXIS_CHILD;
|
|
620
|
+
steps[0].test.kind = MKR_NT_TEXT;
|
|
621
|
+
steps[0].predicates = pv;
|
|
622
|
+
steps[0].npredicates = 1;
|
|
623
|
+
n->u.path.absolute = 0;
|
|
624
|
+
n->u.path.steps = steps;
|
|
625
|
+
n->u.path.nsteps = 1;
|
|
626
|
+
return n;
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
/* Functional pseudo-classes: :nth-*(an+b), :not(), :is()/:where(), :has(). */
|
|
630
|
+
static mkr_node_t *
|
|
631
|
+
lower_pseudo_func(css_build_t *B, const lxb_css_selector_t *s, const mkr_step_t *step)
|
|
632
|
+
{
|
|
633
|
+
unsigned type = s->u.pseudo.type;
|
|
634
|
+
|
|
635
|
+
switch (type) {
|
|
636
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD:
|
|
637
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD:
|
|
638
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE:
|
|
639
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE: {
|
|
640
|
+
const lxb_css_selector_anb_of_t *anb = (const lxb_css_selector_anb_of_t *)s->u.pseudo.data;
|
|
641
|
+
if (anb == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, "malformed :nth-*()"); return NULL; }
|
|
642
|
+
if (anb->of != NULL) {
|
|
643
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, ":nth-*(... of S) is not supported");
|
|
644
|
+
return NULL;
|
|
645
|
+
}
|
|
646
|
+
int last = (type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD
|
|
647
|
+
|| type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE);
|
|
648
|
+
int of_type = (type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE
|
|
649
|
+
|| type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE);
|
|
650
|
+
mkr_axis_t axis = last ? MKR_AXIS_FOLLOWING_SIBLING : MKR_AXIS_PRECEDING_SIBLING;
|
|
651
|
+
const mkr_nodetest_t *named = NULL;
|
|
652
|
+
int oftype_untyped = 0;
|
|
653
|
+
if (of_type) {
|
|
654
|
+
/* Typed (a:nth-of-type) counts same-name siblings via a literal name;
|
|
655
|
+
* untyped (:nth-of-type) compares the element's own expanded name at
|
|
656
|
+
* eval time through the internal of-type-pos fn. */
|
|
657
|
+
if (step->test.kind == MKR_NT_NAME) named = &step->test;
|
|
658
|
+
else oftype_untyped = 1;
|
|
659
|
+
}
|
|
660
|
+
return cb_nth(B, axis, named, oftype_untyped, anb->anb.a, anb->anb.b);
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NOT: {
|
|
664
|
+
mkr_node_t *inner = lower_selector_list_selftest(B,
|
|
665
|
+
(const lxb_css_selector_list_t *)s->u.pseudo.data);
|
|
666
|
+
if (inner == NULL) return NULL;
|
|
667
|
+
mkr_node_t **a = cb_args(1);
|
|
668
|
+
if (a == NULL) { mkr_node_free(inner); mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
669
|
+
a[0] = inner;
|
|
670
|
+
return cb_fncall(B, "not", a, 1);
|
|
671
|
+
}
|
|
672
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_IS:
|
|
673
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_WHERE:
|
|
674
|
+
return lower_selector_list_selftest(B,
|
|
675
|
+
(const lxb_css_selector_list_t *)s->u.pseudo.data);
|
|
676
|
+
|
|
677
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_HAS: {
|
|
678
|
+
/* OR of relative descendant/child paths; truthy when any matches. */
|
|
679
|
+
const lxb_css_selector_list_t *list = (const lxb_css_selector_list_t *)s->u.pseudo.data;
|
|
680
|
+
mkr_node_t *acc = NULL;
|
|
681
|
+
for (const lxb_css_selector_list_t *g = list; g != NULL; g = g->next) {
|
|
682
|
+
/* relative to self: honours a leading >, +, ~ (else descendant) */
|
|
683
|
+
mkr_node_t *path = lower_complex(B, g->first, 1);
|
|
684
|
+
if (path == NULL) { mkr_node_free(acc); return NULL; }
|
|
685
|
+
acc = (acc == NULL) ? path : cb_binop(B, MKR_OP_OR, acc, path);
|
|
686
|
+
if (acc == NULL) return NULL;
|
|
687
|
+
}
|
|
688
|
+
return acc;
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_LEXBOR_CONTAINS: {
|
|
692
|
+
/* :lexbor-contains("t") -> child::text()[contains(., "t")]: true when a
|
|
693
|
+
* direct child text node contains t. This mirrors Lexbor's matcher, which
|
|
694
|
+
* scans only the element's immediate child TEXT nodes (NOT the deep
|
|
695
|
+
* string-value), so the XML path agrees with the HTML one. The `i` flag is
|
|
696
|
+
* ASCII case-insensitive: fold each side with translate(). Lexbor's own
|
|
697
|
+
* selector; not a CSS standard. The inner "." is the child text node. */
|
|
698
|
+
const lxb_css_selector_contains_t *c =
|
|
699
|
+
(const lxb_css_selector_contains_t *)s->u.pseudo.data;
|
|
700
|
+
if (c == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, "malformed :lexbor-contains()"); return NULL; }
|
|
701
|
+
const char *needle = (const char *)c->str.data;
|
|
702
|
+
size_t nlen = c->str.length;
|
|
703
|
+
|
|
704
|
+
if (!c->insensitive) {
|
|
705
|
+
mkr_node_t **a = cb_args(2);
|
|
706
|
+
if (a == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
707
|
+
a[0] = cb_step_path(B, MKR_AXIS_SELF, MKR_NT_NODE, NULL, 0); /* "." */
|
|
708
|
+
a[1] = cb_literal(B, needle, nlen);
|
|
709
|
+
return cb_child_text_pred(B, cb_fncall(B, "contains", a, 2));
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
/* ASCII case-insensitive: contains(translate(., A-Z, a-z), lower(needle)) */
|
|
713
|
+
static const char UPPER[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
|
714
|
+
static const char LOWER[] = "abcdefghijklmnopqrstuvwxyz";
|
|
715
|
+
char *low = mkr_str_alloc(nlen);
|
|
716
|
+
if (low == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
717
|
+
for (size_t i = 0; i < nlen; i++) {
|
|
718
|
+
unsigned char ch = (unsigned char)needle[i];
|
|
719
|
+
low[i] = (ch >= 'A' && ch <= 'Z') ? (char)(ch - 'A' + 'a') : (char)ch;
|
|
720
|
+
}
|
|
721
|
+
mkr_node_t **ta = cb_args(3);
|
|
722
|
+
if (ta == NULL) { free(low); mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
723
|
+
ta[0] = cb_step_path(B, MKR_AXIS_SELF, MKR_NT_NODE, NULL, 0);
|
|
724
|
+
ta[1] = cb_literal(B, UPPER, sizeof(UPPER) - 1);
|
|
725
|
+
ta[2] = cb_literal(B, LOWER, sizeof(LOWER) - 1);
|
|
726
|
+
mkr_node_t *folded = cb_fncall(B, "translate", ta, 3);
|
|
727
|
+
mkr_node_t **a = cb_args(2);
|
|
728
|
+
if (a == NULL) { free(low); mkr_node_free(folded); mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); return NULL; }
|
|
729
|
+
a[0] = folded;
|
|
730
|
+
a[1] = cb_literal(B, low, nlen);
|
|
731
|
+
free(low);
|
|
732
|
+
return cb_child_text_pred(B, cb_fncall(B, "contains", a, 2));
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
default:
|
|
736
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, "unsupported functional CSS pseudo-class");
|
|
737
|
+
return NULL;
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
/* Fold one simple selector into the current step (sets nodetest for a type, else
|
|
742
|
+
* appends a predicate). Returns 0 / -1. */
|
|
743
|
+
static int
|
|
744
|
+
fold_simple(css_build_t *B, const lxb_css_selector_t *s, mkr_step_t *step,
|
|
745
|
+
preds_arr_t *preds)
|
|
746
|
+
{
|
|
747
|
+
switch (s->type) {
|
|
748
|
+
case LXB_CSS_SELECTOR_TYPE_ANY:
|
|
749
|
+
case LXB_CSS_SELECTOR_TYPE_ELEMENT:
|
|
750
|
+
return lower_type(B, s, step, preds);
|
|
751
|
+
|
|
752
|
+
case LXB_CSS_SELECTOR_TYPE_ID: /* #id -> @id = 'id' */
|
|
753
|
+
return preds_push(B, preds,
|
|
754
|
+
cb_binop(B, MKR_OP_EQ, cb_attr(B, "id", 2),
|
|
755
|
+
cb_literal(B, (const char *)s->name.data, s->name.length)));
|
|
756
|
+
|
|
757
|
+
case LXB_CSS_SELECTOR_TYPE_CLASS: /* .class -> token match on @class */
|
|
758
|
+
return preds_push(B, preds,
|
|
759
|
+
cb_token_match(B, NULL, 0, "class", 5, (const char *)s->name.data, s->name.length));
|
|
760
|
+
|
|
761
|
+
case LXB_CSS_SELECTOR_TYPE_ATTRIBUTE:
|
|
762
|
+
return preds_push(B, preds, lower_attribute(B, s));
|
|
763
|
+
|
|
764
|
+
case LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS:
|
|
765
|
+
return preds_push(B, preds, lower_pseudo_simple(B, s, step));
|
|
766
|
+
|
|
767
|
+
case LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION:
|
|
768
|
+
return preds_push(B, preds, lower_pseudo_func(B, s, step));
|
|
769
|
+
|
|
770
|
+
case LXB_CSS_SELECTOR_TYPE_PSEUDO_ELEMENT:
|
|
771
|
+
case LXB_CSS_SELECTOR_TYPE_PSEUDO_ELEMENT_FUNCTION:
|
|
772
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, "CSS pseudo-elements are not selectable");
|
|
773
|
+
return -1;
|
|
774
|
+
|
|
775
|
+
default:
|
|
776
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, "unsupported CSS selector component");
|
|
777
|
+
return -1;
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
/* Axis connecting a compound to its predecessor, from the leading combinator. */
|
|
782
|
+
static mkr_axis_t
|
|
783
|
+
axis_for_combinator(lxb_css_selector_combinator_t c, int is_first)
|
|
784
|
+
{
|
|
785
|
+
if (is_first) return MKR_AXIS_DESCENDANT; /* relative to context, descendant-only */
|
|
786
|
+
switch (c) {
|
|
787
|
+
case LXB_CSS_SELECTOR_COMBINATOR_CHILD: return MKR_AXIS_CHILD;
|
|
788
|
+
case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING: return MKR_AXIS_FOLLOWING_SIBLING; /* ~ */
|
|
789
|
+
case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
|
|
790
|
+
default: return MKR_AXIS_DESCENDANT;
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
/* Build one step for a compound [cfirst .. clast] (inclusive chain), with the
|
|
795
|
+
* given axis, appending it to +steps+. Returns 0 / -1. */
|
|
796
|
+
static int
|
|
797
|
+
emit_compound_step(css_build_t *B, steps_arr_t *steps, mkr_axis_t axis,
|
|
798
|
+
const lxb_css_selector_t *cfirst, const lxb_css_selector_t *clast)
|
|
799
|
+
{
|
|
800
|
+
mkr_step_t step; memset(&step, 0, sizeof(step));
|
|
801
|
+
step.axis = axis;
|
|
802
|
+
step.test.kind = MKR_NT_WILDCARD; /* default; a type simple overrides */
|
|
803
|
+
preds_arr_t preds = {0};
|
|
804
|
+
|
|
805
|
+
for (const lxb_css_selector_t *s = cfirst; ; s = s->next) {
|
|
806
|
+
if (fold_simple(B, s, &step, &preds) != 0) {
|
|
807
|
+
for (size_t i = 0; i < preds.n; i++) mkr_node_free(preds.v[i]);
|
|
808
|
+
free(preds.v);
|
|
809
|
+
mkr_owned_text_clear(&step.test.local);
|
|
810
|
+
mkr_owned_text_clear(&step.test.prefix);
|
|
811
|
+
return -1;
|
|
812
|
+
}
|
|
813
|
+
if (s == clast) break;
|
|
814
|
+
}
|
|
815
|
+
step.predicates = preds.v;
|
|
816
|
+
step.npredicates = preds.n;
|
|
817
|
+
if (steps_push(B, steps, step) != 0) {
|
|
818
|
+
for (size_t i = 0; i < preds.n; i++) mkr_node_free(preds.v[i]);
|
|
819
|
+
free(preds.v);
|
|
820
|
+
mkr_owned_text_clear(&step.test.local);
|
|
821
|
+
mkr_owned_text_clear(&step.test.prefix);
|
|
822
|
+
return -1;
|
|
823
|
+
}
|
|
824
|
+
return 0;
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
/* A compound selector (a CLOSE-combinator-linked run of simples) plus the
|
|
828
|
+
* combinator that connects it to its left neighbour. */
|
|
829
|
+
typedef struct {
|
|
830
|
+
const lxb_css_selector_t *first, *last;
|
|
831
|
+
lxb_css_selector_combinator_t comb;
|
|
832
|
+
} mkr_css_compound_t;
|
|
833
|
+
|
|
834
|
+
typedef struct { const lxb_css_selector_t *cstart; } css_compound_iter_t;
|
|
835
|
+
|
|
836
|
+
/* Walk a complex-selector chain compound by compound, left to right. This is
|
|
837
|
+
* the single splitter shared by lower_complex (forward) and lower_complex_selftest
|
|
838
|
+
* (right-to-left), so the boundary rule - and the MKR_CSS_MAX_COMPOUNDS cap the
|
|
839
|
+
* callers apply - live in one place. Returns 1 + fills *out, or 0 at the end. */
|
|
840
|
+
static int
|
|
841
|
+
css_compound_next(css_compound_iter_t *it, mkr_css_compound_t *out)
|
|
842
|
+
{
|
|
843
|
+
const lxb_css_selector_t *cstart = it->cstart;
|
|
844
|
+
if (cstart == NULL) return 0;
|
|
845
|
+
for (const lxb_css_selector_t *s = cstart; s != NULL; s = s->next) {
|
|
846
|
+
const lxb_css_selector_t *nxt = s->next;
|
|
847
|
+
if (nxt != NULL && nxt->combinator == LXB_CSS_SELECTOR_COMBINATOR_CLOSE) continue;
|
|
848
|
+
out->first = cstart;
|
|
849
|
+
out->last = s;
|
|
850
|
+
out->comb = cstart->combinator;
|
|
851
|
+
it->cstart = nxt;
|
|
852
|
+
return 1;
|
|
853
|
+
}
|
|
854
|
+
it->cstart = NULL;
|
|
855
|
+
return 0;
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
/* Lower one complex selector (a chain) into a relative PATH node. */
|
|
859
|
+
static mkr_node_t *
|
|
860
|
+
lower_complex(css_build_t *B, const lxb_css_selector_t *first, int relative_first)
|
|
861
|
+
{
|
|
862
|
+
steps_arr_t steps = {0};
|
|
863
|
+
css_compound_iter_t it = { first };
|
|
864
|
+
mkr_css_compound_t comp;
|
|
865
|
+
size_t nc = 0;
|
|
866
|
+
int first_compound = 1;
|
|
867
|
+
|
|
868
|
+
while (css_compound_next(&it, &comp)) {
|
|
869
|
+
if (nc >= MKR_CSS_MAX_COMPOUNDS) {
|
|
870
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_LIMIT, "CSS selector too complex");
|
|
871
|
+
goto fail;
|
|
872
|
+
}
|
|
873
|
+
nc++;
|
|
874
|
+
|
|
875
|
+
/* In relative mode the first compound honours its own combinator (so it can
|
|
876
|
+
* be a child/adjacent/general-sibling of the context), not a forced descendant. */
|
|
877
|
+
int is_first = first_compound && !relative_first;
|
|
878
|
+
first_compound = 0;
|
|
879
|
+
lxb_css_selector_combinator_t comb = comp.comb;
|
|
880
|
+
|
|
881
|
+
if (!is_first && comb == LXB_CSS_SELECTOR_COMBINATOR_SIBLING) {
|
|
882
|
+
/* a + b -> following-sibling::*[1] / self::b (two steps) */
|
|
883
|
+
mkr_step_t fs; memset(&fs, 0, sizeof(fs));
|
|
884
|
+
fs.axis = MKR_AXIS_FOLLOWING_SIBLING;
|
|
885
|
+
fs.test.kind = MKR_NT_WILDCARD;
|
|
886
|
+
mkr_node_t **p = (mkr_node_t **)mkr_callocarray(1, sizeof(mkr_node_t *));
|
|
887
|
+
if (p == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); goto fail; }
|
|
888
|
+
p[0] = cb_num(B, 1.0);
|
|
889
|
+
if (p[0] == NULL) { free(p); goto fail; }
|
|
890
|
+
fs.predicates = p; fs.npredicates = 1;
|
|
891
|
+
if (steps_push(B, &steps, fs) != 0) { mkr_node_free(p[0]); free(p); goto fail; }
|
|
892
|
+
if (emit_compound_step(B, &steps, MKR_AXIS_SELF, comp.first, comp.last) != 0) goto fail;
|
|
893
|
+
} else {
|
|
894
|
+
mkr_axis_t axis = axis_for_combinator(comb, is_first);
|
|
895
|
+
if (emit_compound_step(B, &steps, axis, comp.first, comp.last) != 0) goto fail;
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
mkr_node_t *path = cb_node(B, MKR_NK_PATH);
|
|
900
|
+
if (path == NULL) goto fail;
|
|
901
|
+
path->u.path.absolute = 0;
|
|
902
|
+
path->u.path.steps = steps.v;
|
|
903
|
+
path->u.path.nsteps = steps.n;
|
|
904
|
+
return path;
|
|
905
|
+
|
|
906
|
+
fail:
|
|
907
|
+
for (size_t i = 0; i < steps.n; i++) mkr_step_clear(&steps.v[i]);
|
|
908
|
+
free(steps.v);
|
|
909
|
+
return NULL;
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
/* lower_complex_selftest (defined further below) - Boolean expression: does
 * SELF match the selector? It reuses fold_simple (through emit_compound_step)
 * to lower each compound's simples (type -> nodetest, others -> self-relative
 * predicates), so the subject is tested as self::<type> AND pred1 AND ...
 * Used by :is()/:where()/:not(). */
|
|
916
|
+
/* The reverse of a forward combinator, for walking from the subject (self) back
|
|
917
|
+
* to the preceding compound. Adjacent (+) is handled separately (two steps). */
|
|
918
|
+
static mkr_axis_t
|
|
919
|
+
reverse_axis(lxb_css_selector_combinator_t c)
|
|
920
|
+
{
|
|
921
|
+
switch (c) {
|
|
922
|
+
case LXB_CSS_SELECTOR_COMBINATOR_CHILD: return MKR_AXIS_PARENT;
|
|
923
|
+
case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING: return MKR_AXIS_PRECEDING_SIBLING; /* ~ */
|
|
924
|
+
case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
|
|
925
|
+
default: return MKR_AXIS_ANCESTOR;
|
|
926
|
+
}
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
static mkr_node_t *
|
|
930
|
+
lower_complex_selftest(css_build_t *B, const lxb_css_selector_t *first)
|
|
931
|
+
{
|
|
932
|
+
/* Split the chain into compounds (CLOSE-linked runs), left to right, via the
|
|
933
|
+
* shared splitter so the boundary rule and MKR_CSS_MAX_COMPOUNDS cap match
|
|
934
|
+
* lower_complex's exactly. */
|
|
935
|
+
mkr_css_compound_t comps[MKR_CSS_MAX_COMPOUNDS];
|
|
936
|
+
size_t nc = 0;
|
|
937
|
+
css_compound_iter_t it = { first };
|
|
938
|
+
mkr_css_compound_t comp;
|
|
939
|
+
while (css_compound_next(&it, &comp)) {
|
|
940
|
+
if (nc >= MKR_CSS_MAX_COMPOUNDS) {
|
|
941
|
+
mkr_err_set(B->err, MKR_XPATH_ERR_LIMIT, "CSS selector too complex");
|
|
942
|
+
return NULL;
|
|
943
|
+
}
|
|
944
|
+
comps[nc++] = comp;
|
|
945
|
+
}
|
|
946
|
+
if (nc == 0) { mkr_err_set(B->err, MKR_XPATH_ERR_SYNTAX, "empty CSS selector"); return NULL; }
|
|
947
|
+
|
|
948
|
+
/* Build self::<subject> then reverse back-steps to each earlier compound. The
|
|
949
|
+
* whole path is non-empty (truthy) exactly when self matches the selector. */
|
|
950
|
+
steps_arr_t steps = {0};
|
|
951
|
+
if (emit_compound_step(B, &steps, MKR_AXIS_SELF, comps[nc - 1].first, comps[nc - 1].last) != 0) {
|
|
952
|
+
goto fail;
|
|
953
|
+
}
|
|
954
|
+
for (size_t i = nc - 1; i > 0; i--) {
|
|
955
|
+
lxb_css_selector_combinator_t comb = comps[i].comb; /* connects comps[i] to comps[i-1] */
|
|
956
|
+
if (comb == LXB_CSS_SELECTOR_COMBINATOR_SIBLING) {
|
|
957
|
+
/* reverse adjacent: the immediately-preceding sibling must be comps[i-1] */
|
|
958
|
+
mkr_step_t ps; memset(&ps, 0, sizeof(ps));
|
|
959
|
+
ps.axis = MKR_AXIS_PRECEDING_SIBLING;
|
|
960
|
+
ps.test.kind = MKR_NT_WILDCARD;
|
|
961
|
+
mkr_node_t **p = (mkr_node_t **)mkr_callocarray(1, sizeof(mkr_node_t *));
|
|
962
|
+
if (p == NULL) { mkr_err_set(B->err, MKR_XPATH_ERR_OOM, "oom"); goto fail; }
|
|
963
|
+
p[0] = cb_num(B, 1.0);
|
|
964
|
+
if (p[0] == NULL) { free(p); goto fail; }
|
|
965
|
+
ps.predicates = p; ps.npredicates = 1;
|
|
966
|
+
if (steps_push(B, &steps, ps) != 0) { mkr_node_free(p[0]); free(p); goto fail; }
|
|
967
|
+
if (emit_compound_step(B, &steps, MKR_AXIS_SELF, comps[i - 1].first, comps[i - 1].last) != 0) {
|
|
968
|
+
goto fail;
|
|
969
|
+
}
|
|
970
|
+
} else {
|
|
971
|
+
if (emit_compound_step(B, &steps, reverse_axis(comb),
|
|
972
|
+
comps[i - 1].first, comps[i - 1].last) != 0) {
|
|
973
|
+
goto fail;
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
mkr_node_t *path = cb_node(B, MKR_NK_PATH);
|
|
979
|
+
if (path == NULL) goto fail;
|
|
980
|
+
path->u.path.absolute = 0;
|
|
981
|
+
path->u.path.steps = steps.v;
|
|
982
|
+
path->u.path.nsteps = steps.n;
|
|
983
|
+
return path;
|
|
984
|
+
|
|
985
|
+
fail:
|
|
986
|
+
for (size_t i = 0; i < steps.n; i++) mkr_step_clear(&steps.v[i]);
|
|
987
|
+
free(steps.v);
|
|
988
|
+
return NULL;
|
|
989
|
+
}
|
|
990
|
+
|
|
991
|
+
mkr_node_t *
|
|
992
|
+
mkr_css_compile(mkr_verified_text_t selector, const mkr_css_ns_t *ns,
|
|
993
|
+
mkr_xpath_limits_t *limits, mkr_xpath_error_t *err)
|
|
994
|
+
{
|
|
995
|
+
css_build_t B = { limits, err, ns };
|
|
996
|
+
|
|
997
|
+
if (!css_parser_ready()) {
|
|
998
|
+
mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "failed to initialise CSS parser");
|
|
999
|
+
return NULL;
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
lxb_css_selector_list_t *list =
|
|
1003
|
+
lxb_css_selectors_parse(g_parser, (const lxb_char_t *)selector.ptr, selector.len);
|
|
1004
|
+
if (list == NULL || g_parser->status != LXB_STATUS_OK) {
|
|
1005
|
+
lxb_css_memory_clean(g_mem);
|
|
1006
|
+
lxb_css_parser_clean(g_parser);
|
|
1007
|
+
mkr_err_set(err, MKR_XPATH_ERR_SYNTAX, "invalid CSS selector");
|
|
1008
|
+
return NULL;
|
|
1009
|
+
}
|
|
1010
|
+
|
|
1011
|
+
/* Lower each comma-group (list -> list->next) to a PATH, union them. */
|
|
1012
|
+
mkr_node_t *acc = NULL;
|
|
1013
|
+
for (lxb_css_selector_list_t *g = list; g != NULL; g = g->next) {
|
|
1014
|
+
mkr_node_t *path = lower_complex(&B, g->first, 0); /* top-level: descendant of context */
|
|
1015
|
+
if (path == NULL) { mkr_node_free(acc); acc = NULL; break; }
|
|
1016
|
+
acc = (acc == NULL) ? path : cb_binop(&B, MKR_OP_UNION, acc, path);
|
|
1017
|
+
if (acc == NULL) break; /* cb_binop freed both on failure */
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
lxb_css_memory_clean(g_mem);
|
|
1021
|
+
lxb_css_parser_clean(g_parser);
|
|
1022
|
+
return acc; /* NULL with *err set on failure */
|
|
1023
|
+
}
|