makiri 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +12 -7
- data/CHANGELOG.md +93 -14
- data/README.md +173 -7
- data/Rakefile +103 -7
- data/ext/makiri/bridge/bridge.h +28 -0
- data/ext/makiri/bridge/ruby_string.c +217 -0
- data/ext/makiri/core/mkr_alloc.h +1 -1
- data/ext/makiri/core/mkr_buf.c +35 -1
- data/ext/makiri/core/mkr_buf.h +37 -3
- data/ext/makiri/core/mkr_core.h +1 -1
- data/ext/makiri/core/mkr_hash.h +1 -1
- data/ext/makiri/core/mkr_text.h +8 -8
- data/ext/makiri/extconf.rb +20 -2
- data/ext/makiri/glue/glue.h +47 -11
- data/ext/makiri/glue/ruby_doc.c +117 -43
- data/ext/makiri/glue/ruby_html_css.c +246 -0
- data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +242 -51
- data/ext/makiri/glue/ruby_html_node.c +888 -0
- data/ext/makiri/glue/ruby_html_serialize.c +154 -0
- data/ext/makiri/glue/ruby_node.c +54 -748
- data/ext/makiri/glue/ruby_node_set.c +167 -32
- data/ext/makiri/glue/ruby_xml.c +420 -0
- data/ext/makiri/glue/ruby_xml_node.c +1386 -0
- data/ext/makiri/glue/ruby_xpath.c +59 -26
- data/ext/makiri/glue/ruby_xpath.h +19 -0
- data/ext/makiri/lexbor_compat/compat.h +42 -9
- data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
- data/ext/makiri/lexbor_compat/dom_index.c +2 -2
- data/ext/makiri/lexbor_compat/post_parse.c +100 -10
- data/ext/makiri/lexbor_compat/source_loc.c +13 -9
- data/ext/makiri/lexbor_compat/text_index.c +14 -8
- data/ext/makiri/lexbor_compat/utf8_input.c +85 -26
- data/ext/makiri/makiri.c +139 -6
- data/ext/makiri/makiri.h +43 -2
- data/ext/makiri/xml/mkr_xml.h +126 -0
- data/ext/makiri/xml/mkr_xml_chars.c +225 -0
- data/ext/makiri/xml/mkr_xml_mutate.c +875 -0
- data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
- data/ext/makiri/xml/mkr_xml_node.c +267 -0
- data/ext/makiri/xml/mkr_xml_node.h +119 -0
- data/ext/makiri/xml/mkr_xml_tree.c +1479 -0
- data/ext/makiri/xpath/mkr_xpath.c +59 -32
- data/ext/makiri/xpath/mkr_xpath.h +96 -4
- data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
- data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
- data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +202 -175
- data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +110 -86
- data/ext/makiri/xpath/mkr_xpath_internal.h +91 -200
- data/ext/makiri/xpath/mkr_xpath_lex.c +2 -2
- data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
- data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +142 -0
- data/ext/makiri/xpath/mkr_xpath_parse.c +5 -5
- data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
- data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
- data/ext/makiri/xpath/mkr_xpath_shared.c +593 -0
- data/ext/makiri/xpath/{mkr_xpath_value.c → mkr_xpath_value_body.h} +145 -656
- data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
- data/lib/makiri/{attribute.rb → attr.rb} +7 -3
- data/lib/makiri/cdata_section.rb +21 -0
- data/lib/makiri/comment.rb +12 -0
- data/lib/makiri/compat_aliases.rb +30 -0
- data/lib/makiri/document.rb +4 -76
- data/lib/makiri/document_fragment.rb +14 -9
- data/lib/makiri/element.rb +5 -3
- data/lib/makiri/html/document.rb +106 -0
- data/lib/makiri/html/node_methods.rb +19 -0
- data/lib/makiri/html.rb +12 -0
- data/lib/makiri/node.rb +58 -15
- data/lib/makiri/node_set.rb +8 -0
- data/lib/makiri/processing_instruction.rb +12 -0
- data/lib/makiri/text.rb +2 -0
- data/lib/makiri/version.rb +1 -1
- data/lib/makiri/xml/document.rb +24 -0
- data/lib/makiri/xml/node_methods.rb +37 -0
- data/lib/makiri/xml.rb +10 -0
- data/lib/makiri/xpath_context.rb +1 -1
- data/lib/makiri.rb +23 -5
- data/script/build_native_gem.rb +2 -2
- data/script/check_c_safety.rb +32 -0
- data/script/check_c_safety_allowlist.yml +83 -0
- metadata +35 -9
- data/ext/makiri/glue/ruby_css.c +0 -185
- data/ext/makiri/glue/ruby_serialize.c +0 -92
- data/lib/makiri/cdata.rb +0 -6
|
@@ -0,0 +1,593 @@
|
|
|
1
|
+
/* mkr_xpath_shared.c - the representation-independent engine primitives.
|
|
2
|
+
*
|
|
3
|
+
* Compiled exactly ONCE (one normal .c, not a monomorphized body): none of
|
|
4
|
+
* these functions dereferences a DOM node. They move node *pointers* (node-set
|
|
5
|
+
* build/clone/free), own/compare engine strings, manage the per-eval
|
|
6
|
+
* string-value cache and document-order-index lifecycles, and walk/destroy the
|
|
7
|
+
* AST. A pointer is a pointer whichever representation it points at, so the
|
|
8
|
+
* machine code is identical for HTML and XML -- one shared copy, not two.
|
|
9
|
+
*
|
|
10
|
+
* Contrast the engine bodies (mkr_xpath_{value,funcs,eval}_body.h): those are
|
|
11
|
+
* .h files precisely because they ARE compiled twice (mkr_xpath_engine_html.c /
|
|
12
|
+
* _xml.c include them with MKR_NODE_* bound to each representation). This file
|
|
13
|
+
* is compiled once, so its code lives directly in the .c -- there is nothing to
|
|
14
|
+
* include twice. mkr_xpath_internal.h is included WITHOUT a prelude, so
|
|
15
|
+
* MKR_DOM_NODE stays the neutral `void` default; the node pointers below are
|
|
16
|
+
* never dereferenced, so void* is exact.
|
|
17
|
+
*
|
|
18
|
+
* The driver (mkr_xpath.c), the parser/lexer, AND both engine instances call
|
|
19
|
+
* these by their bare names. Three are extern rather than file-static
|
|
20
|
+
* (mkr_pointer_hash, mkr_str_cache_index_put, mkr_str_cache_reindex): the
|
|
21
|
+
* string-value cache splits its pure index bookkeeping (here) from its
|
|
22
|
+
* node-dereferencing insert (mkr_get_cached_node_text, in the per-instance
|
|
23
|
+
* value body), so both sides share the one index implementation.
|
|
24
|
+
*/
|
|
25
|
+
#include "mkr_xpath_internal.h"
|
|
26
|
+
#include "../core/mkr_core.h"
|
|
27
|
+
|
|
28
|
+
#include <stdint.h>
|
|
29
|
+
#include <stdlib.h>
|
|
30
|
+
#include <string.h>
|
|
31
|
+
|
|
32
|
+
/* ---------- pointer hash (shared by str-cache + doc-order index) ---------- */
|
|
33
|
+
|
|
34
|
+
/* Hash a pointer value down to 32 bits.
 *
 * The pointer is only converted to an integer, never dereferenced. The
 * integer goes through two xor-shift/multiply rounds and a final xor-shift;
 * the low 32 bits of the result are returned. */
uint32_t
mkr_pointer_hash(const void *p)
{
    uintptr_t bits = (uintptr_t)p;

    /* Round 1: fold higher bits down, then multiply. */
    bits ^= bits >> 16;
    bits *= 0x9E3779B9u;

    /* Round 2: same shape with a different shift and multiplier. */
    bits ^= bits >> 13;
    bits *= 0x85EBCA6Bu;

    /* Final fold before truncating to 32 bits. */
    bits ^= bits >> 16;
    return (uint32_t)bits;
}
|
|
43
|
+
|
|
44
|
+
/* ---------- node-set ---------- */
|
|
45
|
+
|
|
46
|
+
void
|
|
47
|
+
mkr_nodeset_init(mkr_nodeset_t *ns)
|
|
48
|
+
{
|
|
49
|
+
ns->items = NULL;
|
|
50
|
+
ns->count = 0;
|
|
51
|
+
ns->capacity = 0;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
int
|
|
55
|
+
mkr_nodeset_push(mkr_nodeset_t *ns, MKR_DOM_NODE *node,
|
|
56
|
+
mkr_xpath_limits_t *limits, mkr_xpath_error_t *err)
|
|
57
|
+
{
|
|
58
|
+
if (node == NULL) return 0;
|
|
59
|
+
if (limits != NULL && mkr_limit_check_nodeset_size(limits, ns->count + 1, err) != 0) {
|
|
60
|
+
return -1;
|
|
61
|
+
}
|
|
62
|
+
if (mkr_grow_reserve((void **)&ns->items, &ns->capacity, ns->count + 1,
|
|
63
|
+
sizeof(*ns->items)) != MKR_OK) {
|
|
64
|
+
mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory growing node-set");
|
|
65
|
+
return -1;
|
|
66
|
+
}
|
|
67
|
+
ns->items[ns->count++] = node;
|
|
68
|
+
return 0;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
void
|
|
72
|
+
mkr_nodeset_clear(mkr_nodeset_t *ns)
|
|
73
|
+
{
|
|
74
|
+
if (ns == NULL) return;
|
|
75
|
+
free(ns->items);
|
|
76
|
+
ns->items = NULL;
|
|
77
|
+
ns->count = 0;
|
|
78
|
+
ns->capacity = 0;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/* ---------- owned / borrowed text ---------- */
|
|
82
|
+
|
|
83
|
+
void
|
|
84
|
+
mkr_owned_text_init(mkr_owned_text_t *t)
|
|
85
|
+
{
|
|
86
|
+
if (t == NULL) return;
|
|
87
|
+
t->ptr = NULL;
|
|
88
|
+
t->len = 0;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
void
|
|
92
|
+
mkr_owned_text_clear(mkr_owned_text_t *t)
|
|
93
|
+
{
|
|
94
|
+
if (t == NULL) return;
|
|
95
|
+
free(t->ptr);
|
|
96
|
+
t->ptr = NULL;
|
|
97
|
+
t->len = 0;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
int
|
|
101
|
+
mkr_borrowed_text_eq(mkr_borrowed_text_t a, mkr_borrowed_text_t b)
|
|
102
|
+
{
|
|
103
|
+
if (a.ptr == NULL || b.ptr == NULL) return a.ptr == b.ptr;
|
|
104
|
+
return a.len == b.len && memcmp(a.ptr, b.ptr, a.len) == 0;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/* Copy an already-valid borrowed text into owned storage. Taking
|
|
108
|
+
* mkr_borrowed_text_t (not raw char*+len) keeps the type contract: an
|
|
109
|
+
* mkr_owned_text_t can only be minted from text the caller has asserted valid
|
|
110
|
+
* (via mkr_borrowed_text / mkr_borrowed_text_from_verified /
|
|
111
|
+
* mkr_borrowed_text_from_owned), so every raw-bytes -> text entry point is
|
|
112
|
+
* greppable. */
|
|
113
|
+
int
|
|
114
|
+
mkr_owned_text_from_borrowed_copy(mkr_owned_text_t *out, mkr_borrowed_text_t t,
|
|
115
|
+
mkr_xpath_error_t *err, const char *what)
|
|
116
|
+
{
|
|
117
|
+
if (out == NULL) {
|
|
118
|
+
mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_owned_text_from_borrowed_copy: bad args");
|
|
119
|
+
return -1;
|
|
120
|
+
}
|
|
121
|
+
mkr_owned_text_init(out);
|
|
122
|
+
const char *s = t.ptr ? t.ptr : "";
|
|
123
|
+
size_t len = t.ptr ? t.len : 0;
|
|
124
|
+
char *p = mkr_strndup(s, len);
|
|
125
|
+
if (p == NULL) {
|
|
126
|
+
mkr_err_set(err, MKR_XPATH_ERR_OOM, what ? what : "out of memory copying text");
|
|
127
|
+
return -1;
|
|
128
|
+
}
|
|
129
|
+
out->ptr = p;
|
|
130
|
+
out->len = len;
|
|
131
|
+
return 0;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/* ---------- value ---------- */
|
|
135
|
+
|
|
136
|
+
void
|
|
137
|
+
mkr_val_clear(mkr_val_t *v)
|
|
138
|
+
{
|
|
139
|
+
if (v == NULL) return;
|
|
140
|
+
switch (v->type) {
|
|
141
|
+
case MKR_XPATH_TYPE_NODESET:
|
|
142
|
+
mkr_nodeset_clear(&v->u.nodeset);
|
|
143
|
+
break;
|
|
144
|
+
case MKR_XPATH_TYPE_STRING:
|
|
145
|
+
mkr_owned_text_clear(&v->u.string);
|
|
146
|
+
break;
|
|
147
|
+
default:
|
|
148
|
+
break;
|
|
149
|
+
}
|
|
150
|
+
memset(v, 0, sizeof(*v));
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
void
|
|
154
|
+
mkr_val_set_owned_text(mkr_val_t *v, mkr_owned_text_t text)
|
|
155
|
+
{
|
|
156
|
+
if (v == NULL) return;
|
|
157
|
+
v->type = MKR_XPATH_TYPE_STRING;
|
|
158
|
+
v->u.string = text;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/* Set +v+ to a STRING by copying a borrowed view: the engine allocates and owns
|
|
162
|
+
* the copy. This is how callers outside the engine (the glue handler bridge)
|
|
163
|
+
* hand a string into a value - they pass what they have, a borrowed slice, and
|
|
164
|
+
* never construct an mkr_owned_text_t themselves. Keeping the copy-and-own step
|
|
165
|
+
* here keeps allocation and freeing of owned strings in one layer. Returns 0 on
|
|
166
|
+
* success, -1 on OOM (err populated; +v+ left untouched). */
|
|
167
|
+
int
|
|
168
|
+
mkr_val_set_borrowed_text_copy(mkr_val_t *v, mkr_borrowed_text_t text,
|
|
169
|
+
mkr_xpath_error_t *err, const char *what)
|
|
170
|
+
{
|
|
171
|
+
if (v == NULL) {
|
|
172
|
+
mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_val_set_borrowed_text_copy: bad args");
|
|
173
|
+
return -1;
|
|
174
|
+
}
|
|
175
|
+
mkr_owned_text_t owned;
|
|
176
|
+
if (mkr_owned_text_from_borrowed_copy(&owned, text, err, what) != 0) {
|
|
177
|
+
return -1;
|
|
178
|
+
}
|
|
179
|
+
mkr_val_set_owned_text(v, owned);
|
|
180
|
+
return 0;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/* ---------- per-evaluate document-order index (lifecycle) ---------- */
|
|
184
|
+
|
|
185
|
+
void
|
|
186
|
+
mkr_doc_order_index_init(mkr_doc_order_index_t *idx)
|
|
187
|
+
{
|
|
188
|
+
idx->buckets = NULL;
|
|
189
|
+
idx->cap = 0;
|
|
190
|
+
idx->count = 0;
|
|
191
|
+
idx->built = 0;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
void
|
|
195
|
+
mkr_doc_order_index_clear(mkr_doc_order_index_t *idx)
|
|
196
|
+
{
|
|
197
|
+
if (idx == NULL) return;
|
|
198
|
+
free(idx->buckets);
|
|
199
|
+
idx->buckets = NULL;
|
|
200
|
+
idx->cap = 0;
|
|
201
|
+
idx->count = 0;
|
|
202
|
+
idx->built = 0;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
/* ---------- per-evaluation string-value cache (lifecycle + index) ---------- */
|
|
206
|
+
|
|
207
|
+
void
|
|
208
|
+
mkr_str_cache_init(mkr_str_cache_t *c)
|
|
209
|
+
{
|
|
210
|
+
c->entries = NULL;
|
|
211
|
+
c->count = 0;
|
|
212
|
+
c->cap = 0;
|
|
213
|
+
c->buckets = NULL;
|
|
214
|
+
c->bucket_cap = 0;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/* Insert entry index `idx` (keyed by entries[idx].node) into the index. The
|
|
218
|
+
* index must have room (callers grow/rehash first). Extern: shared by the pure
|
|
219
|
+
* reindex below and the node-dereferencing insert in the per-instance body. */
|
|
220
|
+
void
|
|
221
|
+
mkr_str_cache_index_put(mkr_str_cache_t *c, size_t idx)
|
|
222
|
+
{
|
|
223
|
+
size_t mask = c->bucket_cap - 1;
|
|
224
|
+
size_t j = mkr_pointer_hash(c->entries[idx].node) & mask;
|
|
225
|
+
while (c->buckets[j] != 0) {
|
|
226
|
+
j = (j + 1) & mask;
|
|
227
|
+
}
|
|
228
|
+
c->buckets[j] = idx + 1;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
/* Rebuild the index from entries[0, count). Returns -1 on OOM. Extern: see above. */
|
|
232
|
+
int
|
|
233
|
+
mkr_str_cache_reindex(mkr_str_cache_t *c, size_t bucket_cap)
|
|
234
|
+
{
|
|
235
|
+
size_t *buckets = mkr_callocarray(bucket_cap, sizeof(*buckets));
|
|
236
|
+
if (buckets == NULL) return -1;
|
|
237
|
+
free(c->buckets);
|
|
238
|
+
c->buckets = buckets;
|
|
239
|
+
c->bucket_cap = bucket_cap;
|
|
240
|
+
for (size_t i = 0; i < c->count; ++i) {
|
|
241
|
+
mkr_str_cache_index_put(c, i);
|
|
242
|
+
}
|
|
243
|
+
return 0;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
void
|
|
247
|
+
mkr_str_cache_truncate(mkr_str_cache_t *c, size_t target_count)
|
|
248
|
+
{
|
|
249
|
+
if (c == NULL || target_count >= c->count) return;
|
|
250
|
+
for (size_t i = target_count; i < c->count; ++i) {
|
|
251
|
+
free(c->entries[i].str);
|
|
252
|
+
}
|
|
253
|
+
c->count = target_count;
|
|
254
|
+
/* Drop the removed nodes from the index. A full truncate just clears it;
|
|
255
|
+
* a partial one (nested-eval snapshot restore) rebuilds from what remains. */
|
|
256
|
+
if (c->buckets != NULL) {
|
|
257
|
+
if (target_count == 0) {
|
|
258
|
+
size_t buckets_bytes;
|
|
259
|
+
if (!mkr_size_mul(c->bucket_cap, sizeof(*c->buckets), &buckets_bytes)) {
|
|
260
|
+
free(c->buckets);
|
|
261
|
+
c->buckets = NULL;
|
|
262
|
+
c->bucket_cap = 0;
|
|
263
|
+
return;
|
|
264
|
+
}
|
|
265
|
+
memset(c->buckets, 0, buckets_bytes);
|
|
266
|
+
} else {
|
|
267
|
+
mkr_str_cache_reindex(c, c->bucket_cap);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
void
|
|
273
|
+
mkr_str_cache_clear(mkr_str_cache_t *c)
|
|
274
|
+
{
|
|
275
|
+
if (c == NULL) return;
|
|
276
|
+
for (size_t i = 0; i < c->count; ++i) {
|
|
277
|
+
free(c->entries[i].str);
|
|
278
|
+
}
|
|
279
|
+
free(c->entries);
|
|
280
|
+
free(c->buckets);
|
|
281
|
+
c->entries = NULL;
|
|
282
|
+
c->count = 0;
|
|
283
|
+
c->cap = 0;
|
|
284
|
+
c->buckets = NULL;
|
|
285
|
+
c->bucket_cap = 0;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/* ---------- AST destructors ---------- */
|
|
289
|
+
|
|
290
|
+
void
|
|
291
|
+
mkr_step_clear(mkr_step_t *s)
|
|
292
|
+
{
|
|
293
|
+
if (s == NULL) return;
|
|
294
|
+
mkr_owned_text_clear(&s->test.prefix);
|
|
295
|
+
mkr_owned_text_clear(&s->test.local);
|
|
296
|
+
mkr_owned_text_clear(&s->test.pi_target);
|
|
297
|
+
for (size_t i = 0; i < s->npredicates; ++i) {
|
|
298
|
+
mkr_node_free(s->predicates[i]);
|
|
299
|
+
}
|
|
300
|
+
free(s->predicates);
|
|
301
|
+
memset(s, 0, sizeof(*s));
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
/* ---------- AST hoisting helpers ---------- */
|
|
305
|
+
|
|
306
|
+
/* Decide whether a built-in function name is a pure XPath 1.0 function that
 * is safe to hoist when all of its arguments are context-independent.
 *
 * The set is listed explicitly to stay conservative. Functions that read the
 * context node (last/position, the 0-arg forms of string/normalize-space/
 * local-name/etc., lang) or that may depend on dynamic state (id,
 * handler-routed calls) are intentionally absent.
 *
 * name   - unprefixed function name; NULL yields 0.
 * nargs  - number of arguments at the call site. With zero arguments only
 *          "true" and "false" qualify; with one or more, the name must be
 *          in the table below. The caller checks that the arguments are
 *          themselves context-independent.
 *
 * Returns 1 when the call is hoistable by name, 0 otherwise. */
static int
is_pure_builtin_name(const char *name, size_t nargs)
{
    /* Both the strings and the table itself are immutable. */
    static const char *const pure_names[] = {
        "count", "string-length", "number", "boolean", "not",
        "floor", "ceiling", "round", "sum",
        "concat", "starts-with", "contains",
        "substring-before", "substring-after", "substring",
        "translate",
    };

    if (name == NULL) return 0;

    /* 0-arg only - these read no input. */
    if (nargs == 0) {
        return strcmp(name, "true") == 0 || strcmp(name, "false") == 0;
    }

    /* n-arg pure functions. */
    for (size_t i = 0; i < sizeof pure_names / sizeof pure_names[0]; ++i) {
        if (strcmp(pure_names[i], name) == 0) return 1;
    }
    return 0;
}
|
|
334
|
+
|
|
335
|
+
static void
|
|
336
|
+
mark_step_predicates(mkr_step_t *s)
|
|
337
|
+
{
|
|
338
|
+
for (size_t i = 0; i < s->npredicates; ++i) {
|
|
339
|
+
mkr_mark_context_independent(s->predicates[i]);
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
void
|
|
344
|
+
mkr_mark_context_independent(mkr_node_t *n)
|
|
345
|
+
{
|
|
346
|
+
if (n == NULL) return;
|
|
347
|
+
int ci = 0;
|
|
348
|
+
switch (n->kind) {
|
|
349
|
+
case MKR_NK_LITERAL_STR:
|
|
350
|
+
case MKR_NK_LITERAL_NUM:
|
|
351
|
+
ci = 1;
|
|
352
|
+
break;
|
|
353
|
+
case MKR_NK_VARREF:
|
|
354
|
+
/* Conservative: variables not hoisted even though XPath 1.0 says
|
|
355
|
+
* they're fixed per evaluation. */
|
|
356
|
+
ci = 0;
|
|
357
|
+
break;
|
|
358
|
+
case MKR_NK_FNCALL: {
|
|
359
|
+
/* Recurse first so subtrees get their own CI marks even when this
|
|
360
|
+
* call itself is not hoistable. */
|
|
361
|
+
for (size_t i = 0; i < n->u.fncall.nargs; ++i) {
|
|
362
|
+
mkr_mark_context_independent(n->u.fncall.args[i]);
|
|
363
|
+
}
|
|
364
|
+
if (n->u.fncall.prefix.ptr != NULL) {
|
|
365
|
+
ci = 0; /* Handler-routed or namespaced builtins → non-CI. */
|
|
366
|
+
break;
|
|
367
|
+
}
|
|
368
|
+
if (!is_pure_builtin_name(n->u.fncall.name.ptr, n->u.fncall.nargs)) {
|
|
369
|
+
ci = 0;
|
|
370
|
+
break;
|
|
371
|
+
}
|
|
372
|
+
ci = 1;
|
|
373
|
+
for (size_t i = 0; i < n->u.fncall.nargs; ++i) {
|
|
374
|
+
if (!n->u.fncall.args[i]->is_context_independent) { ci = 0; break; }
|
|
375
|
+
}
|
|
376
|
+
break;
|
|
377
|
+
}
|
|
378
|
+
case MKR_NK_UNARY:
|
|
379
|
+
mkr_mark_context_independent(n->u.unary.expr);
|
|
380
|
+
ci = n->u.unary.expr ? n->u.unary.expr->is_context_independent : 0;
|
|
381
|
+
break;
|
|
382
|
+
case MKR_NK_BINOP:
|
|
383
|
+
mkr_mark_context_independent(n->u.binop.lhs);
|
|
384
|
+
mkr_mark_context_independent(n->u.binop.rhs);
|
|
385
|
+
ci = (n->u.binop.lhs && n->u.binop.lhs->is_context_independent)
|
|
386
|
+
&& (n->u.binop.rhs && n->u.binop.rhs->is_context_independent);
|
|
387
|
+
break;
|
|
388
|
+
case MKR_NK_PATH:
|
|
389
|
+
/* Absolute path is CI: seed is the document root regardless of
|
|
390
|
+
* outer context. Relative paths use the outer context node and
|
|
391
|
+
* are not hoistable. Predicates inside the path are evaluated
|
|
392
|
+
* against the path's own context, so their position()/last() do
|
|
393
|
+
* not leak - recurse so any pure sub-expressions still get marks. */
|
|
394
|
+
ci = n->u.path.absolute ? 1 : 0;
|
|
395
|
+
for (size_t i = 0; i < n->u.path.nsteps; ++i) {
|
|
396
|
+
mark_step_predicates(&n->u.path.steps[i]);
|
|
397
|
+
}
|
|
398
|
+
break;
|
|
399
|
+
case MKR_NK_FILTER:
|
|
400
|
+
/* Conservative: filter expressions are not hoisted in v1. */
|
|
401
|
+
ci = 0;
|
|
402
|
+
mkr_mark_context_independent(n->u.filter.expr);
|
|
403
|
+
for (size_t i = 0; i < n->u.filter.npreds; ++i) {
|
|
404
|
+
mkr_mark_context_independent(n->u.filter.preds[i]);
|
|
405
|
+
}
|
|
406
|
+
for (size_t i = 0; i < n->u.filter.npath; ++i) {
|
|
407
|
+
mark_step_predicates(&n->u.filter.path_steps[i]);
|
|
408
|
+
}
|
|
409
|
+
break;
|
|
410
|
+
}
|
|
411
|
+
n->is_context_independent = (uint8_t)ci;
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
static void
|
|
415
|
+
clear_memos_step(mkr_step_t *s)
|
|
416
|
+
{
|
|
417
|
+
for (size_t i = 0; i < s->npredicates; ++i) {
|
|
418
|
+
mkr_node_clear_memos(s->predicates[i]);
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
/* ---------- peephole: //X fusion ---------- */
|
|
423
|
+
|
|
424
|
+
/*
|
|
425
|
+
* Collapse pairs of consecutive steps:
|
|
426
|
+
* (axis=descendant-or-self, test=node(), no predicates)
|
|
427
|
+
* (axis=child, test=*, no predicates)
|
|
428
|
+
* into a single
|
|
429
|
+
* (axis=descendant, test=*, no predicates)
|
|
430
|
+
*
|
|
431
|
+
* The fusion is safe per XPath 1.0 only when the child step has no
|
|
432
|
+
* predicates: otherwise '//X[1]' would change meaning ("first X per
|
|
433
|
+
* parent" vs "first X in doc order"). The synthesised // step always
|
|
434
|
+
* has no predicates by construction, so we don't need to check the
|
|
435
|
+
* first step's predicate list - only the child step's.
|
|
436
|
+
*/
|
|
437
|
+
static void
|
|
438
|
+
fuse_descendant_or_self_steps(mkr_step_t *steps, size_t *nsteps_ptr)
|
|
439
|
+
{
|
|
440
|
+
if (steps == NULL || *nsteps_ptr < 2) return;
|
|
441
|
+
size_t nsteps = *nsteps_ptr;
|
|
442
|
+
size_t w = 0, r = 0;
|
|
443
|
+
while (r < nsteps) {
|
|
444
|
+
if (r + 1 < nsteps
|
|
445
|
+
&& steps[r].axis == MKR_AXIS_DESCENDANT_OR_SELF
|
|
446
|
+
&& steps[r].test.kind == MKR_NT_NODE
|
|
447
|
+
&& steps[r].test.prefix.ptr == NULL
|
|
448
|
+
&& steps[r].npredicates == 0
|
|
449
|
+
&& steps[r + 1].axis == MKR_AXIS_CHILD
|
|
450
|
+
&& steps[r + 1].npredicates == 0) {
|
|
451
|
+
/* Drop the desc-or-self step and promote the child step. */
|
|
452
|
+
mkr_step_clear(&steps[r]);
|
|
453
|
+
steps[w] = steps[r + 1];
|
|
454
|
+
memset(&steps[r + 1], 0, sizeof(steps[r + 1]));
|
|
455
|
+
steps[w].axis = MKR_AXIS_DESCENDANT;
|
|
456
|
+
w++;
|
|
457
|
+
r += 2;
|
|
458
|
+
} else {
|
|
459
|
+
if (w != r) {
|
|
460
|
+
steps[w] = steps[r];
|
|
461
|
+
memset(&steps[r], 0, sizeof(steps[r]));
|
|
462
|
+
}
|
|
463
|
+
w++;
|
|
464
|
+
r++;
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
*nsteps_ptr = w;
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
void
|
|
471
|
+
mkr_apply_peephole(mkr_node_t *n)
|
|
472
|
+
{
|
|
473
|
+
if (n == NULL) return;
|
|
474
|
+
switch (n->kind) {
|
|
475
|
+
case MKR_NK_FNCALL:
|
|
476
|
+
for (size_t i = 0; i < n->u.fncall.nargs; ++i) mkr_apply_peephole(n->u.fncall.args[i]);
|
|
477
|
+
break;
|
|
478
|
+
case MKR_NK_UNARY:
|
|
479
|
+
mkr_apply_peephole(n->u.unary.expr);
|
|
480
|
+
break;
|
|
481
|
+
case MKR_NK_BINOP:
|
|
482
|
+
mkr_apply_peephole(n->u.binop.lhs);
|
|
483
|
+
mkr_apply_peephole(n->u.binop.rhs);
|
|
484
|
+
break;
|
|
485
|
+
case MKR_NK_PATH:
|
|
486
|
+
fuse_descendant_or_self_steps(n->u.path.steps, &n->u.path.nsteps);
|
|
487
|
+
for (size_t i = 0; i < n->u.path.nsteps; ++i) {
|
|
488
|
+
for (size_t j = 0; j < n->u.path.steps[i].npredicates; ++j) {
|
|
489
|
+
mkr_apply_peephole(n->u.path.steps[i].predicates[j]);
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
break;
|
|
493
|
+
case MKR_NK_FILTER:
|
|
494
|
+
mkr_apply_peephole(n->u.filter.expr);
|
|
495
|
+
for (size_t i = 0; i < n->u.filter.npreds; ++i) mkr_apply_peephole(n->u.filter.preds[i]);
|
|
496
|
+
fuse_descendant_or_self_steps(n->u.filter.path_steps, &n->u.filter.npath);
|
|
497
|
+
for (size_t i = 0; i < n->u.filter.npath; ++i) {
|
|
498
|
+
for (size_t j = 0; j < n->u.filter.path_steps[i].npredicates; ++j) {
|
|
499
|
+
mkr_apply_peephole(n->u.filter.path_steps[i].predicates[j]);
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
break;
|
|
503
|
+
default:
|
|
504
|
+
break;
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
void
|
|
509
|
+
mkr_node_clear_memos(mkr_node_t *n)
|
|
510
|
+
{
|
|
511
|
+
if (n == NULL) return;
|
|
512
|
+
if (n->memoized) {
|
|
513
|
+
mkr_val_clear(&n->memo_value);
|
|
514
|
+
n->memoized = 0;
|
|
515
|
+
}
|
|
516
|
+
switch (n->kind) {
|
|
517
|
+
case MKR_NK_FNCALL:
|
|
518
|
+
for (size_t i = 0; i < n->u.fncall.nargs; ++i) mkr_node_clear_memos(n->u.fncall.args[i]);
|
|
519
|
+
break;
|
|
520
|
+
case MKR_NK_UNARY:
|
|
521
|
+
mkr_node_clear_memos(n->u.unary.expr);
|
|
522
|
+
break;
|
|
523
|
+
case MKR_NK_BINOP:
|
|
524
|
+
mkr_node_clear_memos(n->u.binop.lhs);
|
|
525
|
+
mkr_node_clear_memos(n->u.binop.rhs);
|
|
526
|
+
break;
|
|
527
|
+
case MKR_NK_PATH:
|
|
528
|
+
for (size_t i = 0; i < n->u.path.nsteps; ++i) clear_memos_step(&n->u.path.steps[i]);
|
|
529
|
+
break;
|
|
530
|
+
case MKR_NK_FILTER:
|
|
531
|
+
mkr_node_clear_memos(n->u.filter.expr);
|
|
532
|
+
for (size_t i = 0; i < n->u.filter.npreds; ++i) mkr_node_clear_memos(n->u.filter.preds[i]);
|
|
533
|
+
for (size_t i = 0; i < n->u.filter.npath; ++i) clear_memos_step(&n->u.filter.path_steps[i]);
|
|
534
|
+
break;
|
|
535
|
+
default:
|
|
536
|
+
break;
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
void
|
|
541
|
+
mkr_node_free(mkr_node_t *n)
|
|
542
|
+
{
|
|
543
|
+
if (n == NULL) return;
|
|
544
|
+
/* Free any memoized value first (idempotent). */
|
|
545
|
+
if (n->memoized) {
|
|
546
|
+
mkr_val_clear(&n->memo_value);
|
|
547
|
+
n->memoized = 0;
|
|
548
|
+
}
|
|
549
|
+
switch (n->kind) {
|
|
550
|
+
case MKR_NK_LITERAL_STR:
|
|
551
|
+
mkr_owned_text_clear(&n->u.literal);
|
|
552
|
+
break;
|
|
553
|
+
case MKR_NK_LITERAL_NUM:
|
|
554
|
+
break;
|
|
555
|
+
case MKR_NK_VARREF:
|
|
556
|
+
mkr_owned_text_clear(&n->u.varref.prefix);
|
|
557
|
+
mkr_owned_text_clear(&n->u.varref.name);
|
|
558
|
+
break;
|
|
559
|
+
case MKR_NK_FNCALL:
|
|
560
|
+
mkr_owned_text_clear(&n->u.fncall.prefix);
|
|
561
|
+
mkr_owned_text_clear(&n->u.fncall.name);
|
|
562
|
+
for (size_t i = 0; i < n->u.fncall.nargs; ++i) {
|
|
563
|
+
mkr_node_free(n->u.fncall.args[i]);
|
|
564
|
+
}
|
|
565
|
+
free(n->u.fncall.args);
|
|
566
|
+
break;
|
|
567
|
+
case MKR_NK_UNARY:
|
|
568
|
+
mkr_node_free(n->u.unary.expr);
|
|
569
|
+
break;
|
|
570
|
+
case MKR_NK_BINOP:
|
|
571
|
+
mkr_node_free(n->u.binop.lhs);
|
|
572
|
+
mkr_node_free(n->u.binop.rhs);
|
|
573
|
+
break;
|
|
574
|
+
case MKR_NK_PATH:
|
|
575
|
+
for (size_t i = 0; i < n->u.path.nsteps; ++i) {
|
|
576
|
+
mkr_step_clear(&n->u.path.steps[i]);
|
|
577
|
+
}
|
|
578
|
+
free(n->u.path.steps);
|
|
579
|
+
break;
|
|
580
|
+
case MKR_NK_FILTER:
|
|
581
|
+
mkr_node_free(n->u.filter.expr);
|
|
582
|
+
for (size_t i = 0; i < n->u.filter.npreds; ++i) {
|
|
583
|
+
mkr_node_free(n->u.filter.preds[i]);
|
|
584
|
+
}
|
|
585
|
+
free(n->u.filter.preds);
|
|
586
|
+
for (size_t i = 0; i < n->u.filter.npath; ++i) {
|
|
587
|
+
mkr_step_clear(&n->u.filter.path_steps[i]);
|
|
588
|
+
}
|
|
589
|
+
free(n->u.filter.path_steps);
|
|
590
|
+
break;
|
|
591
|
+
}
|
|
592
|
+
free(n);
|
|
593
|
+
}
|