makiri 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +12 -7
  3. data/CHANGELOG.md +93 -14
  4. data/README.md +173 -7
  5. data/Rakefile +103 -7
  6. data/ext/makiri/bridge/bridge.h +28 -0
  7. data/ext/makiri/bridge/ruby_string.c +217 -0
  8. data/ext/makiri/core/mkr_alloc.h +1 -1
  9. data/ext/makiri/core/mkr_buf.c +35 -1
  10. data/ext/makiri/core/mkr_buf.h +37 -3
  11. data/ext/makiri/core/mkr_core.h +1 -1
  12. data/ext/makiri/core/mkr_hash.h +1 -1
  13. data/ext/makiri/core/mkr_text.h +8 -8
  14. data/ext/makiri/extconf.rb +20 -2
  15. data/ext/makiri/glue/glue.h +47 -11
  16. data/ext/makiri/glue/ruby_doc.c +117 -43
  17. data/ext/makiri/glue/ruby_html_css.c +246 -0
  18. data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +242 -51
  19. data/ext/makiri/glue/ruby_html_node.c +888 -0
  20. data/ext/makiri/glue/ruby_html_serialize.c +154 -0
  21. data/ext/makiri/glue/ruby_node.c +54 -748
  22. data/ext/makiri/glue/ruby_node_set.c +167 -32
  23. data/ext/makiri/glue/ruby_xml.c +420 -0
  24. data/ext/makiri/glue/ruby_xml_node.c +1386 -0
  25. data/ext/makiri/glue/ruby_xpath.c +59 -26
  26. data/ext/makiri/glue/ruby_xpath.h +19 -0
  27. data/ext/makiri/lexbor_compat/compat.h +42 -9
  28. data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
  29. data/ext/makiri/lexbor_compat/dom_index.c +2 -2
  30. data/ext/makiri/lexbor_compat/post_parse.c +100 -10
  31. data/ext/makiri/lexbor_compat/source_loc.c +13 -9
  32. data/ext/makiri/lexbor_compat/text_index.c +14 -8
  33. data/ext/makiri/lexbor_compat/utf8_input.c +85 -26
  34. data/ext/makiri/makiri.c +139 -6
  35. data/ext/makiri/makiri.h +43 -2
  36. data/ext/makiri/xml/mkr_xml.h +126 -0
  37. data/ext/makiri/xml/mkr_xml_chars.c +225 -0
  38. data/ext/makiri/xml/mkr_xml_mutate.c +875 -0
  39. data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
  40. data/ext/makiri/xml/mkr_xml_node.c +267 -0
  41. data/ext/makiri/xml/mkr_xml_node.h +119 -0
  42. data/ext/makiri/xml/mkr_xml_tree.c +1479 -0
  43. data/ext/makiri/xpath/mkr_xpath.c +59 -32
  44. data/ext/makiri/xpath/mkr_xpath.h +96 -4
  45. data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
  46. data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
  47. data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +202 -175
  48. data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +110 -86
  49. data/ext/makiri/xpath/mkr_xpath_internal.h +91 -200
  50. data/ext/makiri/xpath/mkr_xpath_lex.c +2 -2
  51. data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
  52. data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +142 -0
  53. data/ext/makiri/xpath/mkr_xpath_parse.c +5 -5
  54. data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
  55. data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
  56. data/ext/makiri/xpath/mkr_xpath_shared.c +593 -0
  57. data/ext/makiri/xpath/{mkr_xpath_value.c → mkr_xpath_value_body.h} +145 -656
  58. data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
  59. data/lib/makiri/{attribute.rb → attr.rb} +7 -3
  60. data/lib/makiri/cdata_section.rb +21 -0
  61. data/lib/makiri/comment.rb +12 -0
  62. data/lib/makiri/compat_aliases.rb +30 -0
  63. data/lib/makiri/document.rb +4 -76
  64. data/lib/makiri/document_fragment.rb +14 -9
  65. data/lib/makiri/element.rb +5 -3
  66. data/lib/makiri/html/document.rb +106 -0
  67. data/lib/makiri/html/node_methods.rb +19 -0
  68. data/lib/makiri/html.rb +12 -0
  69. data/lib/makiri/node.rb +58 -15
  70. data/lib/makiri/node_set.rb +8 -0
  71. data/lib/makiri/processing_instruction.rb +12 -0
  72. data/lib/makiri/text.rb +2 -0
  73. data/lib/makiri/version.rb +1 -1
  74. data/lib/makiri/xml/document.rb +24 -0
  75. data/lib/makiri/xml/node_methods.rb +37 -0
  76. data/lib/makiri/xml.rb +10 -0
  77. data/lib/makiri/xpath_context.rb +1 -1
  78. data/lib/makiri.rb +23 -5
  79. data/script/build_native_gem.rb +2 -2
  80. data/script/check_c_safety.rb +32 -0
  81. data/script/check_c_safety_allowlist.yml +83 -0
  82. metadata +35 -9
  83. data/ext/makiri/glue/ruby_css.c +0 -185
  84. data/ext/makiri/glue/ruby_serialize.c +0 -92
  85. data/lib/makiri/cdata.rb +0 -6
@@ -0,0 +1,593 @@
1
+ /* mkr_xpath_shared.c - the representation-independent engine primitives.
2
+ *
3
+ * Compiled exactly ONCE (one normal .c, not a monomorphized body): none of
4
+ * these functions dereferences a DOM node. They move node *pointers* (node-set
5
+ * build/clone/free), own/compare engine strings, manage the per-eval
6
+ * string-value cache and document-order-index lifecycles, and walk/destroy the
7
+ * AST. A pointer is a pointer whichever representation it points at, so the
8
+ * machine code is identical for HTML and XML -- one shared copy, not two.
9
+ *
10
+ * Contrast the engine bodies (mkr_xpath_{value,funcs,eval}_body.h): those are
11
+ * .h files precisely because they ARE compiled twice (mkr_xpath_engine_html.c /
12
+ * _xml.c include them with MKR_NODE_* bound to each representation). This file
13
+ * is compiled once, so its code lives directly in the .c -- there is nothing to
14
+ * include twice. mkr_xpath_internal.h is included WITHOUT a prelude, so
15
+ * MKR_DOM_NODE stays the neutral `void` default; the node pointers below are
16
+ * never dereferenced, so void* is exact.
17
+ *
18
+ * The driver (mkr_xpath.c), the parser/lexer, AND both engine instances call
19
+ * these by their bare names. Three are extern rather than file-static
20
+ * (mkr_pointer_hash, mkr_str_cache_index_put, mkr_str_cache_reindex): the
21
+ * string-value cache splits its pure index bookkeeping (here) from its
22
+ * node-dereferencing insert (mkr_get_cached_node_text, in the per-instance
23
+ * value body), so both sides share the one index implementation.
24
+ */
25
+ #include "mkr_xpath_internal.h"
26
+ #include "../core/mkr_core.h"
27
+
28
+ #include <stdint.h>
29
+ #include <stdlib.h>
30
+ #include <string.h>
31
+
32
+ /* ---------- pointer hash (shared by str-cache + doc-order index) ---------- */
33
+
34
/* Reduce a pointer value to a 32-bit hash. The pointer is only converted to
 * an integer and mixed; it is never dereferenced. Cheap mixing that is good
 * enough for pointer keys. */
uint32_t
mkr_pointer_hash(const void *p)
{
    uintptr_t bits = (uintptr_t)p;

    /* Two xor-shift/multiply rounds, then a final xor-shift fold. */
    bits ^= bits >> 16;
    bits *= 0x9E3779B9u;
    bits ^= bits >> 13;
    bits *= 0x85EBCA6Bu;
    bits ^= bits >> 16;

    return (uint32_t)bits;
}
43
+
44
+ /* ---------- node-set ---------- */
45
+
46
+ void
47
+ mkr_nodeset_init(mkr_nodeset_t *ns)
48
+ {
49
+ ns->items = NULL;
50
+ ns->count = 0;
51
+ ns->capacity = 0;
52
+ }
53
+
54
+ int
55
+ mkr_nodeset_push(mkr_nodeset_t *ns, MKR_DOM_NODE *node,
56
+ mkr_xpath_limits_t *limits, mkr_xpath_error_t *err)
57
+ {
58
+ if (node == NULL) return 0;
59
+ if (limits != NULL && mkr_limit_check_nodeset_size(limits, ns->count + 1, err) != 0) {
60
+ return -1;
61
+ }
62
+ if (mkr_grow_reserve((void **)&ns->items, &ns->capacity, ns->count + 1,
63
+ sizeof(*ns->items)) != MKR_OK) {
64
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory growing node-set");
65
+ return -1;
66
+ }
67
+ ns->items[ns->count++] = node;
68
+ return 0;
69
+ }
70
+
71
+ void
72
+ mkr_nodeset_clear(mkr_nodeset_t *ns)
73
+ {
74
+ if (ns == NULL) return;
75
+ free(ns->items);
76
+ ns->items = NULL;
77
+ ns->count = 0;
78
+ ns->capacity = 0;
79
+ }
80
+
81
+ /* ---------- owned / borrowed text ---------- */
82
+
83
+ void
84
+ mkr_owned_text_init(mkr_owned_text_t *t)
85
+ {
86
+ if (t == NULL) return;
87
+ t->ptr = NULL;
88
+ t->len = 0;
89
+ }
90
+
91
+ void
92
+ mkr_owned_text_clear(mkr_owned_text_t *t)
93
+ {
94
+ if (t == NULL) return;
95
+ free(t->ptr);
96
+ t->ptr = NULL;
97
+ t->len = 0;
98
+ }
99
+
100
+ int
101
+ mkr_borrowed_text_eq(mkr_borrowed_text_t a, mkr_borrowed_text_t b)
102
+ {
103
+ if (a.ptr == NULL || b.ptr == NULL) return a.ptr == b.ptr;
104
+ return a.len == b.len && memcmp(a.ptr, b.ptr, a.len) == 0;
105
+ }
106
+
107
+ /* Copy an already-valid borrowed text into owned storage. Taking
108
+ * mkr_borrowed_text_t (not raw char*+len) keeps the type contract: an
109
+ * mkr_owned_text_t can only be minted from text the caller has asserted valid
110
+ * (via mkr_borrowed_text / mkr_borrowed_text_from_verified /
111
+ * mkr_borrowed_text_from_owned), so every raw-bytes -> text entry point is
112
+ * greppable. */
113
+ int
114
+ mkr_owned_text_from_borrowed_copy(mkr_owned_text_t *out, mkr_borrowed_text_t t,
115
+ mkr_xpath_error_t *err, const char *what)
116
+ {
117
+ if (out == NULL) {
118
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_owned_text_from_borrowed_copy: bad args");
119
+ return -1;
120
+ }
121
+ mkr_owned_text_init(out);
122
+ const char *s = t.ptr ? t.ptr : "";
123
+ size_t len = t.ptr ? t.len : 0;
124
+ char *p = mkr_strndup(s, len);
125
+ if (p == NULL) {
126
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, what ? what : "out of memory copying text");
127
+ return -1;
128
+ }
129
+ out->ptr = p;
130
+ out->len = len;
131
+ return 0;
132
+ }
133
+
134
+ /* ---------- value ---------- */
135
+
136
+ void
137
+ mkr_val_clear(mkr_val_t *v)
138
+ {
139
+ if (v == NULL) return;
140
+ switch (v->type) {
141
+ case MKR_XPATH_TYPE_NODESET:
142
+ mkr_nodeset_clear(&v->u.nodeset);
143
+ break;
144
+ case MKR_XPATH_TYPE_STRING:
145
+ mkr_owned_text_clear(&v->u.string);
146
+ break;
147
+ default:
148
+ break;
149
+ }
150
+ memset(v, 0, sizeof(*v));
151
+ }
152
+
153
+ void
154
+ mkr_val_set_owned_text(mkr_val_t *v, mkr_owned_text_t text)
155
+ {
156
+ if (v == NULL) return;
157
+ v->type = MKR_XPATH_TYPE_STRING;
158
+ v->u.string = text;
159
+ }
160
+
161
+ /* Set +v+ to a STRING by copying a borrowed view: the engine allocates and owns
162
+ * the copy. This is how callers outside the engine (the glue handler bridge)
163
+ * hand a string into a value - they pass what they have, a borrowed slice, and
164
+ * never construct an mkr_owned_text_t themselves. Keeping the copy-and-own step
165
+ * here keeps allocation and freeing of owned strings in one layer. Returns 0 on
166
+ * success, -1 on OOM (err populated; +v+ left untouched). */
167
+ int
168
+ mkr_val_set_borrowed_text_copy(mkr_val_t *v, mkr_borrowed_text_t text,
169
+ mkr_xpath_error_t *err, const char *what)
170
+ {
171
+ if (v == NULL) {
172
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_val_set_borrowed_text_copy: bad args");
173
+ return -1;
174
+ }
175
+ mkr_owned_text_t owned;
176
+ if (mkr_owned_text_from_borrowed_copy(&owned, text, err, what) != 0) {
177
+ return -1;
178
+ }
179
+ mkr_val_set_owned_text(v, owned);
180
+ return 0;
181
+ }
182
+
183
+ /* ---------- per-evaluate document-order index (lifecycle) ---------- */
184
+
185
+ void
186
+ mkr_doc_order_index_init(mkr_doc_order_index_t *idx)
187
+ {
188
+ idx->buckets = NULL;
189
+ idx->cap = 0;
190
+ idx->count = 0;
191
+ idx->built = 0;
192
+ }
193
+
194
+ void
195
+ mkr_doc_order_index_clear(mkr_doc_order_index_t *idx)
196
+ {
197
+ if (idx == NULL) return;
198
+ free(idx->buckets);
199
+ idx->buckets = NULL;
200
+ idx->cap = 0;
201
+ idx->count = 0;
202
+ idx->built = 0;
203
+ }
204
+
205
+ /* ---------- per-evaluation string-value cache (lifecycle + index) ---------- */
206
+
207
+ void
208
+ mkr_str_cache_init(mkr_str_cache_t *c)
209
+ {
210
+ c->entries = NULL;
211
+ c->count = 0;
212
+ c->cap = 0;
213
+ c->buckets = NULL;
214
+ c->bucket_cap = 0;
215
+ }
216
+
217
+ /* Insert entry index `idx` (keyed by entries[idx].node) into the index. The
218
+ * index must have room (callers grow/rehash first). Extern: shared by the pure
219
+ * reindex below and the node-dereferencing insert in the per-instance body. */
220
+ void
221
+ mkr_str_cache_index_put(mkr_str_cache_t *c, size_t idx)
222
+ {
223
+ size_t mask = c->bucket_cap - 1;
224
+ size_t j = mkr_pointer_hash(c->entries[idx].node) & mask;
225
+ while (c->buckets[j] != 0) {
226
+ j = (j + 1) & mask;
227
+ }
228
+ c->buckets[j] = idx + 1;
229
+ }
230
+
231
+ /* Rebuild the index from entries[0, count). Returns -1 on OOM. Extern: see above. */
232
+ int
233
+ mkr_str_cache_reindex(mkr_str_cache_t *c, size_t bucket_cap)
234
+ {
235
+ size_t *buckets = mkr_callocarray(bucket_cap, sizeof(*buckets));
236
+ if (buckets == NULL) return -1;
237
+ free(c->buckets);
238
+ c->buckets = buckets;
239
+ c->bucket_cap = bucket_cap;
240
+ for (size_t i = 0; i < c->count; ++i) {
241
+ mkr_str_cache_index_put(c, i);
242
+ }
243
+ return 0;
244
+ }
245
+
246
+ void
247
+ mkr_str_cache_truncate(mkr_str_cache_t *c, size_t target_count)
248
+ {
249
+ if (c == NULL || target_count >= c->count) return;
250
+ for (size_t i = target_count; i < c->count; ++i) {
251
+ free(c->entries[i].str);
252
+ }
253
+ c->count = target_count;
254
+ /* Drop the removed nodes from the index. A full truncate just clears it;
255
+ * a partial one (nested-eval snapshot restore) rebuilds from what remains. */
256
+ if (c->buckets != NULL) {
257
+ if (target_count == 0) {
258
+ size_t buckets_bytes;
259
+ if (!mkr_size_mul(c->bucket_cap, sizeof(*c->buckets), &buckets_bytes)) {
260
+ free(c->buckets);
261
+ c->buckets = NULL;
262
+ c->bucket_cap = 0;
263
+ return;
264
+ }
265
+ memset(c->buckets, 0, buckets_bytes);
266
+ } else {
267
+ mkr_str_cache_reindex(c, c->bucket_cap);
268
+ }
269
+ }
270
+ }
271
+
272
+ void
273
+ mkr_str_cache_clear(mkr_str_cache_t *c)
274
+ {
275
+ if (c == NULL) return;
276
+ for (size_t i = 0; i < c->count; ++i) {
277
+ free(c->entries[i].str);
278
+ }
279
+ free(c->entries);
280
+ free(c->buckets);
281
+ c->entries = NULL;
282
+ c->count = 0;
283
+ c->cap = 0;
284
+ c->buckets = NULL;
285
+ c->bucket_cap = 0;
286
+ }
287
+
288
+ /* ---------- AST destructors ---------- */
289
+
290
+ void
291
+ mkr_step_clear(mkr_step_t *s)
292
+ {
293
+ if (s == NULL) return;
294
+ mkr_owned_text_clear(&s->test.prefix);
295
+ mkr_owned_text_clear(&s->test.local);
296
+ mkr_owned_text_clear(&s->test.pi_target);
297
+ for (size_t i = 0; i < s->npredicates; ++i) {
298
+ mkr_node_free(s->predicates[i]);
299
+ }
300
+ free(s->predicates);
301
+ memset(s, 0, sizeof(*s));
302
+ }
303
+
304
+ /* ---------- AST hoisting helpers ---------- */
305
+
306
/* Report whether the unprefixed built-in +name+, called with +nargs+
 * arguments, is a pure XPath 1.0 function that may be hoisted once all of its
 * arguments are context-independent (the caller checks the arguments).
 *
 * The set is listed explicitly to stay conservative. Functions that read the
 * context node (last/position, the 0-arg forms of string/normalize-space/
 * local-name/etc., lang) or that may depend on dynamic state (id,
 * handler-routed functions) are deliberately absent. With zero arguments only
 * true() and false() qualify, since they read no input. Returns 1 for a match
 * and 0 otherwise, including for a NULL +name+. */
static int
is_pure_builtin_name(const char *name, size_t nargs)
{
    static const char *const zero_arg_pure[] = {
        "true", "false",
    };
    static const char *const with_args_pure[] = {
        "count", "string-length", "number", "boolean", "not",
        "floor", "ceiling", "round", "sum",
        "concat", "starts-with", "contains",
        "substring-before", "substring-after", "substring",
        "translate",
    };

    if (name == NULL) {
        return 0;
    }

    const char *const *table;
    size_t table_len;
    if (nargs == 0) {
        table = zero_arg_pure;
        table_len = sizeof(zero_arg_pure) / sizeof(zero_arg_pure[0]);
    } else {
        table = with_args_pure;
        table_len = sizeof(with_args_pure) / sizeof(with_args_pure[0]);
    }

    for (size_t k = 0; k < table_len; ++k) {
        if (strcmp(table[k], name) == 0) {
            return 1;
        }
    }
    return 0;
}
334
+
335
+ static void
336
+ mark_step_predicates(mkr_step_t *s)
337
+ {
338
+ for (size_t i = 0; i < s->npredicates; ++i) {
339
+ mkr_mark_context_independent(s->predicates[i]);
340
+ }
341
+ }
342
+
343
+ void
344
+ mkr_mark_context_independent(mkr_node_t *n)
345
+ {
346
+ if (n == NULL) return;
347
+ int ci = 0;
348
+ switch (n->kind) {
349
+ case MKR_NK_LITERAL_STR:
350
+ case MKR_NK_LITERAL_NUM:
351
+ ci = 1;
352
+ break;
353
+ case MKR_NK_VARREF:
354
+ /* Conservative: variables not hoisted even though XPath 1.0 says
355
+ * they're fixed per evaluation. */
356
+ ci = 0;
357
+ break;
358
+ case MKR_NK_FNCALL: {
359
+ /* Recurse first so subtrees get their own CI marks even when this
360
+ * call itself is not hoistable. */
361
+ for (size_t i = 0; i < n->u.fncall.nargs; ++i) {
362
+ mkr_mark_context_independent(n->u.fncall.args[i]);
363
+ }
364
+ if (n->u.fncall.prefix.ptr != NULL) {
365
+ ci = 0; /* Handler-routed or namespaced builtins → non-CI. */
366
+ break;
367
+ }
368
+ if (!is_pure_builtin_name(n->u.fncall.name.ptr, n->u.fncall.nargs)) {
369
+ ci = 0;
370
+ break;
371
+ }
372
+ ci = 1;
373
+ for (size_t i = 0; i < n->u.fncall.nargs; ++i) {
374
+ if (!n->u.fncall.args[i]->is_context_independent) { ci = 0; break; }
375
+ }
376
+ break;
377
+ }
378
+ case MKR_NK_UNARY:
379
+ mkr_mark_context_independent(n->u.unary.expr);
380
+ ci = n->u.unary.expr ? n->u.unary.expr->is_context_independent : 0;
381
+ break;
382
+ case MKR_NK_BINOP:
383
+ mkr_mark_context_independent(n->u.binop.lhs);
384
+ mkr_mark_context_independent(n->u.binop.rhs);
385
+ ci = (n->u.binop.lhs && n->u.binop.lhs->is_context_independent)
386
+ && (n->u.binop.rhs && n->u.binop.rhs->is_context_independent);
387
+ break;
388
+ case MKR_NK_PATH:
389
+ /* Absolute path is CI: seed is the document root regardless of
390
+ * outer context. Relative paths use the outer context node and
391
+ * are not hoistable. Predicates inside the path are evaluated
392
+ * against the path's own context, so their position()/last() do
393
+ * not leak - recurse so any pure sub-expressions still get marks. */
394
+ ci = n->u.path.absolute ? 1 : 0;
395
+ for (size_t i = 0; i < n->u.path.nsteps; ++i) {
396
+ mark_step_predicates(&n->u.path.steps[i]);
397
+ }
398
+ break;
399
+ case MKR_NK_FILTER:
400
+ /* Conservative: filter expressions are not hoisted in v1. */
401
+ ci = 0;
402
+ mkr_mark_context_independent(n->u.filter.expr);
403
+ for (size_t i = 0; i < n->u.filter.npreds; ++i) {
404
+ mkr_mark_context_independent(n->u.filter.preds[i]);
405
+ }
406
+ for (size_t i = 0; i < n->u.filter.npath; ++i) {
407
+ mark_step_predicates(&n->u.filter.path_steps[i]);
408
+ }
409
+ break;
410
+ }
411
+ n->is_context_independent = (uint8_t)ci;
412
+ }
413
+
414
+ static void
415
+ clear_memos_step(mkr_step_t *s)
416
+ {
417
+ for (size_t i = 0; i < s->npredicates; ++i) {
418
+ mkr_node_clear_memos(s->predicates[i]);
419
+ }
420
+ }
421
+
422
+ /* ---------- peephole: //X fusion ---------- */
423
+
424
+ /*
425
+ * Collapse pairs of consecutive steps:
426
+ * (axis=descendant-or-self, test=node(), no predicates)
427
+ * (axis=child, test=*, no predicates)
428
+ * into a single
429
+ * (axis=descendant, test=*, no predicates)
430
+ *
431
+ * The fusion is safe per XPath 1.0 only when the child step has no
432
+ * predicates: otherwise '//X[1]' would change meaning ("first X per
433
+ * parent" vs "first X in doc order"). The synthesised // step always
434
+ * has no predicates by construction, so we don't need to check the
435
+ * first step's predicate list - only the child step's.
436
+ */
437
+ static void
438
+ fuse_descendant_or_self_steps(mkr_step_t *steps, size_t *nsteps_ptr)
439
+ {
440
+ if (steps == NULL || *nsteps_ptr < 2) return;
441
+ size_t nsteps = *nsteps_ptr;
442
+ size_t w = 0, r = 0;
443
+ while (r < nsteps) {
444
+ if (r + 1 < nsteps
445
+ && steps[r].axis == MKR_AXIS_DESCENDANT_OR_SELF
446
+ && steps[r].test.kind == MKR_NT_NODE
447
+ && steps[r].test.prefix.ptr == NULL
448
+ && steps[r].npredicates == 0
449
+ && steps[r + 1].axis == MKR_AXIS_CHILD
450
+ && steps[r + 1].npredicates == 0) {
451
+ /* Drop the desc-or-self step and promote the child step. */
452
+ mkr_step_clear(&steps[r]);
453
+ steps[w] = steps[r + 1];
454
+ memset(&steps[r + 1], 0, sizeof(steps[r + 1]));
455
+ steps[w].axis = MKR_AXIS_DESCENDANT;
456
+ w++;
457
+ r += 2;
458
+ } else {
459
+ if (w != r) {
460
+ steps[w] = steps[r];
461
+ memset(&steps[r], 0, sizeof(steps[r]));
462
+ }
463
+ w++;
464
+ r++;
465
+ }
466
+ }
467
+ *nsteps_ptr = w;
468
+ }
469
+
470
+ void
471
+ mkr_apply_peephole(mkr_node_t *n)
472
+ {
473
+ if (n == NULL) return;
474
+ switch (n->kind) {
475
+ case MKR_NK_FNCALL:
476
+ for (size_t i = 0; i < n->u.fncall.nargs; ++i) mkr_apply_peephole(n->u.fncall.args[i]);
477
+ break;
478
+ case MKR_NK_UNARY:
479
+ mkr_apply_peephole(n->u.unary.expr);
480
+ break;
481
+ case MKR_NK_BINOP:
482
+ mkr_apply_peephole(n->u.binop.lhs);
483
+ mkr_apply_peephole(n->u.binop.rhs);
484
+ break;
485
+ case MKR_NK_PATH:
486
+ fuse_descendant_or_self_steps(n->u.path.steps, &n->u.path.nsteps);
487
+ for (size_t i = 0; i < n->u.path.nsteps; ++i) {
488
+ for (size_t j = 0; j < n->u.path.steps[i].npredicates; ++j) {
489
+ mkr_apply_peephole(n->u.path.steps[i].predicates[j]);
490
+ }
491
+ }
492
+ break;
493
+ case MKR_NK_FILTER:
494
+ mkr_apply_peephole(n->u.filter.expr);
495
+ for (size_t i = 0; i < n->u.filter.npreds; ++i) mkr_apply_peephole(n->u.filter.preds[i]);
496
+ fuse_descendant_or_self_steps(n->u.filter.path_steps, &n->u.filter.npath);
497
+ for (size_t i = 0; i < n->u.filter.npath; ++i) {
498
+ for (size_t j = 0; j < n->u.filter.path_steps[i].npredicates; ++j) {
499
+ mkr_apply_peephole(n->u.filter.path_steps[i].predicates[j]);
500
+ }
501
+ }
502
+ break;
503
+ default:
504
+ break;
505
+ }
506
+ }
507
+
508
+ void
509
+ mkr_node_clear_memos(mkr_node_t *n)
510
+ {
511
+ if (n == NULL) return;
512
+ if (n->memoized) {
513
+ mkr_val_clear(&n->memo_value);
514
+ n->memoized = 0;
515
+ }
516
+ switch (n->kind) {
517
+ case MKR_NK_FNCALL:
518
+ for (size_t i = 0; i < n->u.fncall.nargs; ++i) mkr_node_clear_memos(n->u.fncall.args[i]);
519
+ break;
520
+ case MKR_NK_UNARY:
521
+ mkr_node_clear_memos(n->u.unary.expr);
522
+ break;
523
+ case MKR_NK_BINOP:
524
+ mkr_node_clear_memos(n->u.binop.lhs);
525
+ mkr_node_clear_memos(n->u.binop.rhs);
526
+ break;
527
+ case MKR_NK_PATH:
528
+ for (size_t i = 0; i < n->u.path.nsteps; ++i) clear_memos_step(&n->u.path.steps[i]);
529
+ break;
530
+ case MKR_NK_FILTER:
531
+ mkr_node_clear_memos(n->u.filter.expr);
532
+ for (size_t i = 0; i < n->u.filter.npreds; ++i) mkr_node_clear_memos(n->u.filter.preds[i]);
533
+ for (size_t i = 0; i < n->u.filter.npath; ++i) clear_memos_step(&n->u.filter.path_steps[i]);
534
+ break;
535
+ default:
536
+ break;
537
+ }
538
+ }
539
+
540
+ void
541
+ mkr_node_free(mkr_node_t *n)
542
+ {
543
+ if (n == NULL) return;
544
+ /* Free any memoized value first (idempotent). */
545
+ if (n->memoized) {
546
+ mkr_val_clear(&n->memo_value);
547
+ n->memoized = 0;
548
+ }
549
+ switch (n->kind) {
550
+ case MKR_NK_LITERAL_STR:
551
+ mkr_owned_text_clear(&n->u.literal);
552
+ break;
553
+ case MKR_NK_LITERAL_NUM:
554
+ break;
555
+ case MKR_NK_VARREF:
556
+ mkr_owned_text_clear(&n->u.varref.prefix);
557
+ mkr_owned_text_clear(&n->u.varref.name);
558
+ break;
559
+ case MKR_NK_FNCALL:
560
+ mkr_owned_text_clear(&n->u.fncall.prefix);
561
+ mkr_owned_text_clear(&n->u.fncall.name);
562
+ for (size_t i = 0; i < n->u.fncall.nargs; ++i) {
563
+ mkr_node_free(n->u.fncall.args[i]);
564
+ }
565
+ free(n->u.fncall.args);
566
+ break;
567
+ case MKR_NK_UNARY:
568
+ mkr_node_free(n->u.unary.expr);
569
+ break;
570
+ case MKR_NK_BINOP:
571
+ mkr_node_free(n->u.binop.lhs);
572
+ mkr_node_free(n->u.binop.rhs);
573
+ break;
574
+ case MKR_NK_PATH:
575
+ for (size_t i = 0; i < n->u.path.nsteps; ++i) {
576
+ mkr_step_clear(&n->u.path.steps[i]);
577
+ }
578
+ free(n->u.path.steps);
579
+ break;
580
+ case MKR_NK_FILTER:
581
+ mkr_node_free(n->u.filter.expr);
582
+ for (size_t i = 0; i < n->u.filter.npreds; ++i) {
583
+ mkr_node_free(n->u.filter.preds[i]);
584
+ }
585
+ free(n->u.filter.preds);
586
+ for (size_t i = 0; i < n->u.filter.npath; ++i) {
587
+ mkr_step_clear(&n->u.filter.path_steps[i]);
588
+ }
589
+ free(n->u.filter.path_steps);
590
+ break;
591
+ }
592
+ free(n);
593
+ }