makiri 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/conformance.yml +22 -0
  3. data/.github/workflows/libfuzzer.yml +83 -0
  4. data/.github/workflows/release.yml +12 -7
  5. data/.github/workflows/security.yml +88 -3
  6. data/.github/workflows/valgrind.yml +135 -0
  7. data/CHANGELOG.md +152 -15
  8. data/README.md +183 -13
  9. data/Rakefile +294 -7
  10. data/ext/makiri/bridge/bridge.h +28 -0
  11. data/ext/makiri/bridge/ruby_string.c +282 -12
  12. data/ext/makiri/core/mkr_alloc.c +40 -3
  13. data/ext/makiri/core/mkr_alloc.h +28 -5
  14. data/ext/makiri/core/mkr_buf.c +47 -3
  15. data/ext/makiri/core/mkr_buf.h +112 -3
  16. data/ext/makiri/core/mkr_core.c +143 -0
  17. data/ext/makiri/core/mkr_core.h +11 -2
  18. data/ext/makiri/core/mkr_hash.h +1 -1
  19. data/ext/makiri/core/mkr_span.h +186 -0
  20. data/ext/makiri/core/mkr_text.h +8 -8
  21. data/ext/makiri/core/mkr_utf8.c +101 -0
  22. data/ext/makiri/core/mkr_utf8.h +88 -0
  23. data/ext/makiri/extconf.rb +123 -10
  24. data/ext/makiri/fuzz/Makefile +95 -0
  25. data/ext/makiri/fuzz/check_fuzzer.cc +4 -0
  26. data/ext/makiri/fuzz/xml_fuzz.c +24 -0
  27. data/ext/makiri/fuzz/xpath_fuzz.c +109 -0
  28. data/ext/makiri/glue/glue.h +55 -11
  29. data/ext/makiri/glue/ruby_doc.c +129 -59
  30. data/ext/makiri/glue/ruby_html_css.c +292 -0
  31. data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +248 -52
  32. data/ext/makiri/glue/ruby_html_node.c +859 -0
  33. data/ext/makiri/glue/ruby_html_serialize.c +154 -0
  34. data/ext/makiri/glue/ruby_node.c +74 -729
  35. data/ext/makiri/glue/ruby_node_set.c +167 -32
  36. data/ext/makiri/glue/ruby_xml.c +602 -0
  37. data/ext/makiri/glue/ruby_xml_node.c +1373 -0
  38. data/ext/makiri/glue/ruby_xpath.c +63 -30
  39. data/ext/makiri/glue/ruby_xpath.h +19 -0
  40. data/ext/makiri/lexbor_compat/compat.h +42 -9
  41. data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
  42. data/ext/makiri/lexbor_compat/dom_index.c +2 -2
  43. data/ext/makiri/lexbor_compat/post_parse.c +100 -10
  44. data/ext/makiri/lexbor_compat/source_loc.c +15 -13
  45. data/ext/makiri/lexbor_compat/text_index.c +14 -8
  46. data/ext/makiri/lexbor_compat/utf8_input.c +19 -33
  47. data/ext/makiri/makiri.c +184 -6
  48. data/ext/makiri/makiri.h +43 -2
  49. data/ext/makiri/xml/mkr_xml.h +125 -0
  50. data/ext/makiri/xml/mkr_xml_chars.c +195 -0
  51. data/ext/makiri/xml/mkr_xml_index.c +169 -0
  52. data/ext/makiri/xml/mkr_xml_index.h +48 -0
  53. data/ext/makiri/xml/mkr_xml_mutate.c +817 -0
  54. data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
  55. data/ext/makiri/xml/mkr_xml_node.c +399 -0
  56. data/ext/makiri/xml/mkr_xml_node.h +184 -0
  57. data/ext/makiri/xml/mkr_xml_tree.c +1515 -0
  58. data/ext/makiri/xpath/mkr_css.c +1023 -0
  59. data/ext/makiri/xpath/mkr_css.h +65 -0
  60. data/ext/makiri/xpath/mkr_xpath.c +96 -32
  61. data/ext/makiri/xpath/mkr_xpath.h +109 -4
  62. data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
  63. data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
  64. data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +551 -241
  65. data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +318 -276
  66. data/ext/makiri/xpath/mkr_xpath_internal.h +177 -206
  67. data/ext/makiri/xpath/mkr_xpath_lex.c +95 -125
  68. data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
  69. data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +145 -0
  70. data/ext/makiri/xpath/mkr_xpath_number.c +109 -0
  71. data/ext/makiri/xpath/mkr_xpath_parse.c +83 -94
  72. data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
  73. data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
  74. data/ext/makiri/xpath/mkr_xpath_shared.c +609 -0
  75. data/ext/makiri/xpath/mkr_xpath_value_body.h +801 -0
  76. data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
  77. data/lib/makiri/{attribute.rb → attr.rb} +7 -3
  78. data/lib/makiri/cdata_section.rb +19 -0
  79. data/lib/makiri/comment.rb +10 -0
  80. data/lib/makiri/compat_aliases.rb +30 -0
  81. data/lib/makiri/document.rb +9 -73
  82. data/lib/makiri/document_fragment.rb +14 -9
  83. data/lib/makiri/element.rb +4 -4
  84. data/lib/makiri/html/document.rb +106 -0
  85. data/lib/makiri/html/node_methods.rb +19 -0
  86. data/lib/makiri/html.rb +12 -0
  87. data/lib/makiri/node.rb +58 -15
  88. data/lib/makiri/node_set.rb +8 -0
  89. data/lib/makiri/processing_instruction.rb +10 -0
  90. data/lib/makiri/text.rb +1 -1
  91. data/lib/makiri/version.rb +1 -1
  92. data/lib/makiri/xml/builder.rb +263 -0
  93. data/lib/makiri/xml/document.rb +24 -0
  94. data/lib/makiri/xml/node_methods.rb +84 -0
  95. data/lib/makiri/xml.rb +10 -0
  96. data/lib/makiri/xpath_context.rb +1 -1
  97. data/lib/makiri.rb +24 -5
  98. data/script/build_native_gem.rb +2 -2
  99. data/script/check_alloc_failures.rb +266 -0
  100. data/script/check_c_safety.rb +77 -2
  101. data/script/check_c_safety_allowlist.yml +102 -0
  102. data/script/check_leaks.rb +64 -0
  103. data/script/leaks_harness.rb +64 -0
  104. data/vendor/lexbor/CMakeLists.txt +6 -0
  105. data/vendor/lexbor/README.md +12 -0
  106. data/vendor/lexbor/config.cmake +1 -1
  107. data/vendor/lexbor/source/lexbor/core/base.h +1 -1
  108. data/vendor/lexbor/source/lexbor/core/config.cmake +9 -1
  109. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +2 -3
  110. data/vendor/lexbor/source/lexbor/css/selectors/state.c +3 -0
  111. data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +21 -0
  112. data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +5 -0
  113. data/vendor/lexbor/source/lexbor/encoding/decode.c +33 -4
  114. data/vendor/lexbor/source/lexbor/html/base.h +1 -1
  115. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +4 -0
  116. data/vendor/lexbor/source/lexbor/html/serialize.c +545 -41
  117. data/vendor/lexbor/source/lexbor/html/serialize.h +2 -1
  118. data/vendor/lexbor/source/lexbor/html/tokenizer.h +2 -2
  119. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1 -1
  120. data/vendor/lexbor/source/lexbor/html/tree.c +6 -6
  121. data/vendor/lexbor/source/lexbor/selectors/selectors.c +12 -3
  122. data/vendor/lexbor/source/lexbor/url/base.h +1 -1
  123. data/vendor/lexbor/source/lexbor/url/url.c +5 -2
  124. data/vendor/lexbor/source/lexbor/url/url.h +9 -0
  125. data/vendor/lexbor/version +1 -1
  126. metadata +53 -9
  127. data/ext/makiri/glue/ruby_css.c +0 -185
  128. data/ext/makiri/glue/ruby_serialize.c +0 -92
  129. data/ext/makiri/xpath/mkr_xpath_value.c +0 -1286
  130. data/lib/makiri/cdata.rb +0 -6
@@ -0,0 +1,801 @@
1
+ #include "mkr_xpath_internal.h"
2
+ #include "../core/mkr_core.h"
3
+
4
+ #include <lexbor/dom/dom.h>
5
+ #include <math.h>
6
+ #include <stdio.h>
7
+ #include <stdlib.h>
8
+ #include <string.h>
9
+
10
+ /*
11
+ * Per-instance value model: the node-DEREFERENCING half of the runtime values
12
+ * - node string-value construction (XPath 1.0 §5), the value coercions that
13
+ * read a node-set's first node, document-order comparison/sort, and the
14
+ * string-value cache's node-keyed insert. Compiled once per representation
15
+ * (HTML / XML) with MKR_NODE_* bound by the including prelude.
16
+ *
17
+ * Every function here is file-static: it is reachable only from the other
18
+ * per-instance bodies (funcs / eval) in the same merged engine translation
19
+ * unit. The representation-INDEPENDENT primitives it leans on (node-set build,
20
+ * owned/borrowed text, str-cache + doc-order lifecycle, AST destructors) are the
21
+ * shared, bare-named functions in mkr_xpath_shared.c, declared in
22
+ * mkr_xpath_internal.h.
23
+ */
24
+
25
+ /* Forward declarations for the two coercions used before their definition
26
+ * (mkr_val_to_number_or_fail reads both). They are static, so there is no
27
+ * declaration in the shared internal header to cover the forward reference. */
28
+ static double mkr_borrowed_text_to_number(mkr_borrowed_text_t t);
29
+ static double mkr_val_to_number_unchecked(const mkr_val_t *v);
30
+
31
+ /* ---------- owned-text from a steal-able buffer ---------- */
32
+
33
+ static int
34
+ mkr_owned_text_from_buf_steal(mkr_owned_text_t *out, mkr_buf_t *buf,
35
+ mkr_xpath_error_t *err, const char *what)
36
+ {
37
+ if (out == NULL || buf == NULL) {
38
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_owned_text_from_buf_steal: bad args");
39
+ return -1;
40
+ }
41
+ mkr_owned_text_init(out);
42
+ size_t len = 0;
43
+ char *p = mkr_buf_steal(buf, &len);
44
+ if (p == NULL) {
45
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, what ? what : "out of memory stealing text buffer");
46
+ return -1;
47
+ }
48
+ out->ptr = p;
49
+ out->len = len;
50
+ return 0;
51
+ }
52
+
53
+ /* ---------- value clone ---------- */
54
+
55
+ static int
56
+ mkr_val_clone(const mkr_val_t *src, mkr_val_t *dst, mkr_xpath_error_t *err)
57
+ {
58
+ if (src == NULL || dst == NULL) {
59
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_val_clone: bad args");
60
+ return -1;
61
+ }
62
+ memset(dst, 0, sizeof(*dst));
63
+ dst->type = src->type;
64
+ switch (src->type) {
65
+ case MKR_XPATH_TYPE_STRING: {
66
+ mkr_owned_text_t text;
67
+ if (mkr_owned_text_from_borrowed_copy(&text, mkr_borrowed_text_from_owned(src->u.string),
68
+ err, "out of memory cloning string value") != 0) return -1;
69
+ mkr_val_set_owned_text(dst, text);
70
+ return 0;
71
+ }
72
+ case MKR_XPATH_TYPE_NUMBER:
73
+ dst->u.number = src->u.number;
74
+ return 0;
75
+ case MKR_XPATH_TYPE_BOOLEAN:
76
+ dst->u.boolean = src->u.boolean;
77
+ return 0;
78
+ case MKR_XPATH_TYPE_NODESET: {
79
+ size_t n = src->u.nodeset.count;
80
+ mkr_nodeset_init(&dst->u.nodeset);
81
+ if (n == 0) return 0;
82
+ void **items;
83
+ size_t items_bytes;
84
+ if (!mkr_size_mul(n, sizeof(*items), &items_bytes)) {
85
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory cloning node-set");
86
+ return -1;
87
+ }
88
+ items = mkr_reallocarray(NULL, n, sizeof(*items));
89
+ if (items == NULL) {
90
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory cloning node-set");
91
+ return -1;
92
+ }
93
+ memcpy(items, src->u.nodeset.items, items_bytes);
94
+ dst->u.nodeset.items = items;
95
+ dst->u.nodeset.count = n;
96
+ dst->u.nodeset.capacity = n;
97
+ return 0;
98
+ }
99
+ }
100
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_val_clone: unknown value type");
101
+ return -1;
102
+ }
103
+
104
+ /* ---------- node string-value (XPath 1.0 §5) ----------
105
+ *
106
+ * Built into an mkr_buf_t whose `max` is the per-evaluate byte cap: append fails
107
+ * closed with MKR_ERR_LIMIT past the cap and MKR_ERR_OOM on allocation failure,
108
+ * so there is never a partial/truncated result. Lexbor-allocated text is freed
109
+ * after each append (otherwise we'd leak document-arena memory on every XPath
110
+ * that touches text content). */
111
+
112
+ /* Append `node`'s own text content. */
113
+ static mkr_status_t
114
+ append_text_content(MKR_DOM_NODE *node, mkr_buf_t *buf)
115
+ {
116
+ mkr_status_t st;
117
+ MKR_NODE_APPEND_OWN_TEXT(node, buf, st);
118
+ return st;
119
+ }
120
+
121
+ /* Append the string-value of every character-data descendant of `node`, in
122
+ * document order. Both TEXT and CDATA-section nodes are character data (XPath
123
+ * 1.0 §3 / §5: a CDATA section is text, not a distinct node type), so both
124
+ * contribute - matching the text index that backs Node#text. Iterative
125
+ * (parent-pointer) pre-order walk rather than C recursion, so an adversarially
126
+ * deep tree cannot overflow the stack (fail-closed / no DoS); O(1) extra space.
127
+ * Descends only into elements. */
128
+ static mkr_status_t
129
+ append_text_descendants(MKR_DOM_NODE *node, mkr_buf_t *buf)
130
+ {
131
+ MKR_DOM_NODE *cur = MKR_NODE_FIRST_CHILD(node);
132
+ while (cur != NULL) {
133
+ if (MKR_NODE_TYPE(cur) == MKR_NTYPE_TEXT
134
+ || MKR_NODE_TYPE(cur) == MKR_NTYPE_CDATA_SECTION) {
135
+ mkr_status_t st = append_text_content(cur, buf);
136
+ if (st != MKR_OK) return st; /* LIMIT or OOM - caller fails closed */
137
+ }
138
+ if (MKR_NODE_TYPE(cur) == MKR_NTYPE_ELEMENT && MKR_NODE_FIRST_CHILD(cur) != NULL) {
139
+ cur = MKR_NODE_FIRST_CHILD(cur);
140
+ continue;
141
+ }
142
+ while (cur != node && MKR_NODE_NEXT(cur) == NULL) {
143
+ cur = MKR_NODE_PARENT(cur);
144
+ }
145
+ if (cur == node) return MKR_OK;
146
+ cur = MKR_NODE_NEXT(cur);
147
+ }
148
+ return MKR_OK;
149
+ }
150
+
151
+ /* Build node's string-value into `buf` (cap carried by buf->max). */
152
+ static mkr_status_t
153
+ build_string_value(const MKR_DOM_NODE *node, mkr_buf_t *buf)
154
+ {
155
+ if (node == NULL) return MKR_OK;
156
+
157
+ switch (MKR_NODE_TYPE(node)) {
158
+ case MKR_NTYPE_ATTRIBUTE: {
159
+ MKR_DOM_ATTR *attr = (MKR_DOM_ATTR *)node;
160
+ size_t vlen = 0;
161
+ const lxb_char_t *v = MKR_ATTR_VALUE(attr, &vlen);
162
+ return mkr_buf_append(buf, v ? (const char *)v : "", vlen);
163
+ }
164
+ case MKR_NTYPE_TEXT:
165
+ case MKR_NTYPE_CDATA_SECTION:
166
+ case MKR_NTYPE_COMMENT:
167
+ case MKR_NTYPE_PI:
168
+ return append_text_content((MKR_DOM_NODE *)node, buf);
169
+ default:
170
+ return append_text_descendants((MKR_DOM_NODE *)node, buf);
171
+ }
172
+ }
173
+
174
+ static void
175
+ mkr_build_node_text_unchecked(const MKR_DOM_NODE *node, mkr_owned_text_t *out)
176
+ {
177
+ /* Best-effort node string-value, used only for NUMBER coercion (its sole
178
+ * caller): the text is parsed straight to a double, so mkr_buf's conservative
179
+ * default ceiling (max == 0) is ample - a node whose text exceeds it was never a
180
+ * valid number, and the build then falls back to an owned "" (-> NaN), which is
181
+ * the correct coercion result anyway. On any failure return "" rather than NULL,
182
+ * since callers require a non-NULL text. */
183
+ mkr_owned_text_init(out);
184
+ mkr_buf_t buf;
185
+ mkr_buf_init(&buf, 0);
186
+ if (build_string_value(node, &buf) != MKR_OK) {
187
+ mkr_buf_free(&buf);
188
+ (void)mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit(""), NULL, NULL);
189
+ return;
190
+ }
191
+ if (mkr_owned_text_from_buf_steal(out, &buf, NULL, NULL) != 0) {
192
+ (void)mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit(""), NULL, NULL);
193
+ }
194
+ }
195
+
196
+ static int
197
+ mkr_node_to_owned_text_or_fail(const MKR_DOM_NODE *node,
198
+ mkr_xpath_limits_t *limits,
199
+ mkr_xpath_error_t *err,
200
+ mkr_owned_text_t *out)
201
+ {
202
+ if (out == NULL) {
203
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_node_to_owned_text_or_fail: bad args");
204
+ return -1;
205
+ }
206
+ mkr_owned_text_init(out);
207
+ mkr_buf_t buf;
208
+ mkr_buf_init(&buf, (limits != NULL) ? limits->max_string_bytes : 0);
209
+ mkr_status_t st = build_string_value(node, &buf);
210
+ if (st == MKR_ERR_LIMIT) {
211
+ mkr_buf_free(&buf);
212
+ mkr_err_setf(err, MKR_XPATH_ERR_LIMIT,
213
+ "string size limit exceeded (%zu bytes) while building node string-value",
214
+ limits->max_string_bytes);
215
+ return -1;
216
+ }
217
+ if (st != MKR_OK) {
218
+ mkr_buf_free(&buf);
219
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory building node string-value");
220
+ return -1;
221
+ }
222
+ return mkr_owned_text_from_buf_steal(out, &buf, err, "out of memory building node string-value");
223
+ }
224
+
225
+ static int
226
+ mkr_val_to_owned_text_or_fail(const mkr_val_t *v,
227
+ mkr_xpath_limits_t *limits,
228
+ mkr_xpath_error_t *err,
229
+ mkr_owned_text_t *out)
230
+ {
231
+ if (out == NULL) {
232
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_val_to_owned_text_or_fail: bad args");
233
+ return -1;
234
+ }
235
+ mkr_owned_text_init(out);
236
+ if (v == NULL) {
237
+ return mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit(""), err, "out of memory converting value to string");
238
+ }
239
+ switch (v->type) {
240
+ case MKR_XPATH_TYPE_STRING: {
241
+ mkr_borrowed_text_t text = mkr_borrowed_text_from_owned(v->u.string);
242
+ if (text.ptr == NULL) text.len = 0;
243
+ if (limits != NULL && mkr_limit_check_string_bytes(limits, text.len, err) != 0) return -1;
244
+ return mkr_owned_text_from_borrowed_copy(out, text,
245
+ err, "out of memory copying string value");
246
+ }
247
+ case MKR_XPATH_TYPE_BOOLEAN:
248
+ return v->u.boolean
249
+ ? mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit("true"), err, "out of memory converting boolean to string")
250
+ : mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit("false"), err, "out of memory converting boolean to string");
251
+ case MKR_XPATH_TYPE_NUMBER: {
252
+ double d = v->u.number;
253
+ if (isnan(d)) {
254
+ return mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit("NaN"), err, "out of memory converting number to string");
255
+ }
256
+ if (isinf(d)) {
257
+ return d < 0
258
+ ? mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit("-Infinity"), err, "out of memory converting number to string")
259
+ : mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit("Infinity"), err, "out of memory converting number to string");
260
+ }
261
+ if (d == 0.0) {
262
+ return mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit("0"), err, "out of memory converting number to string");
263
+ }
264
+ char buf[64];
265
+ int n;
266
+ if (d == floor(d) && fabs(d) < 1e15) {
267
+ n = snprintf(buf, sizeof(buf), "%lld", (long long)d);
268
+ } else {
269
+ n = snprintf(buf, sizeof(buf), "%.15g", d);
270
+ }
271
+ if (n < 0 || (size_t)n >= sizeof(buf)) {
272
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "number string conversion overflow");
273
+ return -1;
274
+ }
275
+ char *p = mkr_strndup(buf, (size_t)n);
276
+ if (p == NULL) { mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory converting number to string"); return -1; }
277
+ *out = mkr_owned_text(p, (size_t)n);
278
+ return 0;
279
+ }
280
+ case MKR_XPATH_TYPE_NODESET:
281
+ if (v->u.nodeset.count == 0) {
282
+ return mkr_owned_text_from_borrowed_copy(out, mkr_borrowed_text_lit(""), err, "out of memory");
283
+ }
284
+ /* XPath 1.0 §4.2: string(node-set) = string-value of first node in doc order. */
285
+ return mkr_node_to_owned_text_or_fail(v->u.nodeset.items[0], limits, err, out);
286
+ }
287
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "unknown value type");
288
+ return -1;
289
+ }
290
+
291
+ static int
292
+ mkr_val_to_number_or_fail(const mkr_val_t *v,
293
+ mkr_xpath_limits_t *limits,
294
+ mkr_xpath_error_t *err,
295
+ double *out)
296
+ {
297
+ if (v == NULL || out == NULL) {
298
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_val_to_number_or_fail: bad args");
299
+ return -1;
300
+ }
301
+ if (v->type == MKR_XPATH_TYPE_NODESET) {
302
+ if (v->u.nodeset.count == 0) {
303
+ *out = (double)NAN;
304
+ return 0;
305
+ }
306
+ mkr_owned_text_t text;
307
+ if (mkr_node_to_owned_text_or_fail(v->u.nodeset.items[0], limits, err, &text) != 0) return -1;
308
+ *out = mkr_borrowed_text_to_number(mkr_borrowed_text_from_owned(text));
309
+ mkr_owned_text_clear(&text);
310
+ return 0;
311
+ }
312
+ *out = mkr_val_to_number_unchecked(v);
313
+ return 0;
314
+ }
315
+
316
+ /* ---------- coercions ---------- */
317
+
318
+ /* string -> number coercion (XPath 1.0 §4.4): optional leading whitespace, an
319
+ * optional single '-' (NO whitespace between it and the digits, and NO '+'),
320
+ * then a Number, then optional trailing whitespace - anything else is NaN. The
321
+ * Number scan/convert uses the same grammar-exact, locale-independent helpers as
322
+ * the lexer, so "0x10" / "1e3" / "INF" all coerce to NaN (the extent stops
323
+ * before x/e and the trailing garbage trips the end check). All reads go through
324
+ * the bounded span. */
325
+ static double
326
+ mkr_borrowed_text_to_number(mkr_borrowed_text_t t)
327
+ {
328
+ if (t.ptr == NULL) return (double)NAN;
329
+ mkr_span_t s = mkr_span(t.ptr, t.len);
330
+
331
+ mkr_span_skip_xpath_ws(&s);
332
+
333
+ int neg = 0;
334
+ if (mkr_span_peek(&s) == '-') { neg = 1; mkr_span_skip(&s, 1); }
335
+
336
+ const char *mark = mkr_span_mark(&s);
337
+ size_t extent = mkr_xpath_number_extent(mark, mkr_span_left(&s));
338
+ if (extent == 0) return (double)NAN;
339
+ double d = mkr_xpath_number_from_extent(mark, extent);
340
+ mkr_span_skip(&s, extent);
341
+
342
+ mkr_span_skip_xpath_ws(&s);
343
+ if (mkr_span_peek(&s) != -1) return (double)NAN; /* trailing garbage */
344
+
345
+ return neg ? -d : d;
346
+ }
347
+
348
+ static double
349
+ mkr_val_to_number_unchecked(const mkr_val_t *v)
350
+ {
351
+ switch (v->type) {
352
+ case MKR_XPATH_TYPE_NUMBER:
353
+ return v->u.number;
354
+ case MKR_XPATH_TYPE_BOOLEAN:
355
+ return v->u.boolean ? 1.0 : 0.0;
356
+ case MKR_XPATH_TYPE_STRING:
357
+ return mkr_borrowed_text_to_number(mkr_borrowed_text_from_owned(v->u.string));
358
+ case MKR_XPATH_TYPE_NODESET: {
359
+ if (v->u.nodeset.count == 0) return (double)NAN;
360
+ /* string-value of first node in document order */
361
+ mkr_owned_text_t text;
362
+ mkr_build_node_text_unchecked(v->u.nodeset.items[0], &text);
363
+ double d = mkr_borrowed_text_to_number(mkr_borrowed_text_from_owned(text));
364
+ mkr_owned_text_clear(&text);
365
+ return d;
366
+ }
367
+ }
368
+ return (double)NAN;
369
+ }
370
+
371
+ static int
372
+ mkr_val_to_boolean(const mkr_val_t *v)
373
+ {
374
+ switch (v->type) {
375
+ case MKR_XPATH_TYPE_BOOLEAN:
376
+ return v->u.boolean;
377
+ case MKR_XPATH_TYPE_NUMBER:
378
+ return !(v->u.number == 0.0 || isnan(v->u.number));
379
+ case MKR_XPATH_TYPE_STRING:
380
+ return v->u.string.ptr != NULL && v->u.string.ptr[0] != '\0';
381
+ case MKR_XPATH_TYPE_NODESET:
382
+ return v->u.nodeset.count > 0;
383
+ }
384
+ return 0;
385
+ }
386
+
387
+ /* ---------- document order ---------- */
388
+
389
+ /*
390
+ * Treat an attribute node as positioned "with" its owner element for
391
+ * cross-subtree comparisons; only when both belong to the same element
392
+ * does the attribute-vs-attribute or attribute-vs-descendant rule kick in.
393
+ */
394
+ static const MKR_DOM_NODE *
395
+ anchor_for_cmp(const MKR_DOM_NODE *n)
396
+ {
397
+ if (MKR_NODE_TYPE(n) == MKR_NTYPE_ATTRIBUTE) {
398
+ return MKR_NODE_PARENT(n) ? MKR_NODE_PARENT(n) : n;
399
+ }
400
+ return n;
401
+ }
402
+
403
+ static int
404
+ depth_of(const MKR_DOM_NODE *n)
405
+ {
406
+ int d = 0;
407
+ while (MKR_NODE_PARENT(n)) { d++; n = MKR_NODE_PARENT(n); }
408
+ return d;
409
+ }
410
+
411
+ static int
412
+ doc_order_cmp(const MKR_DOM_NODE *a, const MKR_DOM_NODE *b)
413
+ {
414
+ if (a == b) return 0;
415
+ const MKR_DOM_NODE *aa = anchor_for_cmp(a);
416
+ const MKR_DOM_NODE *bb = anchor_for_cmp(b);
417
+
418
+ /* If the anchors are the same element, decide by node type. A non-attribute
419
+ * node that anchors to the same element E can ONLY be E itself: any other
420
+ * node (a child/descendant) anchors to itself, not to E, so it would not
421
+ * reach this branch (the attribute-vs-descendant case is handled below by
422
+ * the depth-normalisation walk). Per XPath 1.0 §5.1 document order is
423
+ * "element, then its attribute nodes, then its children", so an attribute
424
+ * comes AFTER its own owner element. */
425
+ if (aa == bb) {
426
+ int a_attr = (MKR_NODE_TYPE(a) == MKR_NTYPE_ATTRIBUTE);
427
+ int b_attr = (MKR_NODE_TYPE(b) == MKR_NTYPE_ATTRIBUTE);
428
+ if (a_attr && !b_attr) return 1; /* b is the owner element E; a (its attr) follows */
429
+ if (b_attr && !a_attr) return -1; /* a is the owner element E; b (its attr) follows */
430
+ /* Both attributes of the same element: relative order is
431
+ * implementation-defined. Use insertion order via attr linked list. */
432
+ if (a_attr && b_attr) {
433
+ for (const MKR_DOM_ATTR *at = MKR_ELEM_FIRST_ATTR((const MKR_DOM_ELEMENT *)aa);
434
+ at != NULL; at = MKR_ATTR_NEXT(at)) {
435
+ if ((const MKR_DOM_NODE *)at == a) return -1;
436
+ if ((const MKR_DOM_NODE *)at == b) return 1;
437
+ }
438
+ return 0;
439
+ }
440
+ /* aa == bb but neither is an attribute means a == b, handled above. */
441
+ return 0;
442
+ }
443
+
444
+ int da = depth_of(aa), db = depth_of(bb);
445
+ while (da > db) { aa = MKR_NODE_PARENT(aa); da--; }
446
+ while (db > da) { bb = MKR_NODE_PARENT(bb); db--; }
447
+ if (aa == bb) {
448
+ /* One is ancestor of the other; ancestor comes first. */
449
+ return (aa == anchor_for_cmp(a)) ? -1 : 1;
450
+ }
451
+ while (MKR_NODE_PARENT(aa) != MKR_NODE_PARENT(bb)) {
452
+ aa = MKR_NODE_PARENT(aa);
453
+ bb = MKR_NODE_PARENT(bb);
454
+ }
455
+ /* Resolve sibling order. Scan outward from aa and bb in lockstep (via ->next)
456
+ * rather than forward from parent->first_child: the cost is then O(distance
457
+ * between aa and bb), not O(distance from the first child. The latter is
458
+ * quadratic when sorting nodes that sit deep in a wide, flat parent (e.g. a
459
+ * predicate result picking scattered <li> from a 2000-child <ul>), which the
460
+ * doc-order index would only avoid once a single sort reaches its build
461
+ * threshold. */
462
+ if (MKR_NODE_PARENT(aa) == NULL) {
463
+ /* Different documents/roots - undefined; keep stable. */
464
+ return 0;
465
+ }
466
+ const MKR_DOM_NODE *fa = aa, *fb = bb;
467
+ for (;;) {
468
+ fa = fa ? MKR_NODE_NEXT(fa) : NULL;
469
+ fb = fb ? MKR_NODE_NEXT(fb) : NULL;
470
+ if (fa == bb) return -1; /* bb lies after aa -> aa first */
471
+ if (fb == aa) return 1; /* aa lies after bb -> bb first */
472
+ if (fa == NULL && fb == NULL) return 0; /* unreachable for same-parent nodes */
473
+ }
474
+ }
475
+
476
+ /* ---------- per-evaluate document-order index (build/lookup/sort) ---------- */
477
+
478
+ /* Insert (node, ord) into the open-addressing table. Grows when load
479
+ * factor exceeds 3/4. Returns 0 on success, -1 on OOM. */
480
+ static int
481
+ order_index_insert(mkr_doc_order_index_t *idx, const MKR_DOM_NODE *node, size_t ord)
482
+ {
483
+ if (idx->cap == 0 || idx->count * 4 >= idx->cap * 3) {
484
+ size_t new_cap = 256;
485
+ if (idx->cap != 0 && !mkr_size_mul(idx->cap, 2, &new_cap)) {
486
+ return -1; /* overflow */
487
+ }
488
+ void *new_buckets = mkr_callocarray(new_cap, sizeof(*idx->buckets));
489
+ if (new_buckets == NULL) return -1;
490
+ /* Rehash. */
491
+ typeof(idx->buckets) old_buckets = idx->buckets;
492
+ size_t old_cap = idx->cap;
493
+ idx->buckets = new_buckets;
494
+ idx->cap = new_cap;
495
+ idx->count = 0;
496
+ for (size_t i = 0; i < old_cap; ++i) {
497
+ if (old_buckets[i].node != NULL) {
498
+ size_t mask = new_cap - 1;
499
+ size_t j = mkr_ptr_hash(old_buckets[i].node) & mask;
500
+ while (idx->buckets[j].node != NULL) j = (j + 1) & mask;
501
+ idx->buckets[j].node = old_buckets[i].node;
502
+ idx->buckets[j].ord = old_buckets[i].ord;
503
+ idx->count++;
504
+ }
505
+ }
506
+ free(old_buckets);
507
+ }
508
+ size_t mask = idx->cap - 1;
509
+ size_t j = mkr_ptr_hash(node) & mask;
510
+ while (idx->buckets[j].node != NULL) {
511
+ if (idx->buckets[j].node == node) return 0; /* already present */
512
+ j = (j + 1) & mask;
513
+ }
514
+ idx->buckets[j].node = node;
515
+ idx->buckets[j].ord = ord;
516
+ idx->count++;
517
+ return 0;
518
+ }
519
+
520
+ static int
521
+ order_index_lookup(const mkr_doc_order_index_t *idx, const MKR_DOM_NODE *node,
522
+ size_t *out_ord)
523
+ {
524
+ if (idx->cap == 0) return -1;
525
+ size_t mask = idx->cap - 1;
526
+ size_t j = mkr_ptr_hash(node) & mask;
527
+ while (idx->buckets[j].node != NULL) {
528
+ if (idx->buckets[j].node == node) {
529
+ if (out_ord) *out_ord = idx->buckets[j].ord;
530
+ return 0;
531
+ }
532
+ j = (j + 1) & mask;
533
+ }
534
+ return -1;
535
+ }
536
+
537
+ /* DFS pre-order: assign ordinal to the element, then its attributes
538
+ * (in linked-list order, before children), then descendants. This
539
+ * matches doc_order_cmp's attribute placement.
540
+ *
541
+ * Iterative (parent-pointer) walk rather than C recursion, so an adversarially
542
+ * deep tree cannot overflow the stack (fail-closed / no DoS); O(1) extra space.
543
+ * The traversal stays within the subtree rooted at `root` (it never follows
544
+ * root->next). */
545
+ static int
546
+ order_index_walk(mkr_doc_order_index_t *idx, MKR_DOM_NODE *root, size_t *next_ord)
547
+ {
548
+ MKR_DOM_NODE *cur = root;
549
+ while (cur != NULL) {
550
+ /* Visit (pre-order): the node, then its attributes before any child. */
551
+ if (order_index_insert(idx, cur, (*next_ord)++) != 0) return -1;
552
+ if (MKR_NODE_TYPE(cur) == MKR_NTYPE_ELEMENT) {
553
+ MKR_DOM_ELEMENT *el = (MKR_DOM_ELEMENT *)cur;
554
+ for (MKR_DOM_ATTR *a = MKR_ELEM_FIRST_ATTR(el); a != NULL; a = MKR_ATTR_NEXT(a)) {
555
+ if (order_index_insert(idx, (MKR_DOM_NODE *)a, (*next_ord)++) != 0) return -1;
556
+ }
557
+ }
558
+ if (MKR_NODE_FIRST_CHILD(cur) != NULL) {
559
+ cur = MKR_NODE_FIRST_CHILD(cur);
560
+ continue;
561
+ }
562
+ while (cur != root && MKR_NODE_NEXT(cur) == NULL) {
563
+ cur = MKR_NODE_PARENT(cur);
564
+ }
565
+ if (cur == root) break;
566
+ cur = MKR_NODE_NEXT(cur);
567
+ }
568
+ return 0;
569
+ }
570
+
571
+ static int
572
+ order_index_build(mkr_doc_order_index_t *idx, MKR_DOM_NODE *root,
573
+ mkr_xpath_error_t *err)
574
+ {
575
+ if (idx->built) return 0;
576
+ if (root == NULL) return -1;
577
+ size_t next_ord = 0;
578
+ if (order_index_walk(idx, root, &next_ord) != 0) {
579
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory building document order index");
580
+ mkr_doc_order_index_clear(idx);
581
+ return -1;
582
+ }
583
+ idx->built = 1;
584
+ return 0;
585
+ }
586
+
587
+ /* Indexed comparator. Falls back to doc_order_cmp on any miss
588
+ * (e.g., synthesised nodes or cross-document compares). */
589
+ static int
590
+ doc_order_cmp_ctx(mkr_xpath_context_t *ctx, const MKR_DOM_NODE *a, const MKR_DOM_NODE *b)
591
+ {
592
+ if (a == b) return 0;
593
+ if (ctx == NULL) return doc_order_cmp(a, b);
594
+ mkr_doc_order_index_t *idx = mkr_ctx_order_index(ctx);
595
+ if (idx == NULL || !idx->built) return doc_order_cmp(a, b);
596
+ size_t oa, ob;
597
+ if (order_index_lookup(idx, a, &oa) != 0) return doc_order_cmp(a, b);
598
+ if (order_index_lookup(idx, b, &ob) != 0) return doc_order_cmp(a, b);
599
+ /* Safe comparison - compare, don't subtract (unsigned difference wraps). */
600
+ if (oa < ob) return -1;
601
+ if (oa > ob) return 1;
602
+ return 0;
603
+ }
604
+
605
+ /* Bottom-up merge sort. Threading ctx through avoids the qsort_r /
606
+ * thread-local hack and keeps everything reentrant. Stable as a
607
+ * bonus: ties (same ord - only possible for synthesised nodes that
608
+ * weren't in the index) preserve insertion order. */
609
+ static void
610
+ ms_merge(void **arr, void **tmp,
611
+ size_t lo, size_t mid, size_t hi, mkr_xpath_context_t *ctx)
612
+ {
613
+ size_t i = lo, j = mid, k = lo;
614
+ while (i < mid && j < hi) {
615
+ if (doc_order_cmp_ctx(ctx, arr[i], arr[j]) <= 0) tmp[k++] = arr[i++];
616
+ else tmp[k++] = arr[j++];
617
+ }
618
+ while (i < mid) tmp[k++] = arr[i++];
619
+ while (j < hi) tmp[k++] = arr[j++];
620
+ for (size_t x = lo; x < hi; ++x) arr[x] = tmp[x];
621
+ }
622
+
623
+ static void
624
+ ms_sort(void **arr, void **tmp,
625
+ size_t lo, size_t hi, mkr_xpath_context_t *ctx)
626
+ {
627
+ if (hi - lo < 2) return;
628
+ size_t mid = lo + (hi - lo) / 2;
629
+ ms_sort(arr, tmp, lo, mid, ctx);
630
+ ms_sort(arr, tmp, mid, hi, ctx);
631
+ ms_merge(arr, tmp, lo, mid, hi, ctx);
632
+ }
633
+
634
+ /* qsort fallback used only when tmp-buffer allocation fails. */
635
+ static int
636
+ doc_order_qsort_cb_fallback(const void *pa, const void *pb)
637
+ {
638
+ const MKR_DOM_NODE *a = *(const MKR_DOM_NODE * const *)pa;
639
+ const MKR_DOM_NODE *b = *(const MKR_DOM_NODE * const *)pb;
640
+ return doc_order_cmp(a, b);
641
+ }
642
+
643
+ /* Threshold for building the doc-order index. Below this we expect
644
+ * N log N parent-chain compares (N = ns->count, the size of the node-set being sorted) to be cheaper than the O(D) full-doc
645
+ * walk that the index requires (D = total nodes in document, which is
646
+ * typically 6000+ on real pages). Empirically the crossover sits
647
+ * somewhere between N=100 and N=300 on coffee.html; we pick a safe
648
+ * point that keeps small unions and reverse-axis dedups off the slow
649
+ * build path. Once the index IS built (e.g., by a larger sort earlier
650
+ * in the same evaluate), subsequent small sorts naturally reuse it. */
651
+ #define MKR_INDEX_BUILD_MIN 200
652
+
653
+ static void
654
+ mkr_nodeset_sort_doc_order(mkr_xpath_context_t *ctx, mkr_nodeset_t *ns)
655
+ {
656
+ if (ns == NULL || ns->count < 2) return;
657
+
658
+ /* Already-sorted fast path. A relative step over a multi-node context
659
+ * (e.g. the child step of //li/a or //a:entry/a:title) collects its
660
+ * forward-axis results context-by-context in document order, so when the
661
+ * contexts are non-nested the concatenation is ALREADY in document order and
662
+ * the O(n log n) sort is pure waste. An O(n) scan confirms it: if every
663
+ * adjacent pair is already in order we return without sorting (and without
664
+ * building the doc-order index). Reverse axes and interleaved (nested-context)
665
+ * results fail the scan early and fall through to the full sort below. The
666
+ * scan uses the same comparator the sort would, so it can only skip work,
667
+ * never change the result. This is the libxml2-parity win for multi-step
668
+ * paths, where the sort otherwise dominates (profiled). */
669
+ int already_ordered = 1;
670
+ for (size_t i = 1; i < ns->count; ++i) {
671
+ if (doc_order_cmp_ctx(ctx, ns->items[i - 1], ns->items[i]) > 0) {
672
+ already_ordered = 0;
673
+ break;
674
+ }
675
+ }
676
+ if (already_ordered) return;
677
+
678
+ /* Lazy build of the doc-order index. Only worth doing when the sort
679
+ * itself is large enough to amortise the full-doc walk; smaller
680
+ * sorts fall through to parent-chain compares via doc_order_cmp_ctx
681
+ * (which sees an unbuilt index and dispatches accordingly). */
682
+ mkr_doc_order_index_t *idx = mkr_ctx_order_index(ctx);
683
+ if (idx != NULL && !idx->built && ns->count >= MKR_INDEX_BUILD_MIN) {
684
+ MKR_DOM_NODE *root = (MKR_DOM_NODE *)mkr_ctx_document(ctx);
685
+ mkr_xpath_error_t ierr = {0};
686
+ (void)order_index_build(idx, root, &ierr);
687
+ mkr_xpath_error_clear(&ierr); /* index is best-effort; on OOM we fall through to parent-chain cmp */
688
+ }
689
+
690
+ void **tmp = mkr_reallocarray(NULL, ns->count, sizeof(*tmp));
691
+ if (tmp == NULL) {
692
+ /* Fall back to in-place qsort with parent-chain compare (slow but
693
+ * correct). Should be a very rare path. */
694
+ qsort(ns->items, ns->count, sizeof(ns->items[0]), doc_order_qsort_cb_fallback);
695
+ return;
696
+ }
697
+ ms_sort(ns->items, tmp, 0, ns->count, ctx);
698
+ free(tmp);
699
+ }
700
+
701
+ static void
702
+ mkr_nodeset_unique_sorted(mkr_xpath_context_t *ctx, mkr_nodeset_t *ns)
703
+ {
704
+ if (ns == NULL || ns->count < 2) return;
705
+ mkr_nodeset_sort_doc_order(ctx, ns);
706
+ size_t w = 1;
707
+ for (size_t r = 1; r < ns->count; ++r) {
708
+ if (ns->items[r] != ns->items[r - 1]) {
709
+ ns->items[w++] = ns->items[r];
710
+ }
711
+ }
712
+ ns->count = w;
713
+ }
714
+
715
+ /* ---------- string-value cache: node-keyed insert (dereferences `node`) ---------- */
716
+
717
+ static int
718
+ mkr_get_cached_node_text(mkr_xpath_context_t *ctx,
719
+ MKR_DOM_NODE *node,
720
+ mkr_borrowed_text_t *out,
721
+ mkr_xpath_error_t *err)
722
+ {
723
+ if (out == NULL) {
724
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL, "mkr_get_cached_node_text: bad args");
725
+ return -1;
726
+ }
727
+ *out = mkr_borrowed_text(NULL, 0);
728
+ /* Contract: ctx is non-NULL when called from the evaluator (the only
729
+ * intended caller). A NULL ctx is a programming error; surface it. */
730
+ mkr_str_cache_t *c = mkr_ctx_str_cache(ctx);
731
+ if (c == NULL) {
732
+ mkr_err_set(err, MKR_XPATH_ERR_INTERNAL,
733
+ "mkr_get_cached_node_text called without a context");
734
+ return -1;
735
+ }
736
+
737
+ /* O(1) lookup via the pointer-keyed index. */
738
+ if (c->bucket_cap != 0) {
739
+ size_t mask = c->bucket_cap - 1;
740
+ size_t j = mkr_ptr_hash(node) & mask;
741
+ while (c->buckets[j] != 0) {
742
+ mkr_str_cache_entry_t *e = &c->entries[c->buckets[j] - 1];
743
+ if (e->node == node) {
744
+ *out = mkr_borrowed_text(e->str, e->len);
745
+ return 0;
746
+ }
747
+ j = (j + 1) & mask;
748
+ }
749
+ }
750
+
751
+ mkr_owned_text_t text;
752
+ if (mkr_node_to_owned_text_or_fail(node, mkr_ctx_limits(ctx), err, &text) != 0) return -1;
753
+
754
+ if (mkr_grow_reserve((void **)&c->entries, &c->cap, c->count + 1,
755
+ sizeof(*c->entries)) != MKR_OK) {
756
+ mkr_owned_text_clear(&text);
757
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory in node string cache");
758
+ return -1;
759
+ }
760
+
761
+ /* Enforce a total cap on the cached string bytes (fail-closed). */
762
+ size_t new_total;
763
+ if (!mkr_size_add(c->total_bytes, text.len, &new_total)
764
+ || mkr_limit_check_string_bytes(mkr_ctx_limits(ctx), new_total, err) != 0) {
765
+ mkr_owned_text_clear(&text);
766
+ return -1;
767
+ }
768
+
769
+ /* Grow / build the index FIRST. It rebuilds only from the already-committed
770
+ * [0, count) entries (mkr_str_cache_reindex), so doing it before the new
771
+ * entry is written means every fallible step happens while the slot at
772
+ * [count] is still untouched - the entry is committed only once nothing can
773
+ * fail, eliminating the tentative-write-then-null-out rollback. Load factor
774
+ * is kept <= 1/2. */
775
+ if (c->bucket_cap == 0 || (c->count + 1) * 2 > c->bucket_cap) {
776
+ size_t new_bucket_cap = 64;
777
+ if (c->bucket_cap != 0 && !mkr_size_mul(c->bucket_cap, 2, &new_bucket_cap)) {
778
+ mkr_owned_text_clear(&text);
779
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, "node string cache index overflow");
780
+ return -1;
781
+ }
782
+ if (mkr_str_cache_reindex(c, new_bucket_cap) != 0) {
783
+ mkr_owned_text_clear(&text);
784
+ mkr_err_set(err, MKR_XPATH_ERR_OOM, "out of memory indexing node string cache");
785
+ return -1;
786
+ }
787
+ }
788
+
789
+ /* Commit: no fallible step remains, so write the entry, index it, and bump
790
+ * the counters. mkr_str_cache_index_put reads entries[count].node, so the
791
+ * write must precede it. */
792
+ c->entries[c->count].node = node;
793
+ c->entries[c->count].str = text.ptr;
794
+ c->entries[c->count].len = text.len;
795
+ mkr_str_cache_index_put(c, c->count);
796
+ c->total_bytes += text.len;
797
+ c->count++;
798
+
799
+ *out = mkr_borrowed_text_from_owned(text);
800
+ return 0;
801
+ }