apex-ruby 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. checksums.yaml +4 -4
  2. data/ext/apex_ext/apex_ext.c +6 -0
  3. data/ext/apex_ext/apex_src/AGENTS.md +41 -0
  4. data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
  5. data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
  6. data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
  7. data/ext/apex_ext/apex_src/Package.swift +9 -0
  8. data/ext/apex_ext/apex_src/README.md +31 -9
  9. data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
  10. data/ext/apex_ext/apex_src/VERSION +1 -1
  11. data/ext/apex_ext/apex_src/cli/main.c +1125 -13
  12. data/ext/apex_ext/apex_src/docs/index.md +459 -0
  13. data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
  14. data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
  15. data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
  16. data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
  17. data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
  18. data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
  19. data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
  20. data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
  21. data/ext/apex_ext/apex_src/man/apex.1 +663 -620
  22. data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
  23. data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
  24. data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
  25. data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
  26. data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
  27. data/ext/apex_ext/apex_src/pages/index.md +459 -0
  28. data/ext/apex_ext/apex_src/src/_README.md +4 -4
  29. data/ext/apex_ext/apex_src/src/apex.c +702 -44
  30. data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
  31. data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
  32. data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
  33. data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
  34. data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
  35. data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
  36. data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
  37. data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
  38. data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
  39. data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
  40. data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
  41. data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
  42. data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
  43. data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
  44. data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
  45. data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
  46. data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
  47. data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
  48. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
  49. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
  50. data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
  51. data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
  52. data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
  53. data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
  54. data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
  55. data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
  56. data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
  57. data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
  58. data/ext/apex_ext/apex_src/tests/README.md +11 -5
  59. data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
  60. data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
  61. data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
  62. data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
  63. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
  64. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
  65. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
  66. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
  67. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
  68. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
  69. data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
  70. data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
  71. data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
  72. data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
  73. data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
  74. data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
  75. data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
  76. data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
  77. data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
  78. data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
  79. data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
  80. data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
  81. data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
  82. data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
  83. data/lib/apex/version.rb +1 -1
  84. metadata +32 -2
  85. data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
@@ -7,10 +7,12 @@
7
7
  #include "table.h" /* For CMARK_NODE_TABLE */
8
8
  #include "extensions/header_ids.h"
9
9
  #include <string.h>
10
+ #include <strings.h> /* For strncasecmp */
10
11
  #include <stdlib.h>
11
12
  #include <stdio.h>
12
13
  #include <stdbool.h>
13
14
  #include <ctype.h>
15
+ #include <sys/stat.h>
14
16
 
15
17
  /**
16
18
  * Inject attributes into HTML opening tags
@@ -154,6 +156,125 @@ static char *extract_ial_from_table_attrs(const char *attrs) {
154
156
  return result;
155
157
  }
156
158
 
159
/**
 * Extract the value of a quoted attribute from an HTML tag.
 *
 * Scans [tag_start, tag_end) for attr_name (compared case-insensitively) that
 * sits at the very start of the range or directly after whitespace and is
 * immediately followed by '='. Quoted values belonging to other attributes are
 * skipped as a unit, so text such as alt="see title='x'" is never mistaken
 * for a real title attribute.
 *
 * @param tag_start  First byte to scan (typically the '<' of the tag).
 * @param tag_end    One past the last byte to scan. Scanning also stops at a
 *                   NUL byte, so a tag_end beyond the terminator is harmless.
 * @param attr_name  Attribute name without '=' (e.g. "src").
 * @return Newly allocated copy of the value (caller must free), or NULL when
 *         the attribute is absent, its value is not quoted, the closing quote
 *         is missing, or allocation fails.
 *
 * NOTE(review): a backslash inside a quoted value skips the following byte,
 * mirroring find_tag_end. HTML itself has no backslash escapes; confirm the
 * producers of these strings really rely on this.
 */
static char *extract_attr_from_tag(const char *tag_start, const char *tag_end, const char *attr_name) {
    if (!tag_start || !tag_end || !attr_name) return NULL;
    size_t attr_len = strlen(attr_name);
    if (attr_len == 0) return NULL;

    const char *p = tag_start;
    while (p < tag_end && *p) {
        bool at_boundary = (p == tag_start) || isspace((unsigned char)p[-1]);

        /* The name plus its '=' must fit inside the range before we compare,
         * otherwise the comparison could read past tag_end. */
        if (at_boundary && (size_t)(tag_end - p) > attr_len &&
            strncasecmp(p, attr_name, attr_len) == 0 && p[attr_len] == '=') {
            p += attr_len + 1;
            if (p >= tag_end) return NULL;
            char q = *p;
            if (q != '"' && q != '\'') return NULL; /* unquoted values are not supported */
            p++;
            const char *val_start = p;
            while (p < tag_end && *p && *p != q) {
                if (*p == '\\' && p + 1 < tag_end && p[1]) p++;
                p++;
            }
            if (p >= tag_end || !*p) return NULL; /* no closing quote */
            size_t len = (size_t)(p - val_start);
            char *out = malloc(len + 1);
            if (out) {
                memcpy(out, val_start, len);
                out[len] = '\0';
            }
            return out;
        }

        /* Jump over another attribute's quoted value so its contents cannot
         * be matched as an attribute name. */
        if (*p == '"' || *p == '\'') {
            char q = *p++;
            while (p < tag_end && *p && *p != q) {
                if (*p == '\\' && p + 1 < tag_end && p[1]) p++;
                p++;
            }
            if (p >= tag_end || !*p) return NULL; /* unterminated quote: nothing reliable follows */
        }
        p++;
    }
    return NULL;
}
192
+
193
/**
 * Build a copy of url whose file extension is replaced by new_ext.
 *
 * The path portion ends at the first '?' or '#', whichever comes first; the
 * query/fragment tail is preserved verbatim. The extension is the text after
 * the last '.' in the final path segment, so dots in the query string
 * ("clip.mov?v=1.2") or in a directory name ("media.v2/clip") are not
 * mistaken for the extension separator.
 *
 * @param url      Source URL; may be NULL.
 * @param new_ext  Replacement extension without the leading dot; may be NULL.
 * @return Newly allocated string (caller must free), or NULL if either
 *         argument is NULL, the final path segment has no extension, or
 *         allocation fails.
 */
static char *url_with_extension(const char *url, const char *new_ext) {
    if (!url || !new_ext) return NULL;

    /* Path ends at the first query or fragment delimiter, or at the NUL. */
    const char *path_end = url + strcspn(url, "?#");

    /* Walk back from the end of the path; stop at '/' so only the last
     * segment is considered. */
    const char *last_dot = NULL;
    for (const char *p = path_end; p > url; ) {
        p--;
        if (*p == '/') break;
        if (*p == '.') { last_dot = p; break; }
    }
    if (!last_dot) return NULL;

    size_t prefix_len = (size_t)(last_dot - url);
    size_t ext_len = strlen(new_ext);
    size_t tail_len = strlen(path_end);
    char *out = malloc(prefix_len + 1 + ext_len + tail_len + 1);
    if (!out) return NULL;
    memcpy(out, url, prefix_len);
    out[prefix_len] = '.';
    /* Copies the NUL too, so the result is already terminated when there is no tail. */
    memcpy(out + prefix_len + 1, new_ext, ext_len + 1);
    if (tail_len > 0) memcpy(out + prefix_len + 1 + ext_len, path_end, tail_len + 1);
    return out;
}
215
+
216
/**
 * Locate the '>' that closes an HTML tag, ignoring any '>' that appears
 * inside a single- or double-quoted attribute value.
 *
 * @param tag_start  NUL-terminated text beginning at (or inside) the tag;
 *                   may be NULL.
 * @return Pointer to the closing '>', or NULL if tag_start is NULL or the
 *         string ends before an unquoted '>' is found.
 *
 * NOTE(review): inside quotes a backslash skips the next byte. HTML has no
 * backslash escapes, so a value ending in '\' would hide its closing quote;
 * confirm this is intended for the attribute strings handled here.
 */
static const char *find_tag_end(const char *tag_start) {
    if (!tag_start) return NULL;

    char in_quote = 0; /* 0 when outside quotes, otherwise the opening quote char */
    for (const char *p = tag_start; *p; p++) {
        if (in_quote) {
            if (*p == '\\' && p[1]) {
                p++; /* skip the escaped byte; the loop increment moves past it */
            } else if (*p == in_quote) {
                in_quote = 0;
            }
        } else if (*p == '"' || *p == '\'') {
            in_quote = *p;
        } else if (*p == '>') {
            return p;
        }
    }
    return NULL;
}
235
+
236
/**
 * Map a video URL's file extension to a MIME type.
 *
 * The extension is taken from the final path segment only: everything from
 * the first '?' or '#' onward is ignored, so "clip.webm?v=1.2" is recognised
 * as webm. The comparison is case-insensitive and requires the whole
 * extension to match (".oggx" is not treated as ogg).
 *
 * @param url  URL or path; may be NULL.
 * @return A static string: "video/webm", "video/ogg", "video/quicktime" or
 *         "video/mp4". Unknown, missing, or NULL input yields "video/mp4".
 */
static const char *video_type_from_url(const char *url) {
    static const struct {
        const char *ext;
        const char *mime;
    } types[] = {
        { "mp4",  "video/mp4" },
        { "webm", "video/webm" },
        { "ogg",  "video/ogg" },
        { "ogv",  "video/ogg" },
        { "mov",  "video/quicktime" },
        { "m4v",  "video/mp4" },
    };
    const char *fallback = "video/mp4";

    if (!url) return fallback;

    /* Path ends at the first query or fragment delimiter, or at the NUL. */
    const char *path_end = url + strcspn(url, "?#");

    /* Find the last '.' within the final path segment. */
    const char *ext = NULL;
    for (const char *p = path_end; p > url; ) {
        p--;
        if (*p == '/') break;
        if (*p == '.') { ext = p + 1; break; }
    }
    if (!ext) return fallback;

    size_t len = (size_t)(path_end - ext);
    for (size_t i = 0; i < sizeof(types) / sizeof(types[0]); i++) {
        if (strlen(types[i].ext) == len && strncasecmp(ext, types[i].ext, len) == 0) {
            return types[i].mime;
        }
    }
    return fallback;
}
256
+
257
/**
 * Extract the value of a data-apex-picture-<format> attribute from an
 * attribute string.
 *
 * Looks for the literal text data-apex-picture-<format>=" and copies
 * everything up to the next double quote, e.g.
 *   data-apex-picture-webp="a.webp 1x, a@2x.webp 2x"
 *
 * @param attrs   Attribute string to search; may be NULL.
 * @param format  Format suffix such as "webp" or "avif"; may be NULL.
 * @return Newly allocated copy of the value (caller must free), or NULL if an
 *         argument is NULL, the format is too long to build the lookup key,
 *         the attribute is absent, its closing quote is missing, or
 *         allocation fails.
 */
static char *extract_data_apex_picture_srcset(const char *attrs, const char *format) {
    if (!attrs || !format) return NULL;

    char key[64];
    int key_len = snprintf(key, sizeof(key), "data-apex-picture-%s=\"", format);
    /* A truncated key would lose its ="  suffix and could match the wrong text. */
    if (key_len < 0 || (size_t)key_len >= sizeof(key)) return NULL;

    const char *p = strstr(attrs, key);
    if (!p) return NULL;
    p += (size_t)key_len;

    const char *end = strchr(p, '"');
    if (!end) return NULL;

    size_t len = (size_t)(end - p);
    char *out = malloc(len + 1);
    if (!out) return NULL;
    memcpy(out, p, len);
    out[len] = '\0';
    return out;
}
277
+
157
278
  /* Counters for element indexing */
158
279
  typedef struct {
159
280
  int para_count;
@@ -229,16 +350,29 @@ static char *get_node_text_fingerprint(cmark_node *node) {
229
350
  }
230
351
  }
231
352
 
232
- /* For images, use the URL */
353
+ /* For images, use URL + alt (from first child) to disambiguate same-src images */
233
354
  if (type == CMARK_NODE_IMAGE) {
234
355
  const char *url = cmark_node_get_url(node);
235
356
  if (url) {
236
- size_t len = strlen(url);
237
- if (len > 50) len = 50;
238
- char *fingerprint = malloc(len + 1);
357
+ size_t url_len = strlen(url);
358
+ if (url_len > 50) url_len = 50;
359
+ cmark_node *child = cmark_node_first_child(node);
360
+ const char *alt = (child && cmark_node_get_type(child) == CMARK_NODE_TEXT) ?
361
+ cmark_node_get_literal(child) : NULL;
362
+ size_t alt_len = alt ? strlen(alt) : 0;
363
+ if (alt_len > 20) alt_len = 20;
364
+ size_t total = url_len + (alt_len ? 1 + alt_len : 0);
365
+ if (total > 50) total = 50;
366
+ char *fingerprint = malloc(total + 1);
239
367
  if (fingerprint) {
240
- memcpy(fingerprint, url, len);
241
- fingerprint[len] = '\0';
368
+ memcpy(fingerprint, url, url_len);
369
+ size_t pos = url_len;
370
+ if (alt_len && pos + 1 + alt_len <= 50) {
371
+ fingerprint[pos++] = '|';
372
+ memcpy(fingerprint + pos, alt, alt_len);
373
+ pos += alt_len;
374
+ }
375
+ fingerprint[pos] = '\0';
242
376
  return fingerprint;
243
377
  }
244
378
  }
@@ -302,7 +436,8 @@ static void collect_nodes_with_attrs(cmark_node *node, attr_node **list) {
302
436
  element_counters counters = {0};
303
437
  collect_nodes_with_attrs_recursive(node, list, &counters);
304
438
 
305
- /* Reverse the list to get document order */
439
+ /* Reverse the list: prepend builds [last_visited, ..., first_visited];
440
+ * we need document order [first, ..., last] for matching. */
306
441
  attr_node *reversed = NULL;
307
442
  while (*list) {
308
443
  attr_node *next = (*list)->next;
@@ -499,7 +634,7 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
499
634
  int fp_idx = 0;
500
635
 
501
636
  if (elem_type == CMARK_NODE_LINK || elem_type == CMARK_NODE_IMAGE) {
502
- /* For links/images, extract the href/src attribute */
637
+ /* For links/images, extract href/src and for images also alt (to disambiguate same-src) */
503
638
  const char *url_attr = (elem_type == CMARK_NODE_LINK) ? "href=\"" : "src=\"";
504
639
  const char *url_start = strstr(read, url_attr);
505
640
  if (url_start) {
@@ -509,8 +644,25 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
509
644
  size_t url_len = url_end - url_start;
510
645
  if (url_len > 50) url_len = 50;
511
646
  memcpy(html_fingerprint, url_start, url_len);
512
- html_fingerprint[url_len] = '\0';
513
647
  fp_idx = url_len;
648
+ if (elem_type == CMARK_NODE_IMAGE && fp_idx < 49) {
649
+ const char *alt_attr = "alt=\"";
650
+ const char *alt_start = strstr(read, alt_attr);
651
+ if (alt_start && alt_start < tag_end) {
652
+ alt_start += strlen(alt_attr);
653
+ const char *alt_end = strchr(alt_start, '"');
654
+ if (alt_end) {
655
+ size_t alt_len = alt_end - alt_start;
656
+ if (alt_len > 20) alt_len = 20;
657
+ if (fp_idx + 1 + alt_len <= 50) {
658
+ html_fingerprint[fp_idx++] = '|';
659
+ memcpy(html_fingerprint + fp_idx, alt_start, alt_len);
660
+ fp_idx += alt_len;
661
+ }
662
+ }
663
+ }
664
+ }
665
+ html_fingerprint[fp_idx] = '\0';
514
666
  }
515
667
  }
516
668
  } else if (elem_type == CMARK_NODE_STRONG || elem_type == CMARK_NODE_EMPH || elem_type == CMARK_NODE_CODE) {
@@ -551,6 +703,17 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
551
703
  break;
552
704
  }
553
705
  }
706
+ } else if (elem_type == CMARK_NODE_IMAGE) {
707
+ /* Images: match by element_index (document order). */
708
+ for (attr_node *a = attr_list; a; a = a->next, idx++) {
709
+ if (used[idx]) continue;
710
+ if (a->node_type != CMARK_NODE_IMAGE) continue;
711
+ if (a->element_index == elem_idx) {
712
+ matching = a;
713
+ used[idx] = true;
714
+ break;
715
+ }
716
+ }
554
717
  } else {
555
718
  /* For other elements, use the existing matching logic */
556
719
  for (attr_node *a = attr_list; a; a = a->next, idx++) {
@@ -565,7 +728,8 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
565
728
  /* Try fingerprint match first (works for both block and inline) */
566
729
  if (a->text_fingerprint && fp_idx > 0 &&
567
730
  strncmp(a->text_fingerprint, html_fingerprint, 50) == 0) {
568
- /* For inline elements, also check element_index to handle duplicates */
731
+ /* For inline elements, also check element_index to handle duplicates.
732
+ * (Images with same src use sequential matching in the branch above.) */
569
733
  if (elem_type == CMARK_NODE_LINK || elem_type == CMARK_NODE_IMAGE ||
570
734
  elem_type == CMARK_NODE_STRONG || elem_type == CMARK_NODE_EMPH ||
571
735
  elem_type == CMARK_NODE_CODE) {
@@ -667,6 +831,109 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
667
831
  }
668
832
  if (ial_attrs) free(ial_attrs);
669
833
  /* No IAL attributes to inject, but table still needs to be copied - fall through */
834
+ } else if (elem_type == CMARK_NODE_IMAGE &&
835
+ (strstr(matching->attrs, "data-apex-replace-video") ||
836
+ strstr(matching->attrs, "data-apex-replace-picture"))) {
837
+ /* Replace img with video or picture element */
838
+ const char *img_tag_end = find_tag_end(read);
839
+ if (img_tag_end && img_tag_end > read) {
840
+ char *src = extract_attr_from_tag(read, img_tag_end + 1, "src");
841
+ char *alt = extract_attr_from_tag(read, img_tag_end + 1, "alt");
842
+ char *title = extract_attr_from_tag(read, img_tag_end + 1, "title");
843
+ /* Fallback: title may be in IAL attrs (cmark may not emit it on img) */
844
+ if ((!title || !*title) && matching->attrs) {
845
+ size_t alen = strlen(matching->attrs);
846
+ char *fake_tag = malloc(alen + 10);
847
+ if (fake_tag) {
848
+ snprintf(fake_tag, alen + 10, "<img %s>", matching->attrs);
849
+ char *t = extract_attr_from_tag(fake_tag, fake_tag + strlen(fake_tag) + 1, "title");
850
+ free(fake_tag);
851
+ if (t) { free(title); title = t; }
852
+ }
853
+ }
854
+ if (!src) src = strdup("");
855
+ if (!alt) alt = strdup("");
856
+
857
+ char *replacement = NULL;
858
+ size_t repl_len = 0;
859
+
860
+ if (strstr(matching->attrs, "data-apex-replace-video")) {
861
+ /* Build <video> with <source> elements. Order: webm, ogg, mp4/mov/m4v (primary) */
862
+ size_t cap = 256 + (src ? strlen(src) * 4 : 0);
863
+ replacement = malloc(cap);
864
+ if (replacement) {
865
+ char *w = replacement;
866
+ w += snprintf(w, cap, "<video");
867
+ if (alt && *alt) w += snprintf(w, cap - (size_t)(w - replacement), " title=\"%s\"", alt);
868
+ w += snprintf(w, cap - (size_t)(w - replacement), ">");
869
+
870
+ if (strstr(matching->attrs, "data-apex-video-webm")) {
871
+ char *u = url_with_extension(src, "webm");
872
+ if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/webm\">", u); free(u); }
873
+ }
874
+ if (strstr(matching->attrs, "data-apex-video-ogg")) {
875
+ char *u = url_with_extension(src, "ogg");
876
+ if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/ogg\">", u); free(u); }
877
+ }
878
+ if (strstr(matching->attrs, "data-apex-video-mp4")) {
879
+ char *u = url_with_extension(src, "mp4");
880
+ if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/mp4\">", u); free(u); }
881
+ }
882
+ if (strstr(matching->attrs, "data-apex-video-mov")) {
883
+ char *u = url_with_extension(src, "mov");
884
+ if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/quicktime\">", u); free(u); }
885
+ }
886
+ if (strstr(matching->attrs, "data-apex-video-m4v")) {
887
+ char *u = url_with_extension(src, "m4v");
888
+ if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/mp4\">", u); free(u); }
889
+ }
890
+ /* Primary src as fallback (always include) */
891
+ w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"%s\">", src, video_type_from_url(src));
892
+ w += snprintf(w, cap - (size_t)(w - replacement), "</video>");
893
+ repl_len = (size_t)(w - replacement);
894
+ }
895
+ } else {
896
+ /* Build <picture> with <source> elements and <img> fallback */
897
+ char *webp_srcset = extract_data_apex_picture_srcset(matching->attrs, "webp");
898
+ char *avif_srcset = extract_data_apex_picture_srcset(matching->attrs, "avif");
899
+
900
+ /* Strip data-apex-* from attrs for the img */
901
+ size_t cap = 512 + (src ? strlen(src) * 2 : 0) + (webp_srcset ? strlen(webp_srcset) : 0) + (avif_srcset ? strlen(avif_srcset) : 0);
902
+ replacement = malloc(cap);
903
+ if (replacement) {
904
+ char *w = replacement;
905
+ w += snprintf(w, cap, "<picture>");
906
+ if (avif_srcset) w += snprintf(w, cap - (size_t)(w - replacement), "<source type=\"image/avif\" srcset=\"%s\">", avif_srcset);
907
+ if (webp_srcset) w += snprintf(w, cap - (size_t)(w - replacement), "<source type=\"image/webp\" srcset=\"%s\">", webp_srcset);
908
+ /* Preserve title on img for caption logic (apex_convert_image_captions) */
909
+ if (title && *title) {
910
+ w += snprintf(w, cap - (size_t)(w - replacement), "<img src=\"%s\" alt=\"%s\" title=\"%s\"></picture>", src, alt, title);
911
+ } else {
912
+ w += snprintf(w, cap - (size_t)(w - replacement), "<img src=\"%s\" alt=\"%s\"></picture>", src, alt);
913
+ }
914
+ repl_len = (size_t)(w - replacement);
915
+ }
916
+ free(webp_srcset);
917
+ free(avif_srcset);
918
+ }
919
+
920
+ if (replacement && repl_len > 0 && repl_len <= remaining) {
921
+ memcpy(write, replacement, repl_len);
922
+ write += repl_len;
923
+ remaining -= repl_len;
924
+ read = img_tag_end + 1;
925
+ free(replacement);
926
+ free(src);
927
+ free(alt);
928
+ free(title);
929
+ continue;
930
+ }
931
+ free(replacement);
932
+ free(src);
933
+ free(alt);
934
+ free(title);
935
+ }
936
+ /* Fall through to normal inject if replacement failed */
670
937
  } else {
671
938
  /* Find where to inject attributes */
672
939
  const char *inject_point = NULL;
@@ -810,30 +1077,32 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
810
1077
  return html ? strdup(html) : NULL;
811
1078
  }
812
1079
 
813
- /* Collect all headers from AST with their IDs */
1080
+ /* Collect all headers from AST with their IDs (level + text for matching) */
814
1081
  typedef struct header_id_map {
1082
+ int level;
815
1083
  char *text;
816
1084
  char *id;
817
1085
  int index;
1086
+ bool used;
818
1087
  struct header_id_map *next;
819
1088
  } header_id_map;
820
1089
 
821
1090
  header_id_map *header_map = NULL;
822
1091
  int header_count = 0;
823
1092
 
824
- /* Walk AST to collect headers */
1093
+ /* Walk AST to collect headers (only markdown HEADING nodes, not raw HTML) */
825
1094
  cmark_iter *iter = cmark_iter_new(document);
826
1095
  cmark_event_type event;
827
1096
  while ((event = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
828
1097
  cmark_node *node = cmark_iter_get_node(iter);
829
1098
  if (event == CMARK_EVENT_ENTER && cmark_node_get_type(node) == CMARK_NODE_HEADING) {
1099
+ int level = cmark_node_get_heading_level(node);
830
1100
  char *text = apex_extract_heading_text(node);
831
1101
  char *id = NULL;
832
1102
 
833
1103
  /* Check if ID already exists from IAL or manual ID (stored in user_data) */
834
1104
  char *user_data = (char *)cmark_node_get_user_data(node);
835
1105
  if (user_data) {
836
- /* Look for id="..." in user_data */
837
1106
  const char *id_attr = strstr(user_data, "id=\"");
838
1107
  if (id_attr) {
839
1108
  const char *id_start = id_attr + 4;
@@ -849,16 +1118,17 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
849
1118
  }
850
1119
  }
851
1120
 
852
- /* If no manual/IAL ID, generate one automatically */
853
1121
  if (!id) {
854
1122
  id = apex_generate_header_id(text, (apex_id_format_t)id_format);
855
1123
  }
856
1124
 
857
1125
  header_id_map *entry = malloc(sizeof(header_id_map));
858
1126
  if (entry) {
1127
+ entry->level = level;
859
1128
  entry->text = text;
860
1129
  entry->id = id;
861
1130
  entry->index = header_count++;
1131
+ entry->used = false;
862
1132
  entry->next = header_map;
863
1133
  header_map = entry;
864
1134
  } else {
@@ -902,7 +1172,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
902
1172
  const char *read = html;
903
1173
  char *write = output;
904
1174
  size_t remaining = capacity; /* Reserve 1 byte for null terminator */
905
- int current_header_idx = 0;
906
1175
 
907
1176
  while (*read) {
908
1177
  /* Look for header opening tags: <h1>, <h2>, etc. */
@@ -925,6 +1194,58 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
925
1194
  continue;
926
1195
  }
927
1196
 
1197
+ /* Extract header content from HTML (between > and </hN>) for matching */
1198
+ int html_level = tag_start[2] - '0';
1199
+ const char *content_start = tag_end + 1;
1200
+ const char *closing = strstr(content_start, "</h");
1201
+ const char *content_end = content_start;
1202
+ if (closing && closing[2] >= '1' && closing[2] <= '6' && closing[3] == '>') {
1203
+ content_end = closing;
1204
+ }
1205
+ char content_buf[512];
1206
+ size_t content_len = content_end > content_start ? (size_t)(content_end - content_start) : 0;
1207
+ if (content_len >= sizeof(content_buf)) content_len = sizeof(content_buf) - 1;
1208
+ memcpy(content_buf, content_start, content_len);
1209
+ content_buf[content_len] = '\0';
1210
+ /* Decode &amp; to & and strip tags for comparison with AST text */
1211
+ {
1212
+ char *r = content_buf, *w = content_buf;
1213
+ while (*r) {
1214
+ if (strncmp(r, "&amp;", 5) == 0) { *w++ = '&'; r += 5; }
1215
+ else if (strncmp(r, "&lt;", 4) == 0) { *w++ = '<'; r += 4; }
1216
+ else if (strncmp(r, "&gt;", 4) == 0) { *w++ = '>'; r += 4; }
1217
+ else if (*r == '<') { while (*r && *r != '>') r++; if (*r == '>') r++; }
1218
+ else { *w++ = *r++; }
1219
+ }
1220
+ *w = '\0';
1221
+ }
1222
+ /* Trim whitespace and newlines for comparison with AST text */
1223
+ char *trim_start = content_buf;
1224
+ while (*trim_start == ' ' || *trim_start == '\t' || *trim_start == '\n' || *trim_start == '\r') trim_start++;
1225
+ size_t trim_len = strlen(trim_start);
1226
+ while (trim_len > 0 && (trim_start[trim_len - 1] == ' ' || trim_start[trim_len - 1] == '\t' || trim_start[trim_len - 1] == '\n' || trim_start[trim_len - 1] == '\r'))
1227
+ trim_start[--trim_len] = '\0';
1228
+
1229
+ /* Match by (level, text); fallback to first unused at level only when text extraction
1230
+ differs (avoids assigning to raw HTML headers which have no AST entry at that level) */
1231
+ header_id_map *header = NULL;
1232
+ for (header_id_map *p = header_map; p; p = p->next) {
1233
+ if (!p->used && p->level == html_level && p->text && strcmp(p->text, trim_start) == 0) {
1234
+ header = p;
1235
+ p->used = true;
1236
+ break;
1237
+ }
1238
+ }
1239
+ if (!header) {
1240
+ for (header_id_map *p = header_map; p; p = p->next) {
1241
+ if (!p->used && p->level == html_level) {
1242
+ header = p;
1243
+ p->used = true;
1244
+ break;
1245
+ }
1246
+ }
1247
+ }
1248
+
928
1249
  /* Check if ID already exists in the tag */
929
1250
  bool has_id = false;
930
1251
  const char *id_attr = strstr(tag_start, "id=");
@@ -932,8 +1253,7 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
932
1253
  const char *id_end = NULL;
933
1254
  if (id_attr && id_attr < tag_end) {
934
1255
  has_id = true;
935
- /* Find the ID value boundaries for replacement */
936
- id_start = id_attr + 3; /* After 'id=' */
1256
+ id_start = id_attr + 3;
937
1257
  while (id_start < tag_end && (*id_start == ' ' || *id_start == '"' || *id_start == '\'')) {
938
1258
  id_start++;
939
1259
  }
@@ -943,15 +1263,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
943
1263
  }
944
1264
  }
945
1265
 
946
- /* Get the header ID - always get it so we can replace existing IDs */
947
- header_id_map *header = NULL;
948
- if (current_header_idx < header_count) {
949
- header = header_map;
950
- for (int i = 0; i < current_header_idx && header; i++) {
951
- header = header->next;
952
- }
953
- }
954
-
955
1266
  if (use_anchors && header && header->id) {
956
1267
  /* For anchor tags: copy the entire header tag, then inject anchor after '>' */
957
1268
  size_t tag_len = tag_end - tag_start + 1; /* Include '>' */
@@ -973,7 +1284,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
973
1284
  write += anchor_len;
974
1285
  remaining -= anchor_len;
975
1286
  }
976
- current_header_idx++;
977
1287
  } else if (!use_anchors && header && header->id) {
978
1288
  /* For header IDs: replace existing ID or inject new one */
979
1289
  if (has_id && id_attr) {
@@ -1027,11 +1337,10 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
1027
1337
  *write++ = *read++;
1028
1338
  remaining--;
1029
1339
  } else {
1030
- read++;
1031
- }
1340
+ read++;
1032
1341
  }
1033
- current_header_idx++;
1034
- } else {
1342
+ }
1343
+ } else {
1035
1344
  /* No existing ID: copy tag up to '>', inject id attribute, then copy '>' */
1036
1345
  const char *after_tag_name = tag_start + 3;
1037
1346
  while (*after_tag_name && *after_tag_name != '>' && !isspace((unsigned char)*after_tag_name)) {
@@ -1089,7 +1398,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
1089
1398
  }
1090
1399
  }
1091
1400
  }
1092
- current_header_idx++;
1093
1401
  } else {
1094
1402
  /* No ID to inject, just copy the tag */
1095
1403
  size_t tag_len = tag_end - tag_start + 1;
@@ -1099,9 +1407,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
1099
1407
  remaining -= tag_len;
1100
1408
  }
1101
1409
  read = tag_end + 1;
1102
- if (!has_id) {
1103
- current_header_idx++;
1104
- }
1105
1410
  }
1106
1411
  } else {
1107
1412
  /* Copy character */
@@ -2478,6 +2783,74 @@ char *apex_convert_image_captions(const char *html, bool enable_image_captions,
2478
2783
  size_t remaining = capacity;
2479
2784
 
2480
2785
  while (*read) {
2786
+ /* Look for <picture> - wrap in figure when caption from img title/alt */
2787
+ if (*read == '<' && (read[1] == 'p' || read[1] == 'P') &&
2788
+ (read[2] == 'i' || read[2] == 'I') && (read[3] == 'c' || read[3] == 'C') &&
2789
+ (read[4] == 't' || read[4] == 'T') && (read[5] == 'u' || read[5] == 'U') &&
2790
+ (read[6] == 'r' || read[6] == 'R') && (read[7] == 'e' || read[7] == 'E') &&
2791
+ (read[8] == ' ' || read[8] == '>' || read[8] == '\t')) {
2792
+ const char *picture_start = read;
2793
+ const char *picture_end = strstr(read, "</picture>");
2794
+ if (picture_end) {
2795
+ picture_end += 10; /* include </picture> */
2796
+ /* Find <img inside the picture and extract title/alt for caption */
2797
+ const char *img_in = picture_start;
2798
+ char *title_str = NULL, *alt_str = NULL;
2799
+ while ((img_in = strstr(img_in, "<img")) != NULL && img_in < picture_end) {
2800
+ const char *img_tag_end = strchr(img_in, '>');
2801
+ if (img_tag_end && img_tag_end < picture_end) {
2802
+ title_str = extract_attr_from_tag(img_in, img_tag_end + 1, "title");
2803
+ alt_str = extract_attr_from_tag(img_in, img_tag_end + 1, "alt");
2804
+ break;
2805
+ }
2806
+ img_in += 4;
2807
+ }
2808
+ /* Determine caption from title or alt per options */
2809
+ const char *caption = NULL;
2810
+ size_t caption_len = 0;
2811
+ if (enable_image_captions) {
2812
+ if (title_captions_only && title_str && *title_str) {
2813
+ caption = title_str; caption_len = strlen(title_str);
2814
+ } else if (title_str && *title_str) {
2815
+ caption = title_str; caption_len = strlen(title_str);
2816
+ } else if (alt_str && *alt_str) {
2817
+ caption = alt_str; caption_len = strlen(alt_str);
2818
+ }
2819
+ }
2820
+ size_t block_len = (size_t)(picture_end - picture_start);
2821
+ if (caption && caption_len > 0) {
2822
+ size_t extra = 8 + 12 + caption_len + 13 + 9; /* figure + figcaption + </figcaption> + </figure> */
2823
+ if (extra + block_len >= remaining) {
2824
+ size_t used = write - output;
2825
+ size_t new_cap = (used + extra + block_len + 1) * 2;
2826
+ char *new_out = realloc(output, new_cap);
2827
+ if (!new_out) { free(title_str); free(alt_str); free(output); return NULL; }
2828
+ output = new_out; write = output + used; remaining = new_cap - used;
2829
+ }
2830
+ memcpy(write, "<figure>", 8); write += 8; remaining -= 8;
2831
+ memcpy(write, picture_start, block_len); write += block_len; remaining -= block_len;
2832
+ memcpy(write, "<figcaption>", 12); write += 12; remaining -= 12;
2833
+ memcpy(write, caption, caption_len); write += caption_len; remaining -= caption_len;
2834
+ memcpy(write, "</figcaption></figure>", sizeof("</figcaption></figure>") - 1);
2835
+ write += sizeof("</figcaption></figure>") - 1;
2836
+ remaining -= sizeof("</figcaption></figure>") - 1;
2837
+ } else {
2838
+ if (block_len >= remaining) {
2839
+ size_t used = write - output;
2840
+ size_t new_cap = (used + block_len + 1) * 2;
2841
+ char *new_out = realloc(output, new_cap);
2842
+ if (!new_out) { free(title_str); free(alt_str); free(output); return NULL; }
2843
+ output = new_out; write = output + used; remaining = new_cap - used;
2844
+ }
2845
+ memcpy(write, picture_start, block_len); write += block_len; remaining -= block_len;
2846
+ }
2847
+ free(title_str);
2848
+ free(alt_str);
2849
+ read = picture_end;
2850
+ continue;
2851
+ }
2852
+ }
2853
+
2481
2854
  /* Look for <img tag */
2482
2855
  if (*read == '<' && (read[1] == 'i' || read[1] == 'I') &&
2483
2856
  (read[2] == 'm' || read[2] == 'M') &&
@@ -2527,6 +2900,43 @@ char *apex_convert_image_captions(const char *html, bool enable_image_captions,
2527
2900
 
2528
2901
  const char *tag_end = p; /* Points at '>' */
2529
2902
 
2903
+ /* Skip img inside <picture> - picture's img is the fallback, don't wrap in figure */
2904
+ {
2905
+ bool inside_picture = false;
2906
+ const char *scan = tag_start - 1;
2907
+ while (scan >= html) {
2908
+ if (*scan == '<') {
2909
+ if (scan + 8 <= tag_start && strncasecmp(scan, "<picture", 8) == 0 &&
2910
+ (scan[8] == ' ' || scan[8] == '>' || scan[8] == '\t')) {
2911
+ inside_picture = true;
2912
+ break;
2913
+ }
2914
+ if (scan + 10 <= tag_start && strncmp(scan, "</picture>", 10) == 0) {
2915
+ break; /* Outside - we passed closing tag first */
2916
+ }
2917
+ /* Other tags (source, etc.) - keep scanning backwards */
2918
+ }
2919
+ scan--;
2920
+ }
2921
+ if (inside_picture) {
2922
+ size_t tag_len = (size_t)(tag_end - tag_start + 1);
2923
+ if (tag_len >= remaining) {
2924
+ size_t used = write - output;
2925
+ size_t new_cap = (used + tag_len + 1) * 2;
2926
+ char *new_out = realloc(output, new_cap);
2927
+ if (!new_out) { free(output); return NULL; }
2928
+ output = new_out;
2929
+ write = output + used;
2930
+ remaining = new_cap - used;
2931
+ }
2932
+ memcpy(write, tag_start, tag_len);
2933
+ write += tag_len;
2934
+ remaining -= tag_len;
2935
+ read = tag_end + 1;
2936
+ continue;
2937
+ }
2938
+ }
2939
+
2530
2940
  /* Parse attributes between <img and > */
2531
2941
  const char *attr_start = tag_start + 4;
2532
2942
  const char *attr_end = tag_end;
@@ -2655,6 +3065,48 @@ char *apex_convert_image_captions(const char *html, bool enable_image_captions,
2655
3065
  continue;
2656
3066
  }
2657
3067
 
3068
+ /* Don't wrap in another <figure> if this image is already inside a <figure>
3069
+ * (e.g. from a fenced div ::: >figure), to avoid nested figure/figcaption. */
3070
+ {
3071
+ int figure_depth = 0;
3072
+ const char *scan = html;
3073
+ while (scan < tag_start) {
3074
+ if (*scan == '<') {
3075
+ if (scan + 8 <= tag_start &&
3076
+ (strncasecmp(scan + 1, "figure", 6) == 0) &&
3077
+ (scan[7] == '>' || isspace((unsigned char)scan[7]))) {
3078
+ figure_depth++;
3079
+ } else if (scan + 9 <= tag_start &&
3080
+ (strncasecmp(scan + 1, "/figure", 7) == 0) &&
3081
+ (scan[8] == '>' || isspace((unsigned char)scan[8]))) {
3082
+ if (figure_depth > 0) figure_depth--;
3083
+ }
3084
+ }
3085
+ scan++;
3086
+ }
3087
+ if (figure_depth > 0) {
3088
+ /* Already inside a figure - copy img tag as-is, no extra wrap */
3089
+ size_t tag_len = (size_t)(tag_end - tag_start + 1);
3090
+ if (tag_len >= remaining) {
3091
+ size_t used = write - output;
3092
+ size_t new_cap = (used + tag_len + 1) * 2;
3093
+ char *new_out = realloc(output, new_cap);
3094
+ if (!new_out) {
3095
+ free(output);
3096
+ return NULL;
3097
+ }
3098
+ output = new_out;
3099
+ write = output + used;
3100
+ remaining = new_cap - used;
3101
+ }
3102
+ memcpy(write, tag_start, tag_len);
3103
+ write += tag_len;
3104
+ remaining -= tag_len;
3105
+ read = tag_end + 1;
3106
+ continue;
3107
+ }
3108
+ }
3109
+
2658
3110
  /* We have caption text - wrap in <figure><img ...><figcaption>...</figcaption></figure> */
2659
3111
  const char *figure_open = "<figure>";
2660
3112
  const char *figcaption_open = "<figcaption>";
@@ -2760,3 +3212,781 @@ char *apex_convert_image_captions(const char *html, bool enable_image_captions,
2760
3212
  *write = '\0';
2761
3213
  return output;
2762
3214
  }
3215
+
3216
/* Advance past ASCII space, tab, CR and LF. */
static const char *figure_wrap_skip_ws(const char *s) {
    while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') s++;
    return s;
}

/**
 * Test whether the <p ...> element starting at p_open contains nothing but an
 * optional "&lt;" prefix and a single <img ...> tag, followed by </p>.
 *
 * On success stores the img tag bounds in [*img_start, *img_end) and the
 * position just past the closing </p ...> in *resume, and returns true.
 * Outputs are untouched when the pattern does not match.
 */
static bool figure_wrap_match_img(const char *p_open, const char **img_start,
                                  const char **img_end, const char **resume) {
    const char *cur = strchr(p_open + 1, '>');
    if (!cur) return false;

    /* Optional whitespace, optional "&lt;" angle prefix, optional whitespace */
    cur = figure_wrap_skip_ws(cur + 1);
    if (strncmp(cur, "&lt;", 4) == 0) cur = figure_wrap_skip_ws(cur + 4);

    /* Must be an <img tag (name followed by a delimiter) */
    if (strncasecmp(cur, "<img", 4) != 0) return false;
    if (cur[4] != ' ' && cur[4] != '\t' && cur[4] != '>' && cur[4] != '/') return false;

    /* Find the end of the img tag; a '>' inside a quoted value does not count */
    const char *scan = cur + 4;
    while (*scan && *scan != '>') {
        if (*scan == '"' || *scan == '\'') {
            char quote = *scan++;
            while (*scan && *scan != quote) scan++;
            if (*scan) scan++;
        } else {
            scan++;
        }
    }
    if (*scan != '>') return false;
    scan++; /* past '>' */

    /* Only whitespace may separate the img from </p> */
    const char *close = figure_wrap_skip_ws(scan);
    if (close[0] != '<' || close[1] != '/' ||
        (close[2] != 'p' && close[2] != 'P') ||
        !(close[3] == '>' || isspace((unsigned char)close[3]))) {
        return false;
    }
    /* Require a terminated closing tag so a stray "</p " cannot swallow the
     * remainder of the document. */
    const char *close_end = strchr(close + 3, '>');
    if (!close_end) return false;

    *img_start = cur;
    *img_end = scan;
    *resume = close_end + 1;
    return true;
}

/**
 * Strip redundant <p> that wraps only a single <img> inside <figure>, and any
 * leading "&lt; " (angle-prefix) so the result is <figure><img...></figure>.
 * Used when fenced div ::: >figure contains "< ![Image](...)" which becomes
 * <figure><p>&lt; <img...></p></figure>.
 *
 * @param html NUL-terminated HTML, or NULL.
 * @return Newly allocated string the caller must free; NULL when html is NULL
 *         or allocation fails.
 */
char *apex_strip_figure_paragraph_wrapper(const char *html) {
    if (!html) return NULL;

    /* Every step below writes at most as many bytes as it consumes, so the
     * result never exceeds the input length and no growth logic is needed. */
    size_t len = strlen(html);
    char *output = malloc(len + 1);
    if (!output) return NULL;

    const char *read = html;
    char *write = output;
    int figure_depth = 0;

    while (*read) {
        if (*read != '<') {
            *write++ = *read++;
            continue;
        }

        const char *tag = read + 1;

        if (tag[0] == '/') {
            /* </figure>: leave the figure. Never drop below zero, otherwise a
             * stray closing tag would disable stripping in the next figure. */
            if (strncasecmp(tag + 1, "figure", 6) == 0 &&
                (tag[7] == '>' || isspace((unsigned char)tag[7]))) {
                if (figure_depth > 0) figure_depth--;
            }
            *write++ = *read++;
            continue;
        }

        if (strncasecmp(tag, "figure", 6) == 0 &&
            (tag[6] == '>' || isspace((unsigned char)tag[6]))) {
            /* <figure ...>: enter the figure and copy the opening tag verbatim */
            figure_depth++;
            while (*read && *read != '>') *write++ = *read++;
            if (*read == '>') *write++ = *read++;
            continue;
        }

        if (figure_depth > 0 && (tag[0] == 'p' || tag[0] == 'P') &&
            (tag[1] == '>' || isspace((unsigned char)tag[1]))) {
            const char *img_start = NULL;
            const char *img_end = NULL;
            const char *resume = NULL;
            if (figure_wrap_match_img(read, &img_start, &img_end, &resume)) {
                /* Replace the entire <p>...</p> with just the <img> */
                size_t img_len = (size_t)(img_end - img_start);
                memcpy(write, img_start, img_len);
                write += img_len;
                read = resume;
                continue;
            }
        }

        *write++ = *read++;
    }
    *write = '\0';
    return output;
}
3355
+
3356
/**
 * Find the position of the matching closing tag for a block element.
 * Given pos pointing at "<figure" (or <video, <picture), returns pointer past "</figure>".
 * Uses depth counting for nested same-named tags. Returns NULL if not found.
 *
 * Only bytes in [pos, end) are examined, so the range does not have to be
 * NUL-terminated.
 *
 * @param pos      Start of the opening tag (the '<').
 * @param end      One past the last readable byte.
 * @param tag_name Element name to match, compared case-insensitively.
 * @param tag_len  Length of tag_name.
 */
static const char *find_block_close(const char *pos, const char *end, const char *tag_name, size_t tag_len) {
    /* Skip past the opening tag to its '>', ignoring '>' inside quoted values */
    const char *p = pos;
    while (p < end && *p != '>') {
        if (*p == '"' || *p == '\'') {
            char quote = *p++;
            while (p < end && *p != quote) p++;
            if (p < end) p++;
        } else {
            p++;
        }
    }
    if (p >= end) return NULL;
    p++; /* past '>' */

    int depth = 1;
    while (p < end) {
        const char *lt = memchr(p, '<', (size_t)(end - p));
        if (!lt) return NULL;

        const char *name = lt + 1;
        bool closing = (name < end && *name == '/');
        if (closing) name++;

        /* The name AND the delimiter after it must lie inside the range;
         * "> tag_len" (not ">=") keeps name[tag_len] below end. */
        if ((size_t)(end - name) > tag_len &&
            strncasecmp(name, tag_name, tag_len) == 0 &&
            (name[tag_len] == '>' || isspace((unsigned char)name[tag_len]))) {
            if (!closing) {
                depth++;
            } else if (--depth == 0) {
                const char *after_name = name + tag_len;
                const char *gt = memchr(after_name, '>', (size_t)(end - after_name));
                return gt ? gt + 1 : NULL;
            }
        }
        p = lt + 1;
    }
    return NULL;
}
3404
+
3405
/**
 * Unwrap paragraphs whose only content is one block element.
 *
 * A <p> may hold phrasing content only, so <p><figure>...</figure></p> (and
 * the same with <video> or <picture>) is invalid HTML5. Each such paragraph
 * is replaced by the block element itself; everything else is copied as-is.
 *
 * @param html NUL-terminated HTML, or NULL.
 * @return Newly allocated string the caller must free; NULL when html is NULL
 *         or allocation fails.
 */
char *apex_strip_block_paragraph_wrapper(const char *html) {
    static const struct {
        const char *name;
        size_t len;
    } block_tags[] = {
        { "figure", 6 },
        { "video", 5 },
        { "picture", 7 },
    };

    if (html == NULL) return NULL;

    const size_t input_len = strlen(html);
    const char *const input_end = html + input_len;

    /* Each step emits no more bytes than it consumes, so the input length
     * (plus the terminator) is always enough room. */
    char *result = malloc(input_len + 1);
    if (result == NULL) return NULL;

    const char *src = html;
    char *dst = result;

    while (*src != '\0') {
        const char *block_begin = NULL;
        const char *block_end = NULL;
        const char *resume_at = NULL;

        bool at_p_open = src[0] == '<' &&
                         (src[1] == 'p' || src[1] == 'P') &&
                         (src[2] == '>' || isspace((unsigned char)src[2]));

        if (at_p_open) {
            const char *open_gt = strchr(src + 1, '>');
            if (open_gt != NULL) {
                /* First non-blank byte inside the paragraph */
                const char *body = open_gt + 1;
                while (*body == ' ' || *body == '\t' || *body == '\n' || *body == '\r') body++;

                if (*body == '<') {
                    const char *name = body + 1;
                    for (size_t i = 0; i < sizeof(block_tags) / sizeof(block_tags[0]); i++) {
                        size_t n = block_tags[i].len;
                        if (strncasecmp(name, block_tags[i].name, n) == 0 &&
                            (name[n] == '>' || isspace((unsigned char)name[n]))) {
                            block_end = find_block_close(body, input_end, block_tags[i].name, n);
                            break;
                        }
                    }
                }

                if (block_end != NULL) {
                    /* Only blanks may sit between the block and </p> */
                    const char *tail = block_end;
                    while (*tail == ' ' || *tail == '\t' || *tail == '\n' || *tail == '\r') tail++;

                    if (tail[0] == '<' && tail[1] == '/' &&
                        (tail[2] == 'p' || tail[2] == 'P') &&
                        (tail[3] == '>' || isspace((unsigned char)tail[3]))) {
                        const char *close_gt = strchr(tail + 3, '>');
                        if (close_gt != NULL) {
                            block_begin = body;
                            resume_at = close_gt + 1;
                        }
                    }
                }
            }
        }

        if (resume_at != NULL) {
            /* Emit the block alone, dropping the surrounding <p>...</p> */
            size_t block_len = (size_t)(block_end - block_begin);
            memcpy(dst, block_begin, block_len);
            dst += block_len;
            src = resume_at;
        } else {
            *dst++ = *src++;
        }
    }

    *dst = '\0';
    return result;
}
3497
+
3498
/**
 * Report whether path names an existing regular file.
 * NULL, empty, missing and non-regular (e.g. directory) paths yield false.
 */
static bool file_exists(const char *path) {
    struct stat info;

    if (path == NULL || path[0] == '\0') {
        return false;
    }
    if (stat(path, &info) != 0) {
        return false;
    }
    return S_ISREG(info.st_mode) ? true : false;
}
3506
+
3507
/**
 * Join a relative URL onto base_dir ("base_dir/url") so the result can be
 * probed on the filesystem.
 *
 * Returns NULL for a missing/empty base_dir or url, for URLs starting with
 * '/' and for URLs containing "://". The returned string is heap-allocated
 * and owned by the caller.
 */
static char *resolve_path_for_check(const char *base_dir, const char *url) {
    bool have_base = (base_dir != NULL && base_dir[0] != '\0');
    bool have_url = (url != NULL && url[0] != '\0');
    if (!have_base || !have_url) {
        return NULL;
    }

    /* Absolute paths and remote URLs are not resolved */
    if (url[0] == '/' || strstr(url, "://") != NULL) {
        return NULL;
    }

    size_t base_len = strlen(base_dir);
    size_t url_len = strlen(url);
    char *joined = malloc(base_len + 1 + url_len + 1);
    if (joined == NULL) {
        return NULL;
    }

    memcpy(joined, base_dir, base_len);
    joined[base_len] = '/';
    memcpy(joined + base_len + 1, url, url_len + 1); /* includes the NUL */
    return joined;
}
3521
+
3522
/**
 * Insert a density marker ("@2x", "@3x") immediately before the file
 * extension of url, leaving any ?query or #fragment untouched.
 *
 * The extension is the last '.' inside the final path segment; a dot in a
 * directory name ("dir.v2/photo") is not an extension. Returns NULL when url
 * is NULL/empty, has no extension, or allocation fails. Caller must free.
 */
static char *url_with_density_suffix_auto(const char *url, const char *marker) {
    if (!url || !*url) return NULL;

    /* Path ends at whichever of '?' or '#' comes first */
    size_t path_len = strcspn(url, "?#");

    /* Walk backwards over the final segment only */
    const char *dot = NULL;
    for (size_t i = path_len; i > 0; i--) {
        char c = url[i - 1];
        if (c == '/') break;
        if (c == '.') {
            dot = url + (i - 1);
            break;
        }
    }
    if (!dot) return NULL;

    size_t prefix_len = (size_t)(dot - url);
    size_t marker_len = strlen(marker);
    size_t tail_len = strlen(dot); /* ".ext" plus any query/fragment */

    char *out = malloc(prefix_len + marker_len + tail_len + 1);
    if (!out) return NULL;
    memcpy(out, url, prefix_len);
    memcpy(out + prefix_len, marker, marker_len);
    memcpy(out + prefix_len + marker_len, dot, tail_len + 1); /* includes the NUL */
    return out;
}

/**
 * Insert @2x before extension in URL. Caller must free.
 * Returns NULL when the URL has no extension in its final path segment.
 */
static char *url_with_2x_suffix_auto(const char *url) {
    return url_with_density_suffix_auto(url, "@2x");
}

/**
 * Insert @3x before extension in URL. Caller must free.
 * Returns NULL when the URL has no extension in its final path segment.
 */
static char *url_with_3x_suffix_auto(const char *url) {
    return url_with_density_suffix_auto(url, "@3x");
}
3567
+
3568
/**
 * Tell whether url finishes with the two characters ".*", the wildcard
 * extension used for auto-discovery. NULL and empty strings yield false.
 */
static bool url_ends_with_wildcard(const char *url) {
    if (url == NULL || url[0] == '\0') {
        return false;
    }

    size_t n = strlen(url);
    if (n < 2) {
        return false;
    }
    return strcmp(url + (n - 2), ".*") == 0;
}
3576
+
3577
/**
 * Return a heap copy of url with its trailing ".*" removed.
 * Yields NULL when url is NULL/empty, does not end in ".*", or allocation
 * fails. The caller owns (and must free) the result.
 */
static char *base_from_wildcard_url(const char *url) {
    if (url == NULL || url[0] == '\0') {
        return NULL;
    }

    size_t total = strlen(url);
    if (total < 2 || memcmp(url + (total - 2), ".*", 2) != 0) {
        return NULL;
    }

    size_t stem_len = total - 2;
    char *stem = malloc(stem_len + 1);
    if (stem == NULL) {
        return NULL;
    }
    memcpy(stem, url, stem_len);
    stem[stem_len] = '\0';
    return stem;
}
3590
+
3591
/**
 * Check if URL has video extension (mp4, mov, webm, ogg, ogv, m4v).
 *
 * The extension is taken from the path part only: everything from the first
 * '?' or '#' onwards is ignored, so "clip.mp4#t=5" and "clip.webm?v=2" both
 * count as video. Comparison is case-insensitive.
 */
static bool is_video_url_auto(const char *url) {
    static const char *const video_exts[] = { "mp4", "mov", "webm", "ogg", "ogv", "m4v" };

    if (!url || !*url) return false;

    /* strcspn stops at whichever delimiter comes first, so a '?' inside the
     * fragment cannot push the path end past the '#'. */
    const char *path_end = url + strcspn(url, "?#");

    const char *last_dot = NULL;
    for (const char *c = url; c < path_end; c++) {
        if (*c == '.') last_dot = c;
    }
    if (!last_dot) return false;

    const char *ext = last_dot + 1;
    size_t ext_len = (size_t)(path_end - ext);
    for (size_t i = 0; i < sizeof(video_exts) / sizeof(video_exts[0]); i++) {
        if (strlen(video_exts[i]) == ext_len &&
            strncasecmp(ext, video_exts[i], ext_len) == 0) {
            return true;
        }
    }
    return false;
}
3614
+
3615
/* Growable, always NUL-terminated string builder used by
 * apex_expand_auto_media. Once an allocation fails, `failed` is set and all
 * further appends become no-ops, so callers check once at the end. */
struct auto_media_buf {
    char *data;
    size_t len;
    size_t cap;
    bool failed;
};

/* Make room for `extra` more bytes plus the terminator. */
static bool auto_media_reserve(struct auto_media_buf *b, size_t extra) {
    if (b->failed) return false;
    if (extra >= (size_t)-1 - b->len) {
        b->failed = true;
        return false;
    }
    size_t need = b->len + extra + 1;
    if (need > b->cap) {
        size_t new_cap = b->cap ? b->cap : 256;
        while (new_cap < need) {
            if (new_cap > (size_t)-1 / 2) {
                new_cap = need;
                break;
            }
            new_cap *= 2;
        }
        char *grown = realloc(b->data, new_cap);
        if (!grown) {
            b->failed = true;
            return false;
        }
        b->data = grown;
        b->cap = new_cap;
        b->data[b->len] = '\0';
    }
    return true;
}

/* Append exactly n bytes of s. */
static void auto_media_append_n(struct auto_media_buf *b, const char *s, size_t n) {
    if (!auto_media_reserve(b, n)) return;
    memcpy(b->data + b->len, s, n);
    b->len += n;
    b->data[b->len] = '\0';
}

/* Append a NUL-terminated string; NULL is treated as empty. */
static void auto_media_append(struct auto_media_buf *b, const char *s) {
    if (s) auto_media_append_n(b, s, strlen(s));
}

/* True when the relative URL resolves to an existing regular file. */
static bool auto_media_url_exists(const char *base_directory, const char *url) {
    if (!url) return false;
    char *path = resolve_path_for_check(base_directory, url);
    bool exists = path && file_exists(path);
    free(path);
    return exists;
}

/* True when url, with its extension replaced by ext, exists on disk. */
static bool auto_media_variant_exists(const char *base_directory, const char *url, const char *ext) {
    if (!url) return false;
    char *variant = url_with_extension(url, ext);
    bool exists = auto_media_url_exists(base_directory, variant);
    free(variant);
    return exists;
}

/* True when the marker attribute appears inside this tag only (the tag is
 * tag_len bytes long); later tags in the document are not consulted. */
static bool auto_media_tag_has_marker(const char *tag, size_t tag_len) {
    static const char marker[] = "data-apex-replace-auto=1";
    const size_t marker_len = sizeof(marker) - 1;
    for (size_t i = 0; i + marker_len <= tag_len; i++) {
        if (memcmp(tag + i, marker, marker_len) == 0) return true;
    }
    return false;
}

/* For a src ending in ".*": return the first candidate (video extensions
 * first, then image extensions) that exists on disk, or NULL. Caller frees. */
static char *auto_media_resolve_wildcard(const char *base_directory, const char *src) {
    static const char *const candidate_exts[] = {
        "mp4", "webm", "ogg", "ogv", "mov", "m4v",
        "jpg", "jpeg", "png", "gif", "webp", "avif"
    };

    /* Sanity check carried over from the original flow: only proceed when the
     * wildcard base can be derived. */
    char *base = base_from_wildcard_url(src);
    if (!base) return NULL;
    free(base);

    for (size_t i = 0; i < sizeof(candidate_exts) / sizeof(candidate_exts[0]); i++) {
        /* url_with_extension maps "image.*" -> "image.jpg" */
        char *candidate = url_with_extension(src, candidate_exts[i]);
        if (!candidate) continue;
        if (auto_media_url_exists(base_directory, candidate)) return candidate;
        free(candidate);
    }
    return NULL;
}

/* Emit <video> with one <source> per alternative format found on disk,
 * followed by a <source> for src itself. */
static void auto_media_build_video(struct auto_media_buf *b, const char *base_directory,
                                   const char *src, const char *alt) {
    static const char *const variant_exts[] = { "webm", "ogg", "mp4", "mov", "m4v" };

    auto_media_append(b, "<video");
    if (alt && *alt) {
        auto_media_append(b, " title=\"");
        auto_media_append(b, alt);
        auto_media_append(b, "\"");
    }
    auto_media_append(b, ">");

    for (size_t i = 0; i < sizeof(variant_exts) / sizeof(variant_exts[0]); i++) {
        char *variant_url = url_with_extension(src, variant_exts[i]);
        if (!variant_url) continue;
        if (auto_media_url_exists(base_directory, variant_url)) {
            const char *mime = (strcmp(variant_exts[i], "webm") == 0) ? "video/webm" :
                               (strcmp(variant_exts[i], "ogg") == 0) ? "video/ogg" :
                               (strcmp(variant_exts[i], "mov") == 0) ? "video/quicktime" : "video/mp4";
            auto_media_append(b, "<source src=\"");
            auto_media_append(b, variant_url);
            auto_media_append(b, "\" type=\"");
            auto_media_append(b, mime);
            auto_media_append(b, "\">");
        }
        free(variant_url);
    }

    /* NOTE(review): when src's own extension is in variant_exts this repeats
     * a <source> already emitted by the loop; kept to preserve the existing
     * output and source order. */
    auto_media_append(b, "<source src=\"");
    auto_media_append(b, src);
    auto_media_append(b, "\" type=\"");
    auto_media_append(b, video_type_from_url(src));
    auto_media_append(b, "\">");
    auto_media_append(b, "</video>");
}

/* Emit <source type=mime srcset="..."> listing the ext variant of src at 1x
 * and, where present on disk, at 2x and 3x. */
static void auto_media_append_source(struct auto_media_buf *b, const char *mime, const char *ext,
                                     const char *src, const char *url_2x, const char *url_3x,
                                     bool has_2x, bool has_3x) {
    /* NOTE(review): the 1x entry is listed even when only a 2x/3x file was
     * found, matching the previous behavior - confirm that is intended. */
    char *v1 = url_with_extension(src, ext);
    char *v2 = (has_2x && url_2x) ? url_with_extension(url_2x, ext) : NULL;
    char *v3 = (has_3x && url_3x) ? url_with_extension(url_3x, ext) : NULL;

    if (v1 || v2 || v3) {
        bool first = true;
        auto_media_append(b, "<source type=\"");
        auto_media_append(b, mime);
        auto_media_append(b, "\" srcset=\"");
        if (v1) {
            auto_media_append(b, v1);
            auto_media_append(b, " 1x");
            first = false;
        }
        if (v2) {
            if (!first) auto_media_append(b, ", ");
            auto_media_append(b, v2);
            auto_media_append(b, " 2x");
            first = false;
        }
        if (v3) {
            if (!first) auto_media_append(b, ", ");
            auto_media_append(b, v3);
            auto_media_append(b, " 3x");
        }
        auto_media_append(b, "\">");
    }
    free(v1);
    free(v2);
    free(v3);
}

/* Emit <picture>/<img srcset> markup for the variants of src found on disk.
 * Appends nothing when no variant exists. */
static void auto_media_build_image(struct auto_media_buf *b, const char *base_directory,
                                   const char *src, const char *alt, const char *title) {
    char *url_2x = url_with_2x_suffix_auto(src);
    char *url_3x = url_with_3x_suffix_auto(src);

    bool has_2x = auto_media_url_exists(base_directory, url_2x);
    bool has_3x = auto_media_url_exists(base_directory, url_3x);

    bool has_webp_1x = auto_media_variant_exists(base_directory, src, "webp");
    bool has_webp_2x = auto_media_variant_exists(base_directory, url_2x, "webp");
    bool has_webp_3x = auto_media_variant_exists(base_directory, url_3x, "webp");
    bool has_avif_1x = auto_media_variant_exists(base_directory, src, "avif");
    bool has_avif_2x = auto_media_variant_exists(base_directory, url_2x, "avif");
    bool has_avif_3x = auto_media_variant_exists(base_directory, url_3x, "avif");

    bool any_avif = has_avif_1x || has_avif_2x || has_avif_3x;
    bool any_webp = has_webp_1x || has_webp_2x || has_webp_3x;
    bool need_picture = any_avif || any_webp;
    bool need_srcset = has_2x || has_3x;

    if (need_picture || need_srcset) {
        if (need_picture) auto_media_append(b, "<picture>");

        /* AVIF first (preferred), then WebP */
        if (any_avif) {
            auto_media_append_source(b, "image/avif", "avif", src, url_2x, url_3x,
                                     has_avif_2x, has_avif_3x);
        }
        if (any_webp) {
            auto_media_append_source(b, "image/webp", "webp", src, url_2x, url_3x,
                                     has_webp_2x, has_webp_3x);
        }

        auto_media_append(b, "<img src=\"");
        auto_media_append(b, src);
        auto_media_append(b, "\" alt=\"");
        auto_media_append(b, alt);
        auto_media_append(b, "\"");
        /* Preserve title on img for caption logic */
        if (title && *title) {
            auto_media_append(b, " title=\"");
            auto_media_append(b, title);
            auto_media_append(b, "\"");
        }
        if (need_srcset) {
            auto_media_append(b, " srcset=\"");
            auto_media_append(b, src);
            auto_media_append(b, " 1x");
            if (has_2x) {
                auto_media_append(b, ", ");
                auto_media_append(b, url_2x);
                auto_media_append(b, " 2x");
            }
            if (has_3x) {
                auto_media_append(b, ", ");
                auto_media_append(b, url_3x);
                auto_media_append(b, " 3x");
            }
            auto_media_append(b, "\"");
        }
        auto_media_append(b, ">");
        if (need_picture) auto_media_append(b, "</picture>");
    }

    free(url_2x);
    free(url_3x);
}

/* Build the replacement markup for one marked <img> tag (tag_end points at
 * its '>'). Returns a heap string, or NULL when the tag should be kept
 * unchanged (no local file, no variants, or allocation failure). */
static char *auto_media_build_replacement(const char *tag_start, const char *tag_end,
                                          const char *base_directory) {
    const char *attrs_end = tag_end + 1;
    char *src = extract_attr_from_tag(tag_start, attrs_end, "src");
    char *alt = extract_attr_from_tag(tag_start, attrs_end, "alt");
    char *title = extract_attr_from_tag(tag_start, attrs_end, "title");
    struct auto_media_buf markup = {0};

    if (src && *src) {
        if (url_ends_with_wildcard(src)) {
            /* src ends with .*: use the first existing file as the fallback */
            char *found = auto_media_resolve_wildcard(base_directory, src);
            free(src);
            src = found;
        } else if (!auto_media_url_exists(base_directory, src)) {
            free(src);
            src = NULL;
        }
    }

    if (src && *src) {
        if (is_video_url_auto(src)) {
            auto_media_build_video(&markup, base_directory, src, alt);
        } else {
            auto_media_build_image(&markup, base_directory, src, alt, title);
        }
    }

    free(src);
    free(alt);
    free(title);

    if (markup.failed || markup.len == 0) {
        free(markup.data);
        return NULL;
    }
    return markup.data;
}

/**
 * Expand img tags with data-apex-replace-auto=1 by discovering existing
 * format variants on disk and generating appropriate <picture> or <video>.
 * Only processes local (relative) URLs when base_directory is provided.
 * Caller must free the returned string.
 *
 * The marker is looked up inside each <img> tag individually; tags without
 * it, and tags for which no local file or variant is found, are copied
 * through unchanged.
 *
 * @param html           NUL-terminated HTML, or NULL.
 * @param base_directory Directory relative URLs are resolved against. When
 *                       NULL or empty, a plain copy of html is returned.
 * @return Newly allocated HTML, or NULL when html is NULL or allocation fails.
 */
char *apex_expand_auto_media(const char *html, const char *base_directory) {
    if (!html) return NULL;
    if (!base_directory || !*base_directory) return strdup(html);

    struct auto_media_buf out = {0};
    if (!auto_media_reserve(&out, strlen(html))) return NULL;

    const char *read = html;
    while (*read) {
        bool at_img = *read == '<' &&
                      (read[1] == 'i' || read[1] == 'I') &&
                      (read[2] == 'm' || read[2] == 'M') &&
                      (read[3] == 'g' || read[3] == 'G') &&
                      (read[4] == ' ' || read[4] == '\t' || read[4] == '>' || read[4] == '/');

        if (!at_img) {
            /* Copy everything up to (not including) the next '<' in one go */
            const char *next = strchr(read + 1, '<');
            size_t run = next ? (size_t)(next - read) : strlen(read);
            auto_media_append_n(&out, read, run);
            read += run;
            continue;
        }

        const char *tag_start = read;
        const char *tag_end = find_tag_end(tag_start);
        if (!tag_end) {
            auto_media_append_n(&out, read, 1);
            read++;
            continue;
        }
        size_t tag_len = (size_t)(tag_end - tag_start) + 1;

        char *replacement = NULL;
        if (auto_media_tag_has_marker(tag_start, tag_len)) {
            replacement = auto_media_build_replacement(tag_start, tag_end, base_directory);
        }
        if (replacement) {
            auto_media_append(&out, replacement);
        } else {
            /* Copy original tag */
            auto_media_append_n(&out, tag_start, tag_len);
        }
        free(replacement);
        read = tag_end + 1;
    }

    if (out.failed) {
        free(out.data);
        return NULL;
    }
    return out.data;
}