apex-ruby 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/apex_ext/apex_ext.c +6 -0
- data/ext/apex_ext/apex_src/AGENTS.md +41 -0
- data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
- data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
- data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
- data/ext/apex_ext/apex_src/Package.swift +9 -0
- data/ext/apex_ext/apex_src/README.md +31 -9
- data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
- data/ext/apex_ext/apex_src/VERSION +1 -1
- data/ext/apex_ext/apex_src/cli/main.c +1125 -13
- data/ext/apex_ext/apex_src/docs/index.md +459 -0
- data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
- data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
- data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
- data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
- data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
- data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
- data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
- data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
- data/ext/apex_ext/apex_src/man/apex.1 +663 -620
- data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
- data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
- data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
- data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
- data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
- data/ext/apex_ext/apex_src/pages/index.md +459 -0
- data/ext/apex_ext/apex_src/src/_README.md +4 -4
- data/ext/apex_ext/apex_src/src/apex.c +702 -44
- data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
- data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
- data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
- data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
- data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
- data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
- data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
- data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
- data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
- data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
- data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
- data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
- data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
- data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
- data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
- data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
- data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
- data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
- data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
- data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
- data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
- data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
- data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
- data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
- data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
- data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
- data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
- data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
- data/ext/apex_ext/apex_src/tests/README.md +11 -5
- data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
- data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
- data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
- data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
- data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
- data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
- data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
- data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
- data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
- data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
- data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
- data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
- data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
- data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
- data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
- data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
- data/lib/apex/version.rb +1 -1
- metadata +32 -2
- data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
|
@@ -7,10 +7,12 @@
|
|
|
7
7
|
#include "table.h" /* For CMARK_NODE_TABLE */
|
|
8
8
|
#include "extensions/header_ids.h"
|
|
9
9
|
#include <string.h>
|
|
10
|
+
#include <strings.h> /* For strncasecmp */
|
|
10
11
|
#include <stdlib.h>
|
|
11
12
|
#include <stdio.h>
|
|
12
13
|
#include <stdbool.h>
|
|
13
14
|
#include <ctype.h>
|
|
15
|
+
#include <sys/stat.h>
|
|
14
16
|
|
|
15
17
|
/**
|
|
16
18
|
* Inject attributes into HTML opening tags
|
|
@@ -154,6 +156,125 @@ static char *extract_ial_from_table_attrs(const char *attrs) {
|
|
|
154
156
|
return result;
|
|
155
157
|
}
|
|
156
158
|
|
|
159
|
+
/**
 * Extract the value of an attribute from an HTML tag.
 *
 * Scans [tag_start, tag_end) for attr_name (compared case-insensitively) that
 * begins either at tag_start or directly after whitespace and is immediately
 * followed by '=' and a single- or double-quoted value.
 *
 * Quoted values belonging to other attributes are skipped while scanning, so
 * text such as title='x' embedded inside an alt value is not mistaken for a
 * real attribute. A backslash inside a quoted value protects the following
 * character from ending the value (same convention as find_tag_end).
 *
 * @param tag_start  First character of the region to scan.
 * @param tag_end    One past the last character of the region to scan.
 * @param attr_name  Attribute name without the trailing '='.
 * @return Newly allocated copy of the raw value (quotes excluded), or NULL if
 *         the attribute is missing, not quoted, not terminated inside the
 *         region, any argument is NULL, or allocation fails. Caller must free.
 */
static char *extract_attr_from_tag(const char *tag_start, const char *tag_end, const char *attr_name) {
    if (!tag_start || !tag_end || !attr_name) return NULL;

    size_t attr_len = strlen(attr_name);
    const char *p = tag_start;
    while (p < tag_end) {
        int at_boundary = (p == tag_start) || isspace((unsigned char)p[-1]);
        /* Only compare when name + '=' fits inside the tag region. */
        if (at_boundary && (size_t)(tag_end - p) > attr_len &&
            strncasecmp(p, attr_name, attr_len) == 0 && p[attr_len] == '=') {
            p += attr_len + 1;
            if (p >= tag_end) return NULL;
            char q = *p;
            if (q != '"' && q != '\'') return NULL;
            p++;
            const char *val_start = p;
            while (p < tag_end && *p != q) {
                if (*p == '\\' && p + 1 < tag_end) p++;
                p++;
            }
            if (p >= tag_end) return NULL; /* value never closed inside the tag */
            size_t len = (size_t)(p - val_start);
            char *out = malloc(len + 1);
            if (out) {
                memcpy(out, val_start, len);
                out[len] = '\0';
            }
            return out;
        }
        if (*p == '"' || *p == '\'') {
            /* Step over another attribute's quoted value so its contents
             * cannot be matched as an attribute name. */
            char skip_q = *p++;
            while (p < tag_end && *p != skip_q) {
                if (*p == '\\' && p + 1 < tag_end) p++;
                p++;
            }
            if (p >= tag_end) return NULL; /* unterminated quote: nothing left to scan */
        }
        p++;
    }
    return NULL;
}
|
|
192
|
+
|
|
193
|
+
/**
 * Replace the file extension in a URL's path, keeping any query/fragment.
 *
 * The path ends at the first '?' or '#', whichever comes first. The extension
 * is whatever follows the last '.' in the final path segment (after the last
 * '/'), so dots in the host name, in directory names, or in the query string
 * are never treated as the extension separator.
 *
 * @param url      URL or relative path.
 * @param new_ext  New extension without the leading dot (e.g. "webm").
 * @return Newly allocated string "<path-without-ext>.<new_ext><query/fragment>",
 *         or NULL if either argument is NULL, the final path segment has no
 *         extension, or allocation fails. Caller must free.
 */
static char *url_with_extension(const char *url, const char *new_ext) {
    if (!url || !new_ext) return NULL;

    /* Path stops at the first '?' or '#'. */
    const char *path_end = url + strcspn(url, "?#");

    /* Walk backwards through the final path segment looking for its last dot. */
    const char *last_dot = NULL;
    for (const char *p = path_end; p > url; ) {
        --p;
        if (*p == '/') break;
        if (*p == '.') { last_dot = p; break; }
    }
    if (!last_dot) return NULL;

    size_t prefix_len = (size_t)(last_dot - url);
    size_t ext_len = strlen(new_ext);
    size_t tail_len = strlen(path_end);
    char *out = malloc(prefix_len + 1 + ext_len + tail_len + 1);
    if (!out) return NULL;

    memcpy(out, url, prefix_len);
    out[prefix_len] = '.';
    memcpy(out + prefix_len + 1, new_ext, ext_len);
    /* tail_len + 1 also copies the terminating NUL (tail may be empty). */
    memcpy(out + prefix_len + 1 + ext_len, path_end, tail_len + 1);
    return out;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Locate the '>' that closes an HTML tag.
 *
 * A '>' inside a single- or double-quoted attribute value is ignored. Inside
 * a quoted value, a backslash followed by any character consumes both, so the
 * escaped character cannot close the quote.
 *
 * @param tag_start  NUL-terminated text beginning at (or inside) the tag.
 * @return Pointer to the closing '>', or NULL if the string ends first.
 */
static const char *find_tag_end(const char *tag_start) {
    char open_quote = '\0';
    for (const char *cur = tag_start; *cur != '\0'; cur++) {
        const char c = *cur;
        if (open_quote != '\0') {
            if (c == '\\' && cur[1] != '\0') {
                cur++; /* skip the escaped character as well */
            } else if (c == open_quote) {
                open_quote = '\0';
            }
            continue;
        }
        if (c == '>') {
            return cur;
        }
        if (c == '"' || c == '\'') {
            open_quote = c;
        }
    }
    return NULL;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Get the video MIME type implied by a URL's file extension.
 *
 * The extension is taken from the final path segment only: the path ends at
 * the first '?' or '#', and the extension is what follows the last '.' after
 * the last '/'. It is compared case-insensitively and must match a known
 * extension exactly (a longer extension that merely starts with "mp4", "ogg",
 * etc. is not recognised).
 *
 * @param url  URL or relative path; may be NULL.
 * @return A string literal MIME type. "video/mp4" is returned for NULL input,
 *         a missing extension, or an unrecognised extension.
 */
static const char *video_type_from_url(const char *url) {
    static const struct {
        const char *ext;
        const char *mime;
    } known_types[] = {
        { "mp4",  "video/mp4" },
        { "webm", "video/webm" },
        { "ogg",  "video/ogg" },
        { "ogv",  "video/ogg" },
        { "mov",  "video/quicktime" },
        { "m4v",  "video/mp4" },
    };

    if (!url) return "video/mp4";

    /* Ignore query string and fragment when looking for the extension. */
    const char *path_end = url + strcspn(url, "?#");
    const char *dot = NULL;
    for (const char *p = path_end; p > url; ) {
        --p;
        if (*p == '/') break;
        if (*p == '.') { dot = p; break; }
    }
    if (!dot) return "video/mp4";

    const char *ext = dot + 1;
    size_t len = (size_t)(path_end - ext);
    for (size_t i = 0; i < sizeof(known_types) / sizeof(known_types[0]); i++) {
        if (strlen(known_types[i].ext) == len &&
            strncasecmp(ext, known_types[i].ext, len) == 0) {
            return known_types[i].mime;
        }
    }
    return "video/mp4";
}
|
|
256
|
+
|
|
257
|
+
/**
 * Extract the value of data-apex-picture-<format> from an attribute string.
 *
 * Looks for the literal text data-apex-picture-<format>=" in attrs and copies
 * everything up to the next double quote, e.g. data-apex-picture-webp="value"
 * or data-apex-picture-avif="value".
 *
 * @param attrs   Attribute string to search; may be NULL.
 * @param format  Format suffix such as "webp" or "avif"; may be NULL.
 * @return Newly allocated copy of the value, or NULL if an argument is NULL,
 *         the format is too long for the internal key buffer, the attribute is
 *         absent or unterminated, or allocation fails. Caller must free.
 */
static char *extract_data_apex_picture_srcset(const char *attrs, const char *format) {
    if (!attrs || !format) return NULL;

    char key[64];
    int key_len = snprintf(key, sizeof(key), "data-apex-picture-%s=\"", format);
    /* A truncated key could match the wrong attribute, so treat it as "not found". */
    if (key_len < 0 || (size_t)key_len >= sizeof(key)) return NULL;

    const char *p = strstr(attrs, key);
    if (!p) return NULL;
    p += (size_t)key_len;

    const char *end = strchr(p, '"');
    if (!end) return NULL;

    size_t len = (size_t)(end - p);
    char *out = malloc(len + 1);
    if (!out) return NULL;
    memcpy(out, p, len);
    out[len] = '\0';
    return out;
}
|
|
277
|
+
|
|
157
278
|
/* Counters for element indexing */
|
|
158
279
|
typedef struct {
|
|
159
280
|
int para_count;
|
|
@@ -229,16 +350,29 @@ static char *get_node_text_fingerprint(cmark_node *node) {
|
|
|
229
350
|
}
|
|
230
351
|
}
|
|
231
352
|
|
|
232
|
-
/* For images, use
|
|
353
|
+
/* For images, use URL + alt (from first child) to disambiguate same-src images */
|
|
233
354
|
if (type == CMARK_NODE_IMAGE) {
|
|
234
355
|
const char *url = cmark_node_get_url(node);
|
|
235
356
|
if (url) {
|
|
236
|
-
size_t
|
|
237
|
-
if (
|
|
238
|
-
|
|
357
|
+
size_t url_len = strlen(url);
|
|
358
|
+
if (url_len > 50) url_len = 50;
|
|
359
|
+
cmark_node *child = cmark_node_first_child(node);
|
|
360
|
+
const char *alt = (child && cmark_node_get_type(child) == CMARK_NODE_TEXT) ?
|
|
361
|
+
cmark_node_get_literal(child) : NULL;
|
|
362
|
+
size_t alt_len = alt ? strlen(alt) : 0;
|
|
363
|
+
if (alt_len > 20) alt_len = 20;
|
|
364
|
+
size_t total = url_len + (alt_len ? 1 + alt_len : 0);
|
|
365
|
+
if (total > 50) total = 50;
|
|
366
|
+
char *fingerprint = malloc(total + 1);
|
|
239
367
|
if (fingerprint) {
|
|
240
|
-
memcpy(fingerprint, url,
|
|
241
|
-
|
|
368
|
+
memcpy(fingerprint, url, url_len);
|
|
369
|
+
size_t pos = url_len;
|
|
370
|
+
if (alt_len && pos + 1 + alt_len <= 50) {
|
|
371
|
+
fingerprint[pos++] = '|';
|
|
372
|
+
memcpy(fingerprint + pos, alt, alt_len);
|
|
373
|
+
pos += alt_len;
|
|
374
|
+
}
|
|
375
|
+
fingerprint[pos] = '\0';
|
|
242
376
|
return fingerprint;
|
|
243
377
|
}
|
|
244
378
|
}
|
|
@@ -302,7 +436,8 @@ static void collect_nodes_with_attrs(cmark_node *node, attr_node **list) {
|
|
|
302
436
|
element_counters counters = {0};
|
|
303
437
|
collect_nodes_with_attrs_recursive(node, list, &counters);
|
|
304
438
|
|
|
305
|
-
/* Reverse the list
|
|
439
|
+
/* Reverse the list: prepend builds [last_visited, ..., first_visited];
|
|
440
|
+
* we need document order [first, ..., last] for matching. */
|
|
306
441
|
attr_node *reversed = NULL;
|
|
307
442
|
while (*list) {
|
|
308
443
|
attr_node *next = (*list)->next;
|
|
@@ -499,7 +634,7 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
|
|
|
499
634
|
int fp_idx = 0;
|
|
500
635
|
|
|
501
636
|
if (elem_type == CMARK_NODE_LINK || elem_type == CMARK_NODE_IMAGE) {
|
|
502
|
-
/* For links/images, extract
|
|
637
|
+
/* For links/images, extract href/src and for images also alt (to disambiguate same-src) */
|
|
503
638
|
const char *url_attr = (elem_type == CMARK_NODE_LINK) ? "href=\"" : "src=\"";
|
|
504
639
|
const char *url_start = strstr(read, url_attr);
|
|
505
640
|
if (url_start) {
|
|
@@ -509,8 +644,25 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
|
|
|
509
644
|
size_t url_len = url_end - url_start;
|
|
510
645
|
if (url_len > 50) url_len = 50;
|
|
511
646
|
memcpy(html_fingerprint, url_start, url_len);
|
|
512
|
-
html_fingerprint[url_len] = '\0';
|
|
513
647
|
fp_idx = url_len;
|
|
648
|
+
if (elem_type == CMARK_NODE_IMAGE && fp_idx < 49) {
|
|
649
|
+
const char *alt_attr = "alt=\"";
|
|
650
|
+
const char *alt_start = strstr(read, alt_attr);
|
|
651
|
+
if (alt_start && alt_start < tag_end) {
|
|
652
|
+
alt_start += strlen(alt_attr);
|
|
653
|
+
const char *alt_end = strchr(alt_start, '"');
|
|
654
|
+
if (alt_end) {
|
|
655
|
+
size_t alt_len = alt_end - alt_start;
|
|
656
|
+
if (alt_len > 20) alt_len = 20;
|
|
657
|
+
if (fp_idx + 1 + alt_len <= 50) {
|
|
658
|
+
html_fingerprint[fp_idx++] = '|';
|
|
659
|
+
memcpy(html_fingerprint + fp_idx, alt_start, alt_len);
|
|
660
|
+
fp_idx += alt_len;
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
html_fingerprint[fp_idx] = '\0';
|
|
514
666
|
}
|
|
515
667
|
}
|
|
516
668
|
} else if (elem_type == CMARK_NODE_STRONG || elem_type == CMARK_NODE_EMPH || elem_type == CMARK_NODE_CODE) {
|
|
@@ -551,6 +703,17 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
|
|
|
551
703
|
break;
|
|
552
704
|
}
|
|
553
705
|
}
|
|
706
|
+
} else if (elem_type == CMARK_NODE_IMAGE) {
|
|
707
|
+
/* Images: match by element_index (document order). */
|
|
708
|
+
for (attr_node *a = attr_list; a; a = a->next, idx++) {
|
|
709
|
+
if (used[idx]) continue;
|
|
710
|
+
if (a->node_type != CMARK_NODE_IMAGE) continue;
|
|
711
|
+
if (a->element_index == elem_idx) {
|
|
712
|
+
matching = a;
|
|
713
|
+
used[idx] = true;
|
|
714
|
+
break;
|
|
715
|
+
}
|
|
716
|
+
}
|
|
554
717
|
} else {
|
|
555
718
|
/* For other elements, use the existing matching logic */
|
|
556
719
|
for (attr_node *a = attr_list; a; a = a->next, idx++) {
|
|
@@ -565,7 +728,8 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
|
|
|
565
728
|
/* Try fingerprint match first (works for both block and inline) */
|
|
566
729
|
if (a->text_fingerprint && fp_idx > 0 &&
|
|
567
730
|
strncmp(a->text_fingerprint, html_fingerprint, 50) == 0) {
|
|
568
|
-
/* For inline elements, also check element_index to handle duplicates
|
|
731
|
+
/* For inline elements, also check element_index to handle duplicates.
|
|
732
|
+
* (Images with same src use sequential matching in the branch above.) */
|
|
569
733
|
if (elem_type == CMARK_NODE_LINK || elem_type == CMARK_NODE_IMAGE ||
|
|
570
734
|
elem_type == CMARK_NODE_STRONG || elem_type == CMARK_NODE_EMPH ||
|
|
571
735
|
elem_type == CMARK_NODE_CODE) {
|
|
@@ -667,6 +831,109 @@ char *apex_render_html_with_attributes(cmark_node *document, int options) {
|
|
|
667
831
|
}
|
|
668
832
|
if (ial_attrs) free(ial_attrs);
|
|
669
833
|
/* No IAL attributes to inject, but table still needs to be copied - fall through */
|
|
834
|
+
} else if (elem_type == CMARK_NODE_IMAGE &&
|
|
835
|
+
(strstr(matching->attrs, "data-apex-replace-video") ||
|
|
836
|
+
strstr(matching->attrs, "data-apex-replace-picture"))) {
|
|
837
|
+
/* Replace img with video or picture element */
|
|
838
|
+
const char *img_tag_end = find_tag_end(read);
|
|
839
|
+
if (img_tag_end && img_tag_end > read) {
|
|
840
|
+
char *src = extract_attr_from_tag(read, img_tag_end + 1, "src");
|
|
841
|
+
char *alt = extract_attr_from_tag(read, img_tag_end + 1, "alt");
|
|
842
|
+
char *title = extract_attr_from_tag(read, img_tag_end + 1, "title");
|
|
843
|
+
/* Fallback: title may be in IAL attrs (cmark may not emit it on img) */
|
|
844
|
+
if ((!title || !*title) && matching->attrs) {
|
|
845
|
+
size_t alen = strlen(matching->attrs);
|
|
846
|
+
char *fake_tag = malloc(alen + 10);
|
|
847
|
+
if (fake_tag) {
|
|
848
|
+
snprintf(fake_tag, alen + 10, "<img %s>", matching->attrs);
|
|
849
|
+
char *t = extract_attr_from_tag(fake_tag, fake_tag + strlen(fake_tag) + 1, "title");
|
|
850
|
+
free(fake_tag);
|
|
851
|
+
if (t) { free(title); title = t; }
|
|
852
|
+
}
|
|
853
|
+
}
|
|
854
|
+
if (!src) src = strdup("");
|
|
855
|
+
if (!alt) alt = strdup("");
|
|
856
|
+
|
|
857
|
+
char *replacement = NULL;
|
|
858
|
+
size_t repl_len = 0;
|
|
859
|
+
|
|
860
|
+
if (strstr(matching->attrs, "data-apex-replace-video")) {
|
|
861
|
+
/* Build <video> with <source> elements. Order: webm, ogg, mp4/mov/m4v (primary) */
|
|
862
|
+
size_t cap = 256 + (src ? strlen(src) * 4 : 0);
|
|
863
|
+
replacement = malloc(cap);
|
|
864
|
+
if (replacement) {
|
|
865
|
+
char *w = replacement;
|
|
866
|
+
w += snprintf(w, cap, "<video");
|
|
867
|
+
if (alt && *alt) w += snprintf(w, cap - (size_t)(w - replacement), " title=\"%s\"", alt);
|
|
868
|
+
w += snprintf(w, cap - (size_t)(w - replacement), ">");
|
|
869
|
+
|
|
870
|
+
if (strstr(matching->attrs, "data-apex-video-webm")) {
|
|
871
|
+
char *u = url_with_extension(src, "webm");
|
|
872
|
+
if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/webm\">", u); free(u); }
|
|
873
|
+
}
|
|
874
|
+
if (strstr(matching->attrs, "data-apex-video-ogg")) {
|
|
875
|
+
char *u = url_with_extension(src, "ogg");
|
|
876
|
+
if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/ogg\">", u); free(u); }
|
|
877
|
+
}
|
|
878
|
+
if (strstr(matching->attrs, "data-apex-video-mp4")) {
|
|
879
|
+
char *u = url_with_extension(src, "mp4");
|
|
880
|
+
if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/mp4\">", u); free(u); }
|
|
881
|
+
}
|
|
882
|
+
if (strstr(matching->attrs, "data-apex-video-mov")) {
|
|
883
|
+
char *u = url_with_extension(src, "mov");
|
|
884
|
+
if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/quicktime\">", u); free(u); }
|
|
885
|
+
}
|
|
886
|
+
if (strstr(matching->attrs, "data-apex-video-m4v")) {
|
|
887
|
+
char *u = url_with_extension(src, "m4v");
|
|
888
|
+
if (u) { w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"video/mp4\">", u); free(u); }
|
|
889
|
+
}
|
|
890
|
+
/* Primary src as fallback (always include) */
|
|
891
|
+
w += snprintf(w, cap - (size_t)(w - replacement), "<source src=\"%s\" type=\"%s\">", src, video_type_from_url(src));
|
|
892
|
+
w += snprintf(w, cap - (size_t)(w - replacement), "</video>");
|
|
893
|
+
repl_len = (size_t)(w - replacement);
|
|
894
|
+
}
|
|
895
|
+
} else {
|
|
896
|
+
/* Build <picture> with <source> elements and <img> fallback */
|
|
897
|
+
char *webp_srcset = extract_data_apex_picture_srcset(matching->attrs, "webp");
|
|
898
|
+
char *avif_srcset = extract_data_apex_picture_srcset(matching->attrs, "avif");
|
|
899
|
+
|
|
900
|
+
/* Strip data-apex-* from attrs for the img */
|
|
901
|
+
size_t cap = 512 + (src ? strlen(src) * 2 : 0) + (webp_srcset ? strlen(webp_srcset) : 0) + (avif_srcset ? strlen(avif_srcset) : 0);
|
|
902
|
+
replacement = malloc(cap);
|
|
903
|
+
if (replacement) {
|
|
904
|
+
char *w = replacement;
|
|
905
|
+
w += snprintf(w, cap, "<picture>");
|
|
906
|
+
if (avif_srcset) w += snprintf(w, cap - (size_t)(w - replacement), "<source type=\"image/avif\" srcset=\"%s\">", avif_srcset);
|
|
907
|
+
if (webp_srcset) w += snprintf(w, cap - (size_t)(w - replacement), "<source type=\"image/webp\" srcset=\"%s\">", webp_srcset);
|
|
908
|
+
/* Preserve title on img for caption logic (apex_convert_image_captions) */
|
|
909
|
+
if (title && *title) {
|
|
910
|
+
w += snprintf(w, cap - (size_t)(w - replacement), "<img src=\"%s\" alt=\"%s\" title=\"%s\"></picture>", src, alt, title);
|
|
911
|
+
} else {
|
|
912
|
+
w += snprintf(w, cap - (size_t)(w - replacement), "<img src=\"%s\" alt=\"%s\"></picture>", src, alt);
|
|
913
|
+
}
|
|
914
|
+
repl_len = (size_t)(w - replacement);
|
|
915
|
+
}
|
|
916
|
+
free(webp_srcset);
|
|
917
|
+
free(avif_srcset);
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
if (replacement && repl_len > 0 && repl_len <= remaining) {
|
|
921
|
+
memcpy(write, replacement, repl_len);
|
|
922
|
+
write += repl_len;
|
|
923
|
+
remaining -= repl_len;
|
|
924
|
+
read = img_tag_end + 1;
|
|
925
|
+
free(replacement);
|
|
926
|
+
free(src);
|
|
927
|
+
free(alt);
|
|
928
|
+
free(title);
|
|
929
|
+
continue;
|
|
930
|
+
}
|
|
931
|
+
free(replacement);
|
|
932
|
+
free(src);
|
|
933
|
+
free(alt);
|
|
934
|
+
free(title);
|
|
935
|
+
}
|
|
936
|
+
/* Fall through to normal inject if replacement failed */
|
|
670
937
|
} else {
|
|
671
938
|
/* Find where to inject attributes */
|
|
672
939
|
const char *inject_point = NULL;
|
|
@@ -810,30 +1077,32 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
810
1077
|
return html ? strdup(html) : NULL;
|
|
811
1078
|
}
|
|
812
1079
|
|
|
813
|
-
/* Collect all headers from AST with their IDs */
|
|
1080
|
+
/* Collect all headers from AST with their IDs (level + text for matching) */
|
|
814
1081
|
typedef struct header_id_map {
|
|
1082
|
+
int level;
|
|
815
1083
|
char *text;
|
|
816
1084
|
char *id;
|
|
817
1085
|
int index;
|
|
1086
|
+
bool used;
|
|
818
1087
|
struct header_id_map *next;
|
|
819
1088
|
} header_id_map;
|
|
820
1089
|
|
|
821
1090
|
header_id_map *header_map = NULL;
|
|
822
1091
|
int header_count = 0;
|
|
823
1092
|
|
|
824
|
-
/* Walk AST to collect headers */
|
|
1093
|
+
/* Walk AST to collect headers (only markdown HEADING nodes, not raw HTML) */
|
|
825
1094
|
cmark_iter *iter = cmark_iter_new(document);
|
|
826
1095
|
cmark_event_type event;
|
|
827
1096
|
while ((event = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
|
828
1097
|
cmark_node *node = cmark_iter_get_node(iter);
|
|
829
1098
|
if (event == CMARK_EVENT_ENTER && cmark_node_get_type(node) == CMARK_NODE_HEADING) {
|
|
1099
|
+
int level = cmark_node_get_heading_level(node);
|
|
830
1100
|
char *text = apex_extract_heading_text(node);
|
|
831
1101
|
char *id = NULL;
|
|
832
1102
|
|
|
833
1103
|
/* Check if ID already exists from IAL or manual ID (stored in user_data) */
|
|
834
1104
|
char *user_data = (char *)cmark_node_get_user_data(node);
|
|
835
1105
|
if (user_data) {
|
|
836
|
-
/* Look for id="..." in user_data */
|
|
837
1106
|
const char *id_attr = strstr(user_data, "id=\"");
|
|
838
1107
|
if (id_attr) {
|
|
839
1108
|
const char *id_start = id_attr + 4;
|
|
@@ -849,16 +1118,17 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
849
1118
|
}
|
|
850
1119
|
}
|
|
851
1120
|
|
|
852
|
-
/* If no manual/IAL ID, generate one automatically */
|
|
853
1121
|
if (!id) {
|
|
854
1122
|
id = apex_generate_header_id(text, (apex_id_format_t)id_format);
|
|
855
1123
|
}
|
|
856
1124
|
|
|
857
1125
|
header_id_map *entry = malloc(sizeof(header_id_map));
|
|
858
1126
|
if (entry) {
|
|
1127
|
+
entry->level = level;
|
|
859
1128
|
entry->text = text;
|
|
860
1129
|
entry->id = id;
|
|
861
1130
|
entry->index = header_count++;
|
|
1131
|
+
entry->used = false;
|
|
862
1132
|
entry->next = header_map;
|
|
863
1133
|
header_map = entry;
|
|
864
1134
|
} else {
|
|
@@ -902,7 +1172,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
902
1172
|
const char *read = html;
|
|
903
1173
|
char *write = output;
|
|
904
1174
|
size_t remaining = capacity; /* Reserve 1 byte for null terminator */
|
|
905
|
-
int current_header_idx = 0;
|
|
906
1175
|
|
|
907
1176
|
while (*read) {
|
|
908
1177
|
/* Look for header opening tags: <h1>, <h2>, etc. */
|
|
@@ -925,6 +1194,58 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
925
1194
|
continue;
|
|
926
1195
|
}
|
|
927
1196
|
|
|
1197
|
+
/* Extract header content from HTML (between > and </hN>) for matching */
|
|
1198
|
+
int html_level = tag_start[2] - '0';
|
|
1199
|
+
const char *content_start = tag_end + 1;
|
|
1200
|
+
const char *closing = strstr(content_start, "</h");
|
|
1201
|
+
const char *content_end = content_start;
|
|
1202
|
+
if (closing && closing[2] >= '1' && closing[2] <= '6' && closing[3] == '>') {
|
|
1203
|
+
content_end = closing;
|
|
1204
|
+
}
|
|
1205
|
+
char content_buf[512];
|
|
1206
|
+
size_t content_len = content_end > content_start ? (size_t)(content_end - content_start) : 0;
|
|
1207
|
+
if (content_len >= sizeof(content_buf)) content_len = sizeof(content_buf) - 1;
|
|
1208
|
+
memcpy(content_buf, content_start, content_len);
|
|
1209
|
+
content_buf[content_len] = '\0';
|
|
1210
|
+
/* Decode &amp; to & and strip tags for comparison with AST text */
|
|
1211
|
+
{
|
|
1212
|
+
char *r = content_buf, *w = content_buf;
|
|
1213
|
+
while (*r) {
|
|
1214
|
+
if (strncmp(r, "&", 5) == 0) { *w++ = '&'; r += 5; }
|
|
1215
|
+
else if (strncmp(r, "<", 4) == 0) { *w++ = '<'; r += 4; }
|
|
1216
|
+
else if (strncmp(r, ">", 4) == 0) { *w++ = '>'; r += 4; }
|
|
1217
|
+
else if (*r == '<') { while (*r && *r != '>') r++; if (*r == '>') r++; }
|
|
1218
|
+
else { *w++ = *r++; }
|
|
1219
|
+
}
|
|
1220
|
+
*w = '\0';
|
|
1221
|
+
}
|
|
1222
|
+
/* Trim whitespace and newlines for comparison with AST text */
|
|
1223
|
+
char *trim_start = content_buf;
|
|
1224
|
+
while (*trim_start == ' ' || *trim_start == '\t' || *trim_start == '\n' || *trim_start == '\r') trim_start++;
|
|
1225
|
+
size_t trim_len = strlen(trim_start);
|
|
1226
|
+
while (trim_len > 0 && (trim_start[trim_len - 1] == ' ' || trim_start[trim_len - 1] == '\t' || trim_start[trim_len - 1] == '\n' || trim_start[trim_len - 1] == '\r'))
|
|
1227
|
+
trim_start[--trim_len] = '\0';
|
|
1228
|
+
|
|
1229
|
+
/* Match by (level, text); fallback to first unused at level only when text extraction
|
|
1230
|
+
differs (avoids assigning to raw HTML headers which have no AST entry at that level) */
|
|
1231
|
+
header_id_map *header = NULL;
|
|
1232
|
+
for (header_id_map *p = header_map; p; p = p->next) {
|
|
1233
|
+
if (!p->used && p->level == html_level && p->text && strcmp(p->text, trim_start) == 0) {
|
|
1234
|
+
header = p;
|
|
1235
|
+
p->used = true;
|
|
1236
|
+
break;
|
|
1237
|
+
}
|
|
1238
|
+
}
|
|
1239
|
+
if (!header) {
|
|
1240
|
+
for (header_id_map *p = header_map; p; p = p->next) {
|
|
1241
|
+
if (!p->used && p->level == html_level) {
|
|
1242
|
+
header = p;
|
|
1243
|
+
p->used = true;
|
|
1244
|
+
break;
|
|
1245
|
+
}
|
|
1246
|
+
}
|
|
1247
|
+
}
|
|
1248
|
+
|
|
928
1249
|
/* Check if ID already exists in the tag */
|
|
929
1250
|
bool has_id = false;
|
|
930
1251
|
const char *id_attr = strstr(tag_start, "id=");
|
|
@@ -932,8 +1253,7 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
932
1253
|
const char *id_end = NULL;
|
|
933
1254
|
if (id_attr && id_attr < tag_end) {
|
|
934
1255
|
has_id = true;
|
|
935
|
-
|
|
936
|
-
id_start = id_attr + 3; /* After 'id=' */
|
|
1256
|
+
id_start = id_attr + 3;
|
|
937
1257
|
while (id_start < tag_end && (*id_start == ' ' || *id_start == '"' || *id_start == '\'')) {
|
|
938
1258
|
id_start++;
|
|
939
1259
|
}
|
|
@@ -943,15 +1263,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
943
1263
|
}
|
|
944
1264
|
}
|
|
945
1265
|
|
|
946
|
-
/* Get the header ID - always get it so we can replace existing IDs */
|
|
947
|
-
header_id_map *header = NULL;
|
|
948
|
-
if (current_header_idx < header_count) {
|
|
949
|
-
header = header_map;
|
|
950
|
-
for (int i = 0; i < current_header_idx && header; i++) {
|
|
951
|
-
header = header->next;
|
|
952
|
-
}
|
|
953
|
-
}
|
|
954
|
-
|
|
955
1266
|
if (use_anchors && header && header->id) {
|
|
956
1267
|
/* For anchor tags: copy the entire header tag, then inject anchor after '>' */
|
|
957
1268
|
size_t tag_len = tag_end - tag_start + 1; /* Include '>' */
|
|
@@ -973,7 +1284,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
973
1284
|
write += anchor_len;
|
|
974
1285
|
remaining -= anchor_len;
|
|
975
1286
|
}
|
|
976
|
-
current_header_idx++;
|
|
977
1287
|
} else if (!use_anchors && header && header->id) {
|
|
978
1288
|
/* For header IDs: replace existing ID or inject new one */
|
|
979
1289
|
if (has_id && id_attr) {
|
|
@@ -1027,11 +1337,10 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
1027
1337
|
*write++ = *read++;
|
|
1028
1338
|
remaining--;
|
|
1029
1339
|
} else {
|
|
1030
|
-
|
|
1031
|
-
}
|
|
1340
|
+
read++;
|
|
1032
1341
|
}
|
|
1033
|
-
|
|
1034
|
-
|
|
1342
|
+
}
|
|
1343
|
+
} else {
|
|
1035
1344
|
/* No existing ID: copy tag up to '>', inject id attribute, then copy '>' */
|
|
1036
1345
|
const char *after_tag_name = tag_start + 3;
|
|
1037
1346
|
while (*after_tag_name && *after_tag_name != '>' && !isspace((unsigned char)*after_tag_name)) {
|
|
@@ -1089,7 +1398,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
1089
1398
|
}
|
|
1090
1399
|
}
|
|
1091
1400
|
}
|
|
1092
|
-
current_header_idx++;
|
|
1093
1401
|
} else {
|
|
1094
1402
|
/* No ID to inject, just copy the tag */
|
|
1095
1403
|
size_t tag_len = tag_end - tag_start + 1;
|
|
@@ -1099,9 +1407,6 @@ char *apex_inject_header_ids(const char *html, cmark_node *document, bool genera
|
|
|
1099
1407
|
remaining -= tag_len;
|
|
1100
1408
|
}
|
|
1101
1409
|
read = tag_end + 1;
|
|
1102
|
-
if (!has_id) {
|
|
1103
|
-
current_header_idx++;
|
|
1104
|
-
}
|
|
1105
1410
|
}
|
|
1106
1411
|
} else {
|
|
1107
1412
|
/* Copy character */
|
|
@@ -2478,6 +2783,74 @@ char *apex_convert_image_captions(const char *html, bool enable_image_captions,
|
|
|
2478
2783
|
size_t remaining = capacity;
|
|
2479
2784
|
|
|
2480
2785
|
while (*read) {
|
|
2786
|
+
/* Look for <picture> - wrap in figure when caption from img title/alt */
|
|
2787
|
+
if (*read == '<' && (read[1] == 'p' || read[1] == 'P') &&
|
|
2788
|
+
(read[2] == 'i' || read[2] == 'I') && (read[3] == 'c' || read[3] == 'C') &&
|
|
2789
|
+
(read[4] == 't' || read[4] == 'T') && (read[5] == 'u' || read[5] == 'U') &&
|
|
2790
|
+
(read[6] == 'r' || read[6] == 'R') && (read[7] == 'e' || read[7] == 'E') &&
|
|
2791
|
+
(read[8] == ' ' || read[8] == '>' || read[8] == '\t')) {
|
|
2792
|
+
const char *picture_start = read;
|
|
2793
|
+
const char *picture_end = strstr(read, "</picture>");
|
|
2794
|
+
if (picture_end) {
|
|
2795
|
+
picture_end += 10; /* include </picture> */
|
|
2796
|
+
/* Find <img inside the picture and extract title/alt for caption */
|
|
2797
|
+
const char *img_in = picture_start;
|
|
2798
|
+
char *title_str = NULL, *alt_str = NULL;
|
|
2799
|
+
while ((img_in = strstr(img_in, "<img")) != NULL && img_in < picture_end) {
|
|
2800
|
+
const char *img_tag_end = strchr(img_in, '>');
|
|
2801
|
+
if (img_tag_end && img_tag_end < picture_end) {
|
|
2802
|
+
title_str = extract_attr_from_tag(img_in, img_tag_end + 1, "title");
|
|
2803
|
+
alt_str = extract_attr_from_tag(img_in, img_tag_end + 1, "alt");
|
|
2804
|
+
break;
|
|
2805
|
+
}
|
|
2806
|
+
img_in += 4;
|
|
2807
|
+
}
|
|
2808
|
+
/* Determine caption from title or alt per options */
|
|
2809
|
+
const char *caption = NULL;
|
|
2810
|
+
size_t caption_len = 0;
|
|
2811
|
+
if (enable_image_captions) {
|
|
2812
|
+
if (title_captions_only && title_str && *title_str) {
|
|
2813
|
+
caption = title_str; caption_len = strlen(title_str);
|
|
2814
|
+
} else if (title_str && *title_str) {
|
|
2815
|
+
caption = title_str; caption_len = strlen(title_str);
|
|
2816
|
+
} else if (alt_str && *alt_str) {
|
|
2817
|
+
caption = alt_str; caption_len = strlen(alt_str);
|
|
2818
|
+
}
|
|
2819
|
+
}
|
|
2820
|
+
size_t block_len = (size_t)(picture_end - picture_start);
|
|
2821
|
+
if (caption && caption_len > 0) {
|
|
2822
|
+
size_t extra = 8 + 12 + caption_len + 13 + 9; /* figure + figcaption + </figcaption> + </figure> */
|
|
2823
|
+
if (extra + block_len >= remaining) {
|
|
2824
|
+
size_t used = write - output;
|
|
2825
|
+
size_t new_cap = (used + extra + block_len + 1) * 2;
|
|
2826
|
+
char *new_out = realloc(output, new_cap);
|
|
2827
|
+
if (!new_out) { free(title_str); free(alt_str); free(output); return NULL; }
|
|
2828
|
+
output = new_out; write = output + used; remaining = new_cap - used;
|
|
2829
|
+
}
|
|
2830
|
+
memcpy(write, "<figure>", 8); write += 8; remaining -= 8;
|
|
2831
|
+
memcpy(write, picture_start, block_len); write += block_len; remaining -= block_len;
|
|
2832
|
+
memcpy(write, "<figcaption>", 12); write += 12; remaining -= 12;
|
|
2833
|
+
memcpy(write, caption, caption_len); write += caption_len; remaining -= caption_len;
|
|
2834
|
+
memcpy(write, "</figcaption></figure>", sizeof("</figcaption></figure>") - 1);
|
|
2835
|
+
write += sizeof("</figcaption></figure>") - 1;
|
|
2836
|
+
remaining -= sizeof("</figcaption></figure>") - 1;
|
|
2837
|
+
} else {
|
|
2838
|
+
if (block_len >= remaining) {
|
|
2839
|
+
size_t used = write - output;
|
|
2840
|
+
size_t new_cap = (used + block_len + 1) * 2;
|
|
2841
|
+
char *new_out = realloc(output, new_cap);
|
|
2842
|
+
if (!new_out) { free(title_str); free(alt_str); free(output); return NULL; }
|
|
2843
|
+
output = new_out; write = output + used; remaining = new_cap - used;
|
|
2844
|
+
}
|
|
2845
|
+
memcpy(write, picture_start, block_len); write += block_len; remaining -= block_len;
|
|
2846
|
+
}
|
|
2847
|
+
free(title_str);
|
|
2848
|
+
free(alt_str);
|
|
2849
|
+
read = picture_end;
|
|
2850
|
+
continue;
|
|
2851
|
+
}
|
|
2852
|
+
}
|
|
2853
|
+
|
|
2481
2854
|
/* Look for <img tag */
|
|
2482
2855
|
if (*read == '<' && (read[1] == 'i' || read[1] == 'I') &&
|
|
2483
2856
|
(read[2] == 'm' || read[2] == 'M') &&
|
|
@@ -2527,6 +2900,43 @@ char *apex_convert_image_captions(const char *html, bool enable_image_captions,
|
|
|
2527
2900
|
|
|
2528
2901
|
const char *tag_end = p; /* Points at '>' */
|
|
2529
2902
|
|
|
2903
|
+
/* Skip img inside <picture> - picture's img is the fallback, don't wrap in figure */
|
|
2904
|
+
{
|
|
2905
|
+
bool inside_picture = false;
|
|
2906
|
+
const char *scan = tag_start - 1;
|
|
2907
|
+
while (scan >= html) {
|
|
2908
|
+
if (*scan == '<') {
|
|
2909
|
+
if (scan + 8 <= tag_start && strncasecmp(scan, "<picture", 8) == 0 &&
|
|
2910
|
+
(scan[8] == ' ' || scan[8] == '>' || scan[8] == '\t')) {
|
|
2911
|
+
inside_picture = true;
|
|
2912
|
+
break;
|
|
2913
|
+
}
|
|
2914
|
+
if (scan + 10 <= tag_start && strncmp(scan, "</picture>", 10) == 0) {
|
|
2915
|
+
break; /* Outside - we passed closing tag first */
|
|
2916
|
+
}
|
|
2917
|
+
/* Other tags (source, etc.) - keep scanning backwards */
|
|
2918
|
+
}
|
|
2919
|
+
scan--;
|
|
2920
|
+
}
|
|
2921
|
+
if (inside_picture) {
|
|
2922
|
+
size_t tag_len = (size_t)(tag_end - tag_start + 1);
|
|
2923
|
+
if (tag_len >= remaining) {
|
|
2924
|
+
size_t used = write - output;
|
|
2925
|
+
size_t new_cap = (used + tag_len + 1) * 2;
|
|
2926
|
+
char *new_out = realloc(output, new_cap);
|
|
2927
|
+
if (!new_out) { free(output); return NULL; }
|
|
2928
|
+
output = new_out;
|
|
2929
|
+
write = output + used;
|
|
2930
|
+
remaining = new_cap - used;
|
|
2931
|
+
}
|
|
2932
|
+
memcpy(write, tag_start, tag_len);
|
|
2933
|
+
write += tag_len;
|
|
2934
|
+
remaining -= tag_len;
|
|
2935
|
+
read = tag_end + 1;
|
|
2936
|
+
continue;
|
|
2937
|
+
}
|
|
2938
|
+
}
|
|
2939
|
+
|
|
2530
2940
|
/* Parse attributes between <img and > */
|
|
2531
2941
|
const char *attr_start = tag_start + 4;
|
|
2532
2942
|
const char *attr_end = tag_end;
|
|
@@ -2655,6 +3065,48 @@ char *apex_convert_image_captions(const char *html, bool enable_image_captions,
|
|
|
2655
3065
|
continue;
|
|
2656
3066
|
}
|
|
2657
3067
|
|
|
3068
|
+
/* Don't wrap in another <figure> if this image is already inside a <figure>
|
|
3069
|
+
* (e.g. from a fenced div ::: >figure), to avoid nested figure/figcaption. */
|
|
3070
|
+
{
|
|
3071
|
+
int figure_depth = 0;
|
|
3072
|
+
const char *scan = html;
|
|
3073
|
+
while (scan < tag_start) {
|
|
3074
|
+
if (*scan == '<') {
|
|
3075
|
+
if (scan + 8 <= tag_start &&
|
|
3076
|
+
(strncasecmp(scan + 1, "figure", 6) == 0) &&
|
|
3077
|
+
(scan[7] == '>' || isspace((unsigned char)scan[7]))) {
|
|
3078
|
+
figure_depth++;
|
|
3079
|
+
} else if (scan + 9 <= tag_start &&
|
|
3080
|
+
(strncasecmp(scan + 1, "/figure", 7) == 0) &&
|
|
3081
|
+
(scan[8] == '>' || isspace((unsigned char)scan[8]))) {
|
|
3082
|
+
if (figure_depth > 0) figure_depth--;
|
|
3083
|
+
}
|
|
3084
|
+
}
|
|
3085
|
+
scan++;
|
|
3086
|
+
}
|
|
3087
|
+
if (figure_depth > 0) {
|
|
3088
|
+
/* Already inside a figure - copy img tag as-is, no extra wrap */
|
|
3089
|
+
size_t tag_len = (size_t)(tag_end - tag_start + 1);
|
|
3090
|
+
if (tag_len >= remaining) {
|
|
3091
|
+
size_t used = write - output;
|
|
3092
|
+
size_t new_cap = (used + tag_len + 1) * 2;
|
|
3093
|
+
char *new_out = realloc(output, new_cap);
|
|
3094
|
+
if (!new_out) {
|
|
3095
|
+
free(output);
|
|
3096
|
+
return NULL;
|
|
3097
|
+
}
|
|
3098
|
+
output = new_out;
|
|
3099
|
+
write = output + used;
|
|
3100
|
+
remaining = new_cap - used;
|
|
3101
|
+
}
|
|
3102
|
+
memcpy(write, tag_start, tag_len);
|
|
3103
|
+
write += tag_len;
|
|
3104
|
+
remaining -= tag_len;
|
|
3105
|
+
read = tag_end + 1;
|
|
3106
|
+
continue;
|
|
3107
|
+
}
|
|
3108
|
+
}
|
|
3109
|
+
|
|
2658
3110
|
/* We have caption text - wrap in <figure><img ...><figcaption>...</figcaption></figure> */
|
|
2659
3111
|
const char *figure_open = "<figure>";
|
|
2660
3112
|
const char *figcaption_open = "<figcaption>";
|
|
@@ -2760,3 +3212,781 @@ char *apex_convert_image_captions(const char *html, bool enable_image_captions,
|
|
|
2760
3212
|
*write = '\0';
|
|
2761
3213
|
return output;
|
|
2762
3214
|
}
|
|
3215
|
+
|
|
3216
|
+
/**
 * Test whether the <p ...> element starting at p_open contains nothing but a
 * single <img ...> tag, optionally preceded by an escaped angle prefix
 * ("&lt;") and surrounded by whitespace.
 *
 * On success, *img_out and *img_len_out describe the <img ...> tag (including
 * its closing '>') and the return value points just past the paragraph's
 * closing "</p>". Returns NULL when the paragraph has any other shape, in
 * which case the output parameters are left untouched.
 */
static const char *figure_sole_img_paragraph(const char *p_open,
                                             const char **img_out,
                                             size_t *img_len_out) {
    static const char blanks[] = " \t\n\r";

    /* Step over the opening <p ...> tag. */
    const char *cursor = strchr(p_open, '>');
    if (!cursor) return NULL;
    cursor++;

    /* Optional escaped "<" prefix: four bytes, '&' 'l' 't' ';'. */
    cursor += strspn(cursor, blanks);
    if (strncmp(cursor, "&lt;", 4) == 0) {
        cursor += 4;
        cursor += strspn(cursor, blanks);
    }

    /* The only element allowed inside the paragraph is <img ...>. */
    if (strncasecmp(cursor, "<img", 4) != 0) return NULL;
    if (cursor[4] != ' ' && cursor[4] != '\t' && cursor[4] != '>' && cursor[4] != '/') {
        return NULL;
    }

    /* Find the end of the <img> tag; a '>' inside a quoted attribute value
     * does not terminate it. */
    const char *img = cursor;
    cursor += 4;
    while (*cursor && *cursor != '>') {
        if (*cursor == '"' || *cursor == '\'') {
            char quote = *cursor++;
            while (*cursor && *cursor != quote) cursor++;
            if (*cursor) cursor++;
        } else {
            cursor++;
        }
    }
    if (*cursor != '>') return NULL;
    cursor++;
    size_t img_len = (size_t)(cursor - img);

    /* Nothing but whitespace may separate the image from </p>. The chained
     * comparisons stop at the terminating NUL, so no explicit bound is needed. */
    cursor += strspn(cursor, blanks);
    if (cursor[0] != '<' || cursor[1] != '/') return NULL;
    if (cursor[2] != 'p' && cursor[2] != 'P') return NULL;
    if (cursor[3] != '>' && !isspace((unsigned char)cursor[3])) return NULL;

    /* An unterminated "</p " is not a match: leave the input untouched rather
     * than discarding everything after it. */
    const char *close = strchr(cursor + 3, '>');
    if (!close) return NULL;

    *img_out = img;
    *img_len_out = img_len;
    return close + 1;
}

/**
 * Strip a redundant <p> that wraps only a single <img> inside <figure>, along
 * with any leading escaped angle prefix ("&lt;" plus optional whitespace), so
 * that <figure><p>&lt; <img ...></p></figure> becomes
 * <figure><img ...></figure>.
 *
 * Paragraphs outside a <figure>, and paragraphs containing anything other
 * than one <img>, are copied through unchanged.
 *
 * @param html NUL-terminated HTML input (may be NULL).
 * @return Newly allocated string that the caller must free(), or NULL when
 *         html is NULL or the allocation fails.
 */
char *apex_strip_figure_paragraph_wrapper(const char *html) {
    if (!html) return NULL;

    /* This pass only copies or drops input bytes, so the result is never
     * longer than the input and one allocation of len + 1 always suffices. */
    size_t len = strlen(html);
    char *output = malloc(len + 1);
    if (!output) return NULL;

    const char *read = html;
    char *write = output;
    int figure_depth = 0;

    while (*read) {
        if (*read != '<') {
            *write++ = *read++;
            continue;
        }

        const char *tag = read + 1;

        if (tag[0] == '/') {
            /* </figure>: leave one nesting level. A stray closing tag must not
             * push the depth negative, or the next figure would be ignored. */
            if (strncasecmp(tag + 1, "figure", 6) == 0 &&
                (tag[7] == '>' || isspace((unsigned char)tag[7]))) {
                if (figure_depth > 0) figure_depth--;
            }
            *write++ = *read++;
            continue;
        }

        /* <figure ...>: enter a nesting level and copy the tag verbatim. */
        if (strncasecmp(tag, "figure", 6) == 0 &&
            (tag[6] == '>' || isspace((unsigned char)tag[6]))) {
            figure_depth++;
            while (*read && *read != '>') *write++ = *read++;
            if (*read == '>') *write++ = *read++;
            continue;
        }

        /* <p ...> inside a figure: replace it with its <img> when that is all
         * it contains. */
        if (figure_depth > 0 && (tag[0] == 'p' || tag[0] == 'P') &&
            (tag[1] == '>' || isspace((unsigned char)tag[1]))) {
            const char *img = NULL;
            size_t img_len = 0;
            const char *resume = figure_sole_img_paragraph(read, &img, &img_len);
            if (resume) {
                memcpy(write, img, img_len);
                write += img_len;
                read = resume;
                continue;
            }
        }

        /* Any other tag: copy the '<' and carry on character by character. */
        *write++ = *read++;
    }

    *write = '\0';
    return output;
}
|
|
3355
|
+
|
|
3356
|
+
/**
 * Find the end of the element that opens at pos.
 *
 * pos points at the '<' of an opening tag such as "<figure", "<video" or
 * "<picture". The search is confined to [pos, end) and never reads end
 * itself. Nested elements with the same name are tracked with a depth
 * counter, and tag names are compared case-insensitively.
 *
 * @param pos      Start of the opening tag.
 * @param end      One past the last byte that may be examined.
 * @param tag_name Element name without angle brackets (e.g. "figure").
 * @param tag_len  Length of tag_name in bytes.
 * @return Pointer just past the '>' of the matching closing tag, or NULL when
 *         the opening tag is unterminated or no matching close exists.
 */
static const char *find_block_close(const char *pos, const char *end, const char *tag_name, size_t tag_len) {
    const char *p = pos;

    /* Skip the opening tag; a '>' inside a quoted attribute does not end it. */
    while (p < end && *p != '>') {
        if (*p == '"' || *p == '\'') {
            char quote = *p++;
            while (p < end && *p != quote) p++;
            if (p < end) p++;
        } else {
            p++;
        }
    }
    if (p >= end) return NULL;
    p++; /* past '>' */

    int depth = 1;
    while (p < end) {
        p = memchr(p, '<', (size_t)(end - p));
        if (!p || p + 1 >= end) return NULL;

        bool closing = (p[1] == '/');
        const char *name = p + (closing ? 2 : 1);

        /* The byte after the name must also lie inside the range, hence the
         * strict comparison. */
        bool same_tag = (size_t)(end - name) > tag_len &&
                        strncasecmp(name, tag_name, tag_len) == 0 &&
                        (name[tag_len] == '>' || isspace((unsigned char)name[tag_len]));

        if (same_tag) {
            if (!closing) {
                depth++;
            } else if (--depth == 0) {
                const char *after_name = name + tag_len;
                const char *gt = memchr(after_name, '>', (size_t)(end - after_name));
                return gt ? gt + 1 : NULL;
            }
        }
        p++;
    }
    return NULL;
}
|
|
3404
|
+
|
|
3405
|
+
/**
 * Recognise "<p ...> WS <block ...>...</block> WS </p ...>" at p_open, where
 * block is figure, video or picture and WS is optional whitespace.
 *
 * On a match, *block_out / *block_len_out describe the block element from its
 * '<' up to and including its closing tag, and the return value points just
 * past the paragraph's closing '>'. Returns NULL for anything else.
 */
static const char *paragraph_wrapped_block(const char *p_open, const char *end,
                                           const char **block_out, size_t *block_len_out) {
    static const struct {
        const char *name;
        size_t len;
    } kinds[] = {
        {"figure", 6},
        {"video", 5},
        {"picture", 7},
    };

    /* End of the opening <p ...> tag. */
    const char *gt = p_open + 1;
    while (*gt != '\0' && *gt != '>') gt++;
    if (*gt == '\0' || gt >= end) return NULL;

    /* First non-blank byte of the paragraph body must start a tag. */
    const char *block = gt + 1;
    while (block < end && (*block == ' ' || *block == '\t' || *block == '\n' || *block == '\r')) {
        block++;
    }
    if (block >= end || *block != '<') return NULL;

    /* Only the first block kind whose name matches is searched for. */
    const char *name = block + 1;
    const char *block_close = NULL;
    for (size_t k = 0; k < sizeof kinds / sizeof kinds[0]; k++) {
        size_t n = kinds[k].len;
        if (block + n + 1 <= end && strncasecmp(name, kinds[k].name, n) == 0 &&
            (name[n] == '>' || isspace((unsigned char)name[n]))) {
            block_close = find_block_close(block, end, kinds[k].name, n);
            break;
        }
    }
    if (!block_close) return NULL;

    /* Only whitespace may separate the block from </p>. */
    const char *tail = block_close;
    while (tail < end && (*tail == ' ' || *tail == '\t' || *tail == '\n' || *tail == '\r')) {
        tail++;
    }
    if (tail + 4 > end) return NULL;
    if (tail[0] != '<' || tail[1] != '/') return NULL;
    if (tail[2] != 'p' && tail[2] != 'P') return NULL;
    if (tail[3] != '>' && !isspace((unsigned char)tail[3])) return NULL;

    const char *p_close = tail + 3;
    while (*p_close != '\0' && *p_close != '>') p_close++;
    if (*p_close != '>') return NULL;

    *block_out = block;
    *block_len_out = (size_t)(block_close - block);
    return p_close + 1;
}

/**
 * Remove a <p> whose entire content is one block element (figure, video or
 * picture).
 *
 * HTML5 only allows phrasing content inside <p>, while figure/video/picture
 * are flow content, so "<p><figure>...</figure></p>" is rewritten to
 * "<figure>...</figure>" (and likewise for video and picture). Everything
 * else is copied through unchanged.
 *
 * @param html NUL-terminated HTML input (may be NULL).
 * @return Newly allocated string that the caller must free(), or NULL when
 *         html is NULL or an allocation fails.
 */
char *apex_strip_block_paragraph_wrapper(const char *html) {
    if (html == NULL) return NULL;

    const size_t len = strlen(html);
    const char *const end = html + len;
    size_t capacity = len + 1;
    char *output = malloc(capacity);
    if (output == NULL) return NULL;

    char *write = output;
    size_t remaining = capacity;
    const char *read = html;

    while (*read != '\0') {
        /* Default step: copy exactly one input byte. */
        const char *chunk = read;
        size_t chunk_len = 1;
        const char *resume = read + 1;

        bool opens_paragraph = read[0] == '<' &&
                               (read[1] == 'p' || read[1] == 'P') &&
                               (read[2] == '>' || isspace((unsigned char)read[2]));
        if (opens_paragraph) {
            const char *block = NULL;
            size_t block_len = 0;
            const char *after = paragraph_wrapped_block(read, end, &block, &block_len);
            if (after != NULL) {
                /* Emit only the wrapped block and skip the whole paragraph. */
                chunk = block;
                chunk_len = block_len;
                resume = after;
            }
        }

        /* Always keep room for the chunk plus the terminating NUL. */
        if (chunk_len >= remaining) {
            size_t used = (size_t)(write - output);
            capacity = used + chunk_len + 1024;
            char *grown = realloc(output, capacity);
            if (grown == NULL) {
                free(output);
                return NULL;
            }
            output = grown;
            write = output + used;
            remaining = capacity - used;
        }

        memcpy(write, chunk, chunk_len);
        write += chunk_len;
        remaining -= chunk_len;
        read = resume;
    }

    *write = '\0';
    return output;
}
|
|
3497
|
+
|
|
3498
|
+
/**
|
|
3499
|
+
* Check if a local file exists (regular file).
|
|
3500
|
+
*/
|
|
3501
|
+
static bool file_exists(const char *path) {
|
|
3502
|
+
if (!path || !*path) return false;
|
|
3503
|
+
struct stat st;
|
|
3504
|
+
return (stat(path, &st) == 0 && S_ISREG(st.st_mode));
|
|
3505
|
+
}
|
|
3506
|
+
|
|
3507
|
+
/**
 * Build "<base_dir>/<url>" so a relative URL can be checked on disk.
 *
 * @param base_dir Directory the URL is relative to; must be non-empty.
 * @param url      URL taken from the document; must be non-empty.
 * @return Newly allocated path that the caller must free(), or NULL when
 *         either argument is NULL or empty, when url starts with '/'
 *         (absolute path), when url contains "://" (remote URL), or when
 *         allocation fails.
 */
static char *resolve_path_for_check(const char *base_dir, const char *url) {
    if (base_dir == NULL || url == NULL) return NULL;
    if (base_dir[0] == '\0' || url[0] == '\0') return NULL;

    bool is_absolute = (url[0] == '/');
    bool is_remote = (strstr(url, "://") != NULL);
    if (is_absolute || is_remote) return NULL;

    size_t base_len = strlen(base_dir);
    size_t url_len = strlen(url);

    /* base + '/' + url + NUL */
    char *joined = malloc(base_len + 1 + url_len + 1);
    if (joined == NULL) return NULL;

    memcpy(joined, base_dir, base_len);
    joined[base_len] = '/';
    memcpy(joined + base_len + 1, url, url_len + 1);
    return joined;
}
|
|
3521
|
+
|
|
3522
|
+
/**
 * Insert "@2x" before the file extension of a URL, e.g.
 * "img/photo.png?v=1" -> "img/photo@2x.png?v=1".
 *
 * The path part ends at the first '?' or '#', whichever comes first, and the
 * extension is the last '.' within the final path segment. A dot that only
 * appears in a directory name ("dir.v2/image") does not count as an
 * extension.
 *
 * @param url URL or relative path (may be NULL).
 * @return Newly allocated string that the caller must free(), or NULL when
 *         url is NULL or empty, has no extension, or allocation fails.
 */
static char *url_with_2x_suffix_auto(const char *url) {
    static const char marker[] = "@2x";
    const size_t marker_len = sizeof marker - 1;

    if (!url || !*url) return NULL;

    /* Length of the path part, excluding any query string or fragment. */
    size_t path_len = strcspn(url, "?#");

    /* Walk back from the end of the path to the extension dot, stopping at
     * the start of the final path segment. */
    const char *dot = NULL;
    for (size_t i = path_len; i > 0; i--) {
        char c = url[i - 1];
        if (c == '/') break;
        if (c == '.') {
            dot = url + (i - 1);
            break;
        }
    }
    if (!dot) return NULL;

    size_t prefix_len = (size_t)(dot - url);
    size_t suffix_len = strlen(dot); /* extension plus query/fragment */

    char *out = malloc(prefix_len + marker_len + suffix_len + 1);
    if (!out) return NULL;

    memcpy(out, url, prefix_len);
    memcpy(out + prefix_len, marker, marker_len);
    memcpy(out + prefix_len + marker_len, dot, suffix_len + 1); /* copies NUL */
    return out;
}
|
|
3544
|
+
|
|
3545
|
+
/**
 * Insert "@3x" before the file extension of a URL, e.g.
 * "img/photo.png?v=1" -> "img/photo@3x.png?v=1".
 *
 * The path part ends at the first '?' or '#', whichever comes first, and the
 * extension is the last '.' within the final path segment. A dot that only
 * appears in a directory name ("dir.v2/image") does not count as an
 * extension.
 *
 * @param url URL or relative path (may be NULL).
 * @return Newly allocated string that the caller must free(), or NULL when
 *         url is NULL or empty, has no extension, or allocation fails.
 */
static char *url_with_3x_suffix_auto(const char *url) {
    static const char marker[] = "@3x";
    const size_t marker_len = sizeof marker - 1;

    if (!url || !*url) return NULL;

    /* Length of the path part, excluding any query string or fragment. */
    size_t path_len = strcspn(url, "?#");

    /* Walk back from the end of the path to the extension dot, stopping at
     * the start of the final path segment. */
    const char *dot = NULL;
    for (size_t i = path_len; i > 0; i--) {
        char c = url[i - 1];
        if (c == '/') break;
        if (c == '.') {
            dot = url + (i - 1);
            break;
        }
    }
    if (!dot) return NULL;

    size_t prefix_len = (size_t)(dot - url);
    size_t suffix_len = strlen(dot); /* extension plus query/fragment */

    char *out = malloc(prefix_len + marker_len + suffix_len + 1);
    if (!out) return NULL;

    memcpy(out, url, prefix_len);
    memcpy(out + prefix_len, marker, marker_len);
    memcpy(out + prefix_len + marker_len, dot, suffix_len + 1); /* copies NUL */
    return out;
}
|
|
3567
|
+
|
|
3568
|
+
/**
 * Tell whether url ends in the two characters ".*", the wildcard extension
 * used to request auto-discovery of the real file.
 *
 * @param url URL to inspect (may be NULL).
 * @return true when the last two characters are ".*"; false for NULL, empty
 *         or shorter strings.
 */
static bool url_ends_with_wildcard(const char *url) {
    if (url == NULL) {
        return false;
    }

    size_t n = strlen(url);
    if (n < 2) {
        return false;
    }
    return strcmp(url + n - 2, ".*") == 0;
}
|
|
3576
|
+
|
|
3577
|
+
/**
 * Return the part of a wildcard URL that precedes its trailing ".*",
 * e.g. "img/photo.*" -> "img/photo".
 *
 * @param url URL expected to end in ".*" (may be NULL).
 * @return Newly allocated string that the caller must free(), or NULL when
 *         url is NULL, does not end in ".*", or allocation fails.
 */
static char *base_from_wildcard_url(const char *url) {
    if (url == NULL) return NULL;

    size_t total = strlen(url);
    if (total < 2 || memcmp(url + total - 2, ".*", 2) != 0) return NULL;

    size_t base_len = total - 2;
    char *base = malloc(base_len + 1);
    if (base == NULL) return NULL;

    memcpy(base, url, base_len);
    base[base_len] = '\0';
    return base;
}
|
|
3590
|
+
|
|
3591
|
+
/**
 * Check whether a URL's file extension is one of the recognised video
 * formats: mp4, mov, webm, ogg, ogv, m4v (case-insensitive).
 *
 * The path part ends at the first '?' or '#', whichever comes first, so
 * "clip.mp4?x=1" and "clip.mp4#t=10" are both recognised. The extension is
 * whatever follows the last '.' in the final path segment.
 *
 * @param url URL or relative path (may be NULL).
 * @return true when the extension is a known video extension, false
 *         otherwise (including NULL, empty, or extension-less input).
 */
static bool is_video_url_auto(const char *url) {
    static const char *const known_exts[] = {"mp4", "mov", "webm", "ogg", "ogv", "m4v"};

    if (!url || !*url) return false;

    /* Length of the path part, excluding any query string or fragment. */
    size_t path_len = strcspn(url, "?#");

    /* Walk back to just after the extension dot; a '/' first means the final
     * segment has no extension. */
    size_t ext_start = path_len;
    while (ext_start > 0 && url[ext_start - 1] != '.') {
        if (url[ext_start - 1] == '/') return false;
        ext_start--;
    }
    if (ext_start == 0) return false; /* no dot at all */

    size_t ext_len = path_len - ext_start;
    if (ext_len == 0) return false; /* path ends in '.' */

    for (size_t i = 0; i < sizeof known_exts / sizeof known_exts[0]; i++) {
        if (strlen(known_exts[i]) == ext_len &&
            strncasecmp(url + ext_start, known_exts[i], ext_len) == 0) {
            return true;
        }
    }
    return false;
}
|
|
3614
|
+
|
|
3615
|
+
/**
|
|
3616
|
+
* Expand img tags with data-apex-replace-auto=1 by discovering existing
|
|
3617
|
+
* format variants on disk and generating appropriate <picture> or <video>.
|
|
3618
|
+
* Only processes local (relative) URLs when base_directory is provided.
|
|
3619
|
+
* Caller must free the returned string.
|
|
3620
|
+
*/
|
|
3621
|
+
char *apex_expand_auto_media(const char *html, const char *base_directory) {
|
|
3622
|
+
if (!html) return NULL;
|
|
3623
|
+
if (!base_directory || !*base_directory) return strdup(html);
|
|
3624
|
+
|
|
3625
|
+
size_t len = strlen(html);
|
|
3626
|
+
size_t capacity = len * 2 + 2048;
|
|
3627
|
+
char *output = malloc(capacity);
|
|
3628
|
+
if (!output) return NULL;
|
|
3629
|
+
|
|
3630
|
+
const char *read = html;
|
|
3631
|
+
char *write = output;
|
|
3632
|
+
size_t remaining = capacity;
|
|
3633
|
+
|
|
3634
|
+
while (*read) {
|
|
3635
|
+
if (*read == '<' && (read[1] == 'i' || read[1] == 'I') &&
|
|
3636
|
+
(read[2] == 'm' || read[2] == 'M') && (read[3] == 'g' || read[3] == 'G') &&
|
|
3637
|
+
(read[4] == ' ' || read[4] == '\t' || read[4] == '>' || read[4] == '/')) {
|
|
3638
|
+
|
|
3639
|
+
const char *tag_start = read;
|
|
3640
|
+
const char *tag_end = find_tag_end(tag_start);
|
|
3641
|
+
if (!tag_end) {
|
|
3642
|
+
*write++ = *read++;
|
|
3643
|
+
remaining--;
|
|
3644
|
+
continue;
|
|
3645
|
+
}
|
|
3646
|
+
|
|
3647
|
+
/* Check for data-apex-replace-auto=1 */
|
|
3648
|
+
if (!strstr(tag_start, "data-apex-replace-auto=1")) {
|
|
3649
|
+
size_t tag_len = (size_t)(tag_end - tag_start + 1);
|
|
3650
|
+
if (tag_len >= remaining) {
|
|
3651
|
+
size_t used = (size_t)(write - output);
|
|
3652
|
+
capacity = used + tag_len + 2048;
|
|
3653
|
+
char *new_out = realloc(output, capacity);
|
|
3654
|
+
if (!new_out) { free(output); return NULL; }
|
|
3655
|
+
output = new_out;
|
|
3656
|
+
write = output + used;
|
|
3657
|
+
remaining = capacity - used;
|
|
3658
|
+
}
|
|
3659
|
+
memcpy(write, tag_start, tag_len);
|
|
3660
|
+
write += tag_len;
|
|
3661
|
+
remaining -= tag_len;
|
|
3662
|
+
read = tag_end + 1;
|
|
3663
|
+
continue;
|
|
3664
|
+
}
|
|
3665
|
+
|
|
3666
|
+
char *src = extract_attr_from_tag(tag_start, tag_end + 1, "src");
|
|
3667
|
+
char *alt = extract_attr_from_tag(tag_start, tag_end + 1, "alt");
|
|
3668
|
+
char *title = extract_attr_from_tag(tag_start, tag_end + 1, "title");
|
|
3669
|
+
if (!src) src = strdup("");
|
|
3670
|
+
if (!alt) alt = strdup("");
|
|
3671
|
+
|
|
3672
|
+
char *replacement = NULL;
|
|
3673
|
+
size_t repl_len = 0;
|
|
3674
|
+
|
|
3675
|
+
/* When src ends with .*, discover first existing file to use as fallback */
|
|
3676
|
+
char *effective_src = strdup(src ? src : "");
|
|
3677
|
+
char *resolved = resolve_path_for_check(base_directory, effective_src);
|
|
3678
|
+
if (url_ends_with_wildcard(src)) {
|
|
3679
|
+
char *base = base_from_wildcard_url(src);
|
|
3680
|
+
if (base) {
|
|
3681
|
+
/* Check video extensions first, then image extensions.
|
|
3682
|
+
* url_with_extension(src, ext) works: "image.*" -> "image.jpg" */
|
|
3683
|
+
static const char *video_exts[] = {"mp4", "webm", "ogg", "ogv", "mov", "m4v", NULL};
|
|
3684
|
+
static const char *image_exts[] = {"jpg", "jpeg", "png", "gif", "webp", "avif", NULL};
|
|
3685
|
+
bool found = false;
|
|
3686
|
+
for (int i = 0; video_exts[i] && !found; i++) {
|
|
3687
|
+
char *candidate = url_with_extension(src, video_exts[i]);
|
|
3688
|
+
if (candidate) {
|
|
3689
|
+
char *cpath = resolve_path_for_check(base_directory, candidate);
|
|
3690
|
+
if (cpath && file_exists(cpath)) {
|
|
3691
|
+
free(effective_src);
|
|
3692
|
+
effective_src = candidate;
|
|
3693
|
+
free(resolved);
|
|
3694
|
+
resolved = cpath;
|
|
3695
|
+
found = true;
|
|
3696
|
+
} else {
|
|
3697
|
+
free(cpath);
|
|
3698
|
+
free(candidate);
|
|
3699
|
+
}
|
|
3700
|
+
}
|
|
3701
|
+
}
|
|
3702
|
+
for (int i = 0; image_exts[i] && !found; i++) {
|
|
3703
|
+
char *candidate = url_with_extension(src, image_exts[i]);
|
|
3704
|
+
if (candidate) {
|
|
3705
|
+
char *cpath = resolve_path_for_check(base_directory, candidate);
|
|
3706
|
+
if (cpath && file_exists(cpath)) {
|
|
3707
|
+
free(effective_src);
|
|
3708
|
+
effective_src = candidate;
|
|
3709
|
+
free(resolved);
|
|
3710
|
+
resolved = cpath;
|
|
3711
|
+
found = true;
|
|
3712
|
+
} else {
|
|
3713
|
+
free(cpath);
|
|
3714
|
+
free(candidate);
|
|
3715
|
+
}
|
|
3716
|
+
}
|
|
3717
|
+
}
|
|
3718
|
+
free(base);
|
|
3719
|
+
if (!found) {
|
|
3720
|
+
free(resolved);
|
|
3721
|
+
resolved = NULL;
|
|
3722
|
+
}
|
|
3723
|
+
}
|
|
3724
|
+
} else if (resolved && !file_exists(resolved)) {
|
|
3725
|
+
free(resolved);
|
|
3726
|
+
resolved = NULL;
|
|
3727
|
+
}
|
|
3728
|
+
|
|
3729
|
+
if (resolved && file_exists(resolved)) {
|
|
3730
|
+
/* Use effective_src (may differ from src when wildcard was resolved) */
|
|
3731
|
+
free(src);
|
|
3732
|
+
src = effective_src;
|
|
3733
|
+
if (is_video_url_auto(src)) {
|
|
3734
|
+
/* Video: discover alternative formats that exist */
|
|
3735
|
+
static const char *video_exts[] = {"webm", "ogg", "mp4", "mov", "m4v", NULL};
|
|
3736
|
+
size_t cap = 512 + strlen(src) * 6;
|
|
3737
|
+
replacement = malloc(cap);
|
|
3738
|
+
if (replacement) {
|
|
3739
|
+
char *w = replacement;
|
|
3740
|
+
w += snprintf(w, cap, "<video");
|
|
3741
|
+
if (alt && *alt) w += snprintf(w, cap - (size_t)(w - replacement), " title=\"%s\"", alt);
|
|
3742
|
+
w += snprintf(w, cap - (size_t)(w - replacement), ">");
|
|
3743
|
+
|
|
3744
|
+
for (int i = 0; video_exts[i]; i++) {
|
|
3745
|
+
char *variant_url = url_with_extension(src, video_exts[i]);
|
|
3746
|
+
if (variant_url) {
|
|
3747
|
+
char *variant_path = resolve_path_for_check(base_directory, variant_url);
|
|
3748
|
+
if (variant_path && file_exists(variant_path)) {
|
|
3749
|
+
const char *mime = (strcmp(video_exts[i], "webm") == 0) ? "video/webm" :
|
|
3750
|
+
(strcmp(video_exts[i], "ogg") == 0) ? "video/ogg" :
|
|
3751
|
+
(strcmp(video_exts[i], "mov") == 0) ? "video/quicktime" : "video/mp4";
|
|
3752
|
+
w += snprintf(w, cap - (size_t)(w - replacement),
|
|
3753
|
+
"<source src=\"%s\" type=\"%s\">", variant_url, mime);
|
|
3754
|
+
}
|
|
3755
|
+
free(variant_path);
|
|
3756
|
+
free(variant_url);
|
|
3757
|
+
}
|
|
3758
|
+
}
|
|
3759
|
+
w += snprintf(w, cap - (size_t)(w - replacement),
|
|
3760
|
+
"<source src=\"%s\" type=\"%s\">", src, video_type_from_url(src));
|
|
3761
|
+
w += snprintf(w, cap - (size_t)(w - replacement), "</video>");
|
|
3762
|
+
repl_len = (size_t)(w - replacement);
|
|
3763
|
+
}
|
|
3764
|
+
} else {
|
|
3765
|
+
/* Image: discover 2x, 3x, webp, avif variants */
|
|
3766
|
+
bool has_2x = false, has_3x = false;
|
|
3767
|
+
bool has_webp_1x = false, has_webp_2x = false, has_webp_3x = false;
|
|
3768
|
+
bool has_avif_1x = false, has_avif_2x = false, has_avif_3x = false;
|
|
3769
|
+
|
|
3770
|
+
char *url_2x = url_with_2x_suffix_auto(src);
|
|
3771
|
+
char *url_3x = url_with_3x_suffix_auto(src);
|
|
3772
|
+
if (url_2x) {
|
|
3773
|
+
char *p2 = resolve_path_for_check(base_directory, url_2x);
|
|
3774
|
+
has_2x = (p2 && file_exists(p2));
|
|
3775
|
+
free(p2);
|
|
3776
|
+
}
|
|
3777
|
+
if (url_3x) {
|
|
3778
|
+
char *p3 = resolve_path_for_check(base_directory, url_3x);
|
|
3779
|
+
has_3x = (p3 && file_exists(p3));
|
|
3780
|
+
free(p3);
|
|
3781
|
+
}
|
|
3782
|
+
|
|
3783
|
+
char *webp_1x = url_with_extension(src, "webp");
|
|
3784
|
+
if (webp_1x) {
|
|
3785
|
+
char *p = resolve_path_for_check(base_directory, webp_1x);
|
|
3786
|
+
has_webp_1x = (p && file_exists(p));
|
|
3787
|
+
free(p);
|
|
3788
|
+
}
|
|
3789
|
+
if (url_2x && webp_1x) {
|
|
3790
|
+
char *webp_2x = url_with_extension(url_2x, "webp");
|
|
3791
|
+
if (webp_2x) {
|
|
3792
|
+
char *p = resolve_path_for_check(base_directory, webp_2x);
|
|
3793
|
+
has_webp_2x = (p && file_exists(p));
|
|
3794
|
+
free(p);
|
|
3795
|
+
free(webp_2x);
|
|
3796
|
+
}
|
|
3797
|
+
}
|
|
3798
|
+
if (url_3x && webp_1x) {
|
|
3799
|
+
char *webp_3x = url_with_extension(url_3x, "webp");
|
|
3800
|
+
if (webp_3x) {
|
|
3801
|
+
char *p = resolve_path_for_check(base_directory, webp_3x);
|
|
3802
|
+
has_webp_3x = (p && file_exists(p));
|
|
3803
|
+
free(p);
|
|
3804
|
+
free(webp_3x);
|
|
3805
|
+
}
|
|
3806
|
+
}
|
|
3807
|
+
free(webp_1x);
|
|
3808
|
+
|
|
3809
|
+
char *avif_1x = url_with_extension(src, "avif");
|
|
3810
|
+
if (avif_1x) {
|
|
3811
|
+
char *p = resolve_path_for_check(base_directory, avif_1x);
|
|
3812
|
+
has_avif_1x = (p && file_exists(p));
|
|
3813
|
+
free(p);
|
|
3814
|
+
}
|
|
3815
|
+
if (url_2x && avif_1x) {
|
|
3816
|
+
char *avif_2x = url_with_extension(url_2x, "avif");
|
|
3817
|
+
if (avif_2x) {
|
|
3818
|
+
char *p = resolve_path_for_check(base_directory, avif_2x);
|
|
3819
|
+
has_avif_2x = (p && file_exists(p));
|
|
3820
|
+
free(p);
|
|
3821
|
+
free(avif_2x);
|
|
3822
|
+
}
|
|
3823
|
+
}
|
|
3824
|
+
if (url_3x && avif_1x) {
|
|
3825
|
+
char *avif_3x = url_with_extension(url_3x, "avif");
|
|
3826
|
+
if (avif_3x) {
|
|
3827
|
+
char *p = resolve_path_for_check(base_directory, avif_3x);
|
|
3828
|
+
has_avif_3x = (p && file_exists(p));
|
|
3829
|
+
free(p);
|
|
3830
|
+
free(avif_3x);
|
|
3831
|
+
}
|
|
3832
|
+
}
|
|
3833
|
+
free(avif_1x);
|
|
3834
|
+
free(url_2x);
|
|
3835
|
+
free(url_3x);
|
|
3836
|
+
|
|
3837
|
+
bool need_picture = has_webp_1x || has_webp_2x || has_webp_3x ||
|
|
3838
|
+
has_avif_1x || has_avif_2x || has_avif_3x;
|
|
3839
|
+
bool need_srcset = has_2x || has_3x;
|
|
3840
|
+
|
|
3841
|
+
if (need_picture || need_srcset) {
|
|
3842
|
+
size_t cap = 1024 + strlen(src) * 8;
|
|
3843
|
+
replacement = malloc(cap);
|
|
3844
|
+
if (replacement) {
|
|
3845
|
+
char *w = replacement;
|
|
3846
|
+
if (need_picture) w += snprintf(w, cap, "<picture>");
|
|
3847
|
+
|
|
3848
|
+
/* AVIF first (preferred), then WebP */
|
|
3849
|
+
if (has_avif_1x || has_avif_2x || has_avif_3x) {
|
|
3850
|
+
char *av1 = url_with_extension(src, "avif");
|
|
3851
|
+
char *s2 = url_with_2x_suffix_auto(src);
|
|
3852
|
+
char *av2 = s2 ? url_with_extension(s2, "avif") : NULL;
|
|
3853
|
+
free(s2);
|
|
3854
|
+
char *s3 = url_with_3x_suffix_auto(src);
|
|
3855
|
+
char *av3 = s3 ? url_with_extension(s3, "avif") : NULL;
|
|
3856
|
+
free(s3);
|
|
3857
|
+
char srcset[512] = "";
|
|
3858
|
+
if (av1) snprintf(srcset, sizeof(srcset), "%s 1x", av1);
|
|
3859
|
+
if (av2 && has_avif_2x) {
|
|
3860
|
+
size_t l = strlen(srcset);
|
|
3861
|
+
snprintf(srcset + l, sizeof(srcset) - l, "%s%s 2x", l ? ", " : "", av2);
|
|
3862
|
+
}
|
|
3863
|
+
if (av3 && has_avif_3x) {
|
|
3864
|
+
size_t l = strlen(srcset);
|
|
3865
|
+
snprintf(srcset + l, sizeof(srcset) - l, "%s%s 3x", l ? ", " : "", av3);
|
|
3866
|
+
}
|
|
3867
|
+
if (*srcset) w += snprintf(w, cap - (size_t)(w - replacement),
|
|
3868
|
+
"<source type=\"image/avif\" srcset=\"%s\">", srcset);
|
|
3869
|
+
free(av1); free(av2); free(av3);
|
|
3870
|
+
}
|
|
3871
|
+
if (has_webp_1x || has_webp_2x || has_webp_3x) {
|
|
3872
|
+
char *wb1 = url_with_extension(src, "webp");
|
|
3873
|
+
char *s2 = url_with_2x_suffix_auto(src);
|
|
3874
|
+
char *wb2 = s2 ? url_with_extension(s2, "webp") : NULL;
|
|
3875
|
+
free(s2);
|
|
3876
|
+
char *s3 = url_with_3x_suffix_auto(src);
|
|
3877
|
+
char *wb3 = s3 ? url_with_extension(s3, "webp") : NULL;
|
|
3878
|
+
free(s3);
|
|
3879
|
+
char srcset[512] = "";
|
|
3880
|
+
if (wb1) snprintf(srcset, sizeof(srcset), "%s 1x", wb1);
|
|
3881
|
+
if (wb2 && has_webp_2x) {
|
|
3882
|
+
size_t l = strlen(srcset);
|
|
3883
|
+
snprintf(srcset + l, sizeof(srcset) - l, "%s%s 2x", l ? ", " : "", wb2);
|
|
3884
|
+
}
|
|
3885
|
+
if (wb3 && has_webp_3x) {
|
|
3886
|
+
size_t l = strlen(srcset);
|
|
3887
|
+
snprintf(srcset + l, sizeof(srcset) - l, "%s%s 3x", l ? ", " : "", wb3);
|
|
3888
|
+
}
|
|
3889
|
+
if (*srcset) w += snprintf(w, cap - (size_t)(w - replacement),
|
|
3890
|
+
"<source type=\"image/webp\" srcset=\"%s\">", srcset);
|
|
3891
|
+
free(wb1); free(wb2); free(wb3);
|
|
3892
|
+
}
|
|
3893
|
+
|
|
3894
|
+
/* Build img with optional srcset for 2x/3x */
|
|
3895
|
+
char srcset_attr[512] = "";
|
|
3896
|
+
if (need_srcset) {
|
|
3897
|
+
char *u2 = url_with_2x_suffix_auto(src);
|
|
3898
|
+
char *u3 = url_with_3x_suffix_auto(src);
|
|
3899
|
+
snprintf(srcset_attr, sizeof(srcset_attr), " srcset=\"%s 1x", src);
|
|
3900
|
+
if (has_2x && u2) {
|
|
3901
|
+
size_t l = strlen(srcset_attr);
|
|
3902
|
+
snprintf(srcset_attr + l, sizeof(srcset_attr) - l, ", %s 2x", u2);
|
|
3903
|
+
}
|
|
3904
|
+
if (has_3x && u3) {
|
|
3905
|
+
size_t l = strlen(srcset_attr);
|
|
3906
|
+
snprintf(srcset_attr + l, sizeof(srcset_attr) - l, ", %s 3x", u3);
|
|
3907
|
+
}
|
|
3908
|
+
strcat(srcset_attr, "\"");
|
|
3909
|
+
free(u2); free(u3);
|
|
3910
|
+
}
|
|
3911
|
+
/* Preserve title on img for caption logic */
|
|
3912
|
+
if (title && *title) {
|
|
3913
|
+
w += snprintf(w, cap - (size_t)(w - replacement),
|
|
3914
|
+
"<img src=\"%s\" alt=\"%s\" title=\"%s\"%s>%s",
|
|
3915
|
+
src, alt && *alt ? alt : "", title, srcset_attr,
|
|
3916
|
+
need_picture ? "</picture>" : "");
|
|
3917
|
+
} else {
|
|
3918
|
+
w += snprintf(w, cap - (size_t)(w - replacement),
|
|
3919
|
+
"<img src=\"%s\" alt=\"%s\"%s>%s",
|
|
3920
|
+
src, alt && *alt ? alt : "", srcset_attr,
|
|
3921
|
+
need_picture ? "</picture>" : "");
|
|
3922
|
+
}
|
|
3923
|
+
repl_len = (size_t)(w - replacement);
|
|
3924
|
+
}
|
|
3925
|
+
}
|
|
3926
|
+
}
|
|
3927
|
+
free(resolved);
|
|
3928
|
+
}
|
|
3929
|
+
|
|
3930
|
+
if (replacement && repl_len > 0) {
|
|
3931
|
+
if (repl_len > remaining) {
|
|
3932
|
+
size_t used = (size_t)(write - output);
|
|
3933
|
+
capacity = used + repl_len + 1024;
|
|
3934
|
+
char *new_out = realloc(output, capacity);
|
|
3935
|
+
if (!new_out) { free(output); free(replacement); free(src); free(alt); free(title); return NULL; }
|
|
3936
|
+
output = new_out;
|
|
3937
|
+
write = output + used;
|
|
3938
|
+
remaining = capacity - used;
|
|
3939
|
+
}
|
|
3940
|
+
memcpy(write, replacement, repl_len);
|
|
3941
|
+
write += repl_len;
|
|
3942
|
+
remaining -= repl_len;
|
|
3943
|
+
read = tag_end + 1;
|
|
3944
|
+
} else {
|
|
3945
|
+
/* Copy original tag */
|
|
3946
|
+
size_t tag_len = (size_t)(tag_end - tag_start + 1);
|
|
3947
|
+
if (tag_len >= remaining) {
|
|
3948
|
+
size_t used = (size_t)(write - output);
|
|
3949
|
+
capacity = used + tag_len + 1024;
|
|
3950
|
+
char *new_out = realloc(output, capacity);
|
|
3951
|
+
if (!new_out) { free(output); free(replacement); free(src); free(alt); free(title); return NULL; }
|
|
3952
|
+
output = new_out;
|
|
3953
|
+
write = output + used;
|
|
3954
|
+
remaining = capacity - used;
|
|
3955
|
+
}
|
|
3956
|
+
memcpy(write, tag_start, tag_len);
|
|
3957
|
+
write += tag_len;
|
|
3958
|
+
remaining -= tag_len;
|
|
3959
|
+
read = tag_end + 1;
|
|
3960
|
+
}
|
|
3961
|
+
|
|
3962
|
+
free(replacement);
|
|
3963
|
+
if (effective_src != src) free(effective_src);
|
|
3964
|
+
free(src);
|
|
3965
|
+
free(alt);
|
|
3966
|
+
free(title);
|
|
3967
|
+
continue;
|
|
3968
|
+
}
|
|
3969
|
+
|
|
3970
|
+
if (remaining < 2) {
|
|
3971
|
+
size_t used = (size_t)(write - output);
|
|
3972
|
+
capacity = used + len + 1024;
|
|
3973
|
+
char *new_out = realloc(output, capacity);
|
|
3974
|
+
if (!new_out) { free(output); return NULL; }
|
|
3975
|
+
output = new_out;
|
|
3976
|
+
write = output + used;
|
|
3977
|
+
remaining = capacity - used;
|
|
3978
|
+
}
|
|
3979
|
+
*write++ = *read++;
|
|
3980
|
+
remaining--;
|
|
3981
|
+
}
|
|
3982
|
+
|
|
3983
|
+
if (remaining < 1) {
|
|
3984
|
+
size_t used = (size_t)(write - output);
|
|
3985
|
+
char *new_out = realloc(output, used + 1);
|
|
3986
|
+
if (!new_out) { free(output); return NULL; }
|
|
3987
|
+
output = new_out;
|
|
3988
|
+
write = output + used;
|
|
3989
|
+
}
|
|
3990
|
+
*write = '\0';
|
|
3991
|
+
return output;
|
|
3992
|
+
}
|