apex-ruby 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85):
  1. checksums.yaml +4 -4
  2. data/ext/apex_ext/apex_ext.c +6 -0
  3. data/ext/apex_ext/apex_src/AGENTS.md +41 -0
  4. data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
  5. data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
  6. data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
  7. data/ext/apex_ext/apex_src/Package.swift +9 -0
  8. data/ext/apex_ext/apex_src/README.md +31 -9
  9. data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
  10. data/ext/apex_ext/apex_src/VERSION +1 -1
  11. data/ext/apex_ext/apex_src/cli/main.c +1125 -13
  12. data/ext/apex_ext/apex_src/docs/index.md +459 -0
  13. data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
  14. data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
  15. data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
  16. data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
  17. data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
  18. data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
  19. data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
  20. data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
  21. data/ext/apex_ext/apex_src/man/apex.1 +663 -620
  22. data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
  23. data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
  24. data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
  25. data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
  26. data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
  27. data/ext/apex_ext/apex_src/pages/index.md +459 -0
  28. data/ext/apex_ext/apex_src/src/_README.md +4 -4
  29. data/ext/apex_ext/apex_src/src/apex.c +702 -44
  30. data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
  31. data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
  32. data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
  33. data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
  34. data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
  35. data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
  36. data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
  37. data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
  38. data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
  39. data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
  40. data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
  41. data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
  42. data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
  43. data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
  44. data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
  45. data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
  46. data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
  47. data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
  48. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
  49. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
  50. data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
  51. data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
  52. data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
  53. data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
  54. data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
  55. data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
  56. data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
  57. data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
  58. data/ext/apex_ext/apex_src/tests/README.md +11 -5
  59. data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
  60. data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
  61. data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
  62. data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
  63. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
  64. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
  65. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
  66. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
  67. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
  68. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
  69. data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
  70. data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
  71. data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
  72. data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
  73. data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
  74. data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
  75. data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
  76. data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
  77. data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
  78. data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
  79. data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
  80. data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
  81. data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
  82. data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
  83. data/lib/apex/version.rb +1 -1
  84. metadata +32 -2
  85. data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
@@ -1,12 +1,11 @@
1
1
  #include "apex/apex.h"
2
2
  #include <stdlib.h>
3
3
  #include <string.h>
4
+ #include <strings.h>
4
5
  #include <stdio.h>
5
6
  #include <ctype.h>
6
7
  #include <sys/stat.h>
7
8
  #include <unistd.h>
8
- #include <libgen.h>
9
- #include <time.h>
10
9
  #include <sys/time.h>
11
10
 
12
11
  /* cmark-gfm headers */
@@ -42,6 +41,11 @@
42
41
  #include "extensions/fenced_divs.h"
43
42
  #include "extensions/syntax_highlight.h"
44
43
  #include "plugins.h"
44
+ #include "ast_json.h"
45
+ #include "apex/ast_markdown.h"
46
+ #include "apex/ast_terminal.h"
47
+ #include "apex/ast_man.h"
48
+ #include "filters_ast.h"
45
49
 
46
50
  /* Custom renderer */
47
51
  #include "html_renderer.h"
@@ -70,6 +74,185 @@ static char *apex_encode_hex_entities(const char *text, size_t len) {
70
74
  return out;
71
75
  }
72
76
 
77
/**
 * Escape a string for safe use inside an HTML attribute value.
 *
 * The characters &, <, >, " and ' are replaced with &amp;, &lt;, &gt;,
 * &quot; and &#39; respectively; every other byte is copied unchanged.
 *
 * @param input NUL-terminated text, or NULL (treated as the empty string).
 * @return Newly allocated NUL-terminated string that the caller must free(),
 *         or NULL if allocation fails or the worst-case output size cannot
 *         be represented in a size_t.
 */
static char *apex_escape_html_attr(const char *input) {
    if (!input) {
        /* NULL input yields an allocated empty string so callers can free() it. */
        char *empty = malloc(1);
        if (empty) empty[0] = '\0';
        return empty;
    }

    size_t len = strlen(input);

    /* Worst case is six output bytes per input byte (&quot;) plus the
     * terminator. Refuse lengths for which len * 6 + 1 would wrap, since a
     * wrapped size would produce an undersized buffer. */
    if (len > ((size_t)-1 - 1) / 6) return NULL;

    char *out = malloc(len * 6 + 1);
    if (!out) return NULL;

    char *w = out;
    for (const char *p = input; *p; p++) {
        switch (*p) {
            case '&':
                memcpy(w, "&amp;", 5);
                w += 5;
                break;
            case '<':
                memcpy(w, "&lt;", 4);
                w += 4;
                break;
            case '>':
                memcpy(w, "&gt;", 4);
                w += 4;
                break;
            case '"':
                memcpy(w, "&quot;", 6);
                w += 6;
                break;
            case '\'':
                memcpy(w, "&#39;", 5);
                w += 5;
                break;
            default:
                *w++ = *p;
                break;
        }
    }
    *w = '\0';
    return out;
}
119
+
120
/**
 * Produce the canonical form of a metadata key: all whitespace characters
 * are dropped and the remaining characters are lowercased.
 *
 * @param key NUL-terminated key, or NULL.
 * @return Newly allocated canonical key (caller frees), or NULL when key is
 *         NULL or allocation fails.
 */
static char *apex_normalize_meta_key(const char *key) {
    if (key == NULL) {
        return NULL;
    }

    /* The result can never be longer than the input. */
    char *result = malloc(strlen(key) + 1);
    if (result == NULL) {
        return NULL;
    }

    size_t n = 0;
    for (size_t i = 0; key[i] != '\0'; i++) {
        unsigned char ch = (unsigned char)key[i];
        if (isspace(ch)) {
            continue;
        }
        result[n++] = (char)tolower(ch);
    }
    result[n] = '\0';
    return result;
}
138
+
139
/**
 * Decide whether a metadata key is one of the reserved keys (title, css,
 * language, HTML header/footer, header levels, quotes language) that must
 * not be emitted as a generic <meta name="..."> tag.
 *
 * The key is compared in normalized form (whitespace removed, lowercased).
 *
 * @param key Metadata key as written in the document.
 * @return true if the key is reserved; false otherwise, including when the
 *         key cannot be normalized (NULL key or allocation failure).
 */
static bool apex_skip_generic_meta_key(const char *key) {
    static const char *const reserved[] = {
        "title",
        "css",
        "language",
        "htmlheader",
        "htmlfooter",
        "htmlheaderlevel",
        "baseheaderlevel",
        "quoteslanguage"
    };
    const size_t reserved_count = sizeof reserved / sizeof reserved[0];

    char *canonical = apex_normalize_meta_key(key);
    if (canonical == NULL) {
        return false;
    }

    bool found = false;
    for (size_t i = 0; i < reserved_count && !found; i++) {
        found = (strcmp(canonical, reserved[i]) == 0);
    }

    free(canonical);
    return found;
}
169
+
170
+ /**
171
+ * Render metadata list to newline-separated generic HTML meta tags.
172
+ */
173
+ static char *apex_render_generic_meta_tags(apex_metadata_item *metadata) {
174
+ if (!metadata) return NULL;
175
+
176
+ size_t capacity = 256;
177
+ size_t used = 0;
178
+ char *out = malloc(capacity);
179
+ if (!out) return NULL;
180
+ out[0] = '\0';
181
+
182
+ /* Metadata entries are prepended during parsing; reverse iteration restores
183
+ * source declaration order in generated head tags. */
184
+ size_t item_count = 0;
185
+ for (apex_metadata_item *it = metadata; it; it = it->next) item_count++;
186
+ if (item_count == 0) {
187
+ free(out);
188
+ return NULL;
189
+ }
190
+ apex_metadata_item **items = malloc(item_count * sizeof(apex_metadata_item *));
191
+ if (!items) {
192
+ free(out);
193
+ return NULL;
194
+ }
195
+ size_t item_index = 0;
196
+ for (apex_metadata_item *it = metadata; it; it = it->next) {
197
+ items[item_index++] = it;
198
+ }
199
+
200
+ for (size_t i = item_count; i > 0; i--) {
201
+ apex_metadata_item *item = items[i - 1];
202
+ if (!item->key || !item->value || apex_skip_generic_meta_key(item->key)) {
203
+ continue;
204
+ }
205
+
206
+ char *escaped_key = apex_escape_html_attr(item->key);
207
+ char *escaped_value = apex_escape_html_attr(item->value);
208
+ if (!escaped_key || !escaped_value) {
209
+ if (escaped_key) free(escaped_key);
210
+ if (escaped_value) free(escaped_value);
211
+ free(items);
212
+ free(out);
213
+ return NULL;
214
+ }
215
+
216
+ size_t needed = strlen(escaped_key) + strlen(escaped_value) + 36;
217
+ if (used + needed + 1 > capacity) {
218
+ size_t new_capacity = capacity * 2;
219
+ while (used + needed + 1 > new_capacity) {
220
+ new_capacity *= 2;
221
+ }
222
+ char *new_out = realloc(out, new_capacity);
223
+ if (!new_out) {
224
+ free(escaped_key);
225
+ free(escaped_value);
226
+ free(items);
227
+ free(out);
228
+ return NULL;
229
+ }
230
+ out = new_out;
231
+ capacity = new_capacity;
232
+ }
233
+
234
+ int written = snprintf(out + used, capacity - used,
235
+ " <meta name=\"%s\" content=\"%s\"/>\n",
236
+ escaped_key, escaped_value);
237
+ free(escaped_key);
238
+ free(escaped_value);
239
+ if (written < 0) {
240
+ free(items);
241
+ free(out);
242
+ return NULL;
243
+ }
244
+ used += (size_t)written;
245
+ }
246
+ free(items);
247
+
248
+ if (used == 0) {
249
+ free(out);
250
+ return NULL;
251
+ }
252
+
253
+ return out;
254
+ }
255
+
73
256
  /**
74
257
  * Base64 encode binary data
75
258
  * Caller must free the returned buffer.
@@ -526,9 +709,36 @@ static char *apex_preprocess_autolinks(const char *text, const apex_options *opt
526
709
  continue;
527
710
  }
528
711
 
529
- /* Check if we're at the start of a reference link definition: [id]: URL */
712
+ /* At start of line: handle reference definitions and indented code blocks */
530
713
  if (r == text || r[-1] == '\n') {
531
714
  const char *line_start = r;
715
+
716
+ /* First: skip indented code blocks (4+ spaces or a leading tab) entirely */
717
+ int indent_spaces = 0;
718
+ while (*line_start == ' ' && indent_spaces < 4) {
719
+ line_start++;
720
+ indent_spaces++;
721
+ }
722
+ if (indent_spaces == 4 || *line_start == '\t') {
723
+ const char *line_end = strchr(r, '\n');
724
+ if (!line_end) line_end = r + strlen(r);
725
+ size_t line_len = line_end - r;
726
+ if ((size_t)(w - out) + line_len + 1 > cap) {
727
+ size_t used = (size_t)(w - out);
728
+ cap = (used + line_len + 1) * 2;
729
+ char *new_out = realloc(out, cap);
730
+ if (!new_out) { free(out); return NULL; }
731
+ out = new_out;
732
+ w = out + used;
733
+ }
734
+ memcpy(w, r, line_len);
735
+ w += line_len;
736
+ r = line_end;
737
+ continue;
738
+ }
739
+
740
+ /* Then: check for reference link definitions: [id]: URL */
741
+ line_start = r;
532
742
  /* Skip leading whitespace */
533
743
  while (*line_start == ' ' || *line_start == '\t') {
534
744
  line_start++;
@@ -1293,19 +1503,49 @@ static char *apex_preprocess_table_captions(const char *text) {
1293
1503
  }
1294
1504
  table_check++;
1295
1505
  }
1296
- /* Check for : Caption format (Pandoc-style, only after tables) */
1297
- /* Skip up to 3 leading spaces (matching definition list rules) */
1506
+ /* Check for : Caption format (Pandoc-style, only in table context)
1507
+ * Require prev_line_was_table_row or in_table_section - NOT prev_line_was_blank alone.
1508
+ * prev_line_was_blank alone would wrongly convert "Term\n\n: definition 1" (def list) to caption. */
1509
+ if ((prev_line_was_table_row || in_table_section) &&
1510
+ !is_pandoc_caption_line) {
1511
+ const char *check = p;
1512
+ int spaces = 0;
1513
+ while (spaces < 3 && check < line_end && *check == ' ') {
1514
+ spaces++;
1515
+ check++;
1516
+ }
1517
+ if (check < line_end && *check == ':' &&
1518
+ (check + 1) < line_end &&
1519
+ (check[1] == ' ' || check[1] == '\t')) {
1520
+ is_colon_caption_line = true;
1521
+ }
1522
+ }
1523
+ } else {
1524
+ /* Check for : Caption BEFORE table (next non-blank line is a table row) */
1298
1525
  const char *check = p;
1299
1526
  int spaces = 0;
1300
1527
  while (spaces < 3 && check < line_end && *check == ' ') {
1301
1528
  spaces++;
1302
1529
  check++;
1303
1530
  }
1304
- /* Must start with : followed by space or tab */
1305
1531
  if (check < line_end && *check == ':' &&
1306
1532
  (check + 1) < line_end &&
1307
1533
  (check[1] == ' ' || check[1] == '\t')) {
1308
- is_colon_caption_line = true;
1534
+ /* Peek ahead: is next non-blank line a table row? */
1535
+ const char *next = line_end;
1536
+ if (next < text + len && *next == '\n') next++;
1537
+ if (next < text + len && *next == '\r') next++;
1538
+ while (next < text + len && (*next == '\n' || *next == '\r' || *next == ' ' || *next == '\t')) {
1539
+ if (*next == '\n' || *next == '\r') {
1540
+ next++;
1541
+ if (next < text + len && next[-1] == '\r' && *next == '\n') next++;
1542
+ } else {
1543
+ next++;
1544
+ }
1545
+ }
1546
+ if (next < text + len && *next == '|') {
1547
+ is_colon_caption_line = true;
1548
+ }
1309
1549
  }
1310
1550
  }
1311
1551
  }
@@ -1427,10 +1667,9 @@ static char *apex_preprocess_table_captions(const char *text) {
1427
1667
  *write++ = '\n';
1428
1668
  }
1429
1669
  }
1430
- } else if (!in_code_block &&
1431
- prev_line_was_table_row &&
1432
- is_colon_caption_line) {
1433
- /* Case 3: Pandoc-style ': Caption {#id .class}' -> convert to '[Caption {#id .class}]' */
1670
+ } else if (!in_code_block && is_colon_caption_line) {
1671
+ /* Case 3: Pandoc-style ': Caption {#id .class}' -> convert to '[Caption {#id .class}]'
1672
+ * Handles both: (a) after table, (b) before table (next line is | table row) */
1434
1673
  /* Skip leading whitespace (up to 3 spaces) */
1435
1674
  const char *caption_start = p;
1436
1675
  int spaces = 0;
@@ -2474,11 +2713,14 @@ apex_options apex_options_default(void) {
2474
2713
  opts.base_directory = NULL;
2475
2714
 
2476
2715
  /* Output options */
2716
+ opts.output_format = APEX_OUTPUT_HTML; /* Default: HTML output */
2477
2717
  opts.unsafe = true;
2478
2718
  opts.validate_utf8 = true;
2479
2719
  opts.github_pre_lang = true;
2480
2720
  opts.standalone = false;
2481
2721
  opts.pretty = false;
2722
+ opts.xhtml = false;
2723
+ opts.strict_xhtml = false;
2482
2724
  opts.stylesheet_paths = NULL;
2483
2725
  opts.stylesheet_count = 0;
2484
2726
  opts.document_title = NULL;
@@ -2552,9 +2794,10 @@ apex_options apex_options_default(void) {
2552
2794
  opts.enable_emoji_autocorrect = true; /* Enabled by default in unified mode */
2553
2795
 
2554
2796
  /* Syntax highlighting options */
2555
- opts.code_highlighter = NULL; /* Default: no external syntax highlighting */
2556
- opts.code_line_numbers = false; /* Default: no line numbers */
2557
- opts.highlight_language_only = false; /* Default: highlight all code blocks */
2797
+ opts.code_highlighter = NULL; /* Default: no external syntax highlighting */
2798
+ opts.code_line_numbers = false; /* Default: no line numbers */
2799
+ opts.highlight_language_only = false; /* Default: highlight all code blocks */
2800
+ opts.code_highlight_theme = NULL; /* Default: no explicit theme */
2558
2801
 
2559
2802
  /* Marked / integration-specific options (unified defaults) */
2560
2803
  opts.enable_widont = false;
@@ -2571,10 +2814,25 @@ apex_options apex_options_default(void) {
2571
2814
  /* Source file information (used by plugins via APEX_FILE_PATH) */
2572
2815
  opts.input_file_path = NULL;
2573
2816
 
2817
+ /* AST filter options (Pandoc-style JSON filters) */
2818
+ opts.ast_filter_commands = NULL;
2819
+ opts.ast_filter_count = 0;
2820
+ opts.ast_filter_strict = true; /* Default: fail fast on filter errors */
2821
+
2574
2822
  /* Progress reporting */
2575
2823
  opts.progress_callback = NULL;
2576
2824
  opts.progress_user_data = NULL;
2577
2825
 
2826
+ /* Custom cmark extension callback */
2827
+ opts.cmark_init = NULL;
2828
+ opts.cmark_done = NULL;
2829
+ opts.cmark_user_data = NULL;
2830
+
2831
+ /* Terminal theme and width (for -t terminal/terminal256) */
2832
+ opts.theme_name = NULL;
2833
+ opts.terminal_width = 0;
2834
+ opts.paginate = false;
2835
+
2578
2836
  return opts;
2579
2837
  }
2580
2838
 
@@ -2860,13 +3118,7 @@ static void apex_register_extensions(cmark_parser *parser, const apex_options *o
2860
3118
  }
2861
3119
  }
2862
3120
 
2863
- /* Definition lists (Kramdown/PHP Extra style) */
2864
- if (options->enable_definition_lists) {
2865
- cmark_syntax_extension *deflist_ext = create_definition_list_extension();
2866
- if (deflist_ext) {
2867
- cmark_parser_attach_syntax_extension(parser, deflist_ext);
2868
- }
2869
- }
3121
+ /* Definition lists (one-line format: Term :: Definition) - handled by preprocessing only */
2870
3122
 
2871
3123
  /* Advanced footnotes (block-level content support) */
2872
3124
  if (options->enable_footnotes) {
@@ -3913,6 +4165,190 @@ static char *apex_apply_widont_to_headings(const char *html) {
3913
4165
  return output;
3914
4166
  }
3915
4167
 
4168
/**
 * Locate the first '>' in [p, end) that is not inside a single- or
 * double-quoted run.
 *
 * A quote character opens a quoted run that lasts until the same quote
 * character appears again; '>' characters inside such a run are ignored.
 *
 * @param p   Start of the range to scan.
 * @param end One past the last byte to scan.
 * @return Pointer to the unquoted '>', or NULL if the range holds none.
 */
static const char *apex_find_unquoted_gt(const char *p, const char *end) {
    char open_quote = '\0'; /* '\0' means "not currently inside quotes" */

    for (; p < end; p++) {
        char c = *p;
        if (open_quote != '\0') {
            if (c == open_quote) {
                open_quote = '\0';
            }
        } else if (c == '"' || c == '\'') {
            open_quote = c;
        } else if (c == '>') {
            return p;
        }
    }
    return NULL;
}
4186
+
4187
/**
 * Report whether a tag name denotes an HTML void element (area, base, br,
 * col, embed, hr, img, input, link, meta, param, source, track, wbr).
 *
 * The comparison is case-insensitive and uses exactly nlen bytes of name,
 * so name does not need to be NUL-terminated at nlen.
 *
 * @param name Start of the tag name.
 * @param nlen Length of the tag name in bytes.
 * @return true if the name matches a void element, false otherwise.
 */
static bool apex_html_void_element_name(const char *name, size_t nlen) {
    static const char *const known[] = {
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr"
    };
    const size_t total = sizeof known / sizeof known[0];

    for (size_t i = 0; i < total; i++) {
        const char *candidate = known[i];
        if (strlen(candidate) != nlen) {
            continue;
        }

        size_t matched = 0;
        while (matched < nlen &&
               tolower((unsigned char)name[matched]) ==
                   tolower((unsigned char)candidate[matched])) {
            matched++;
        }
        if (matched == nlen) {
            return true;
        }
    }
    return false;
}
4204
+
4205
/**
 * Append n bytes from s to a growable, NUL-terminated buffer.
 *
 * The buffer is enlarged by repeated doubling (starting at 8192 bytes when
 * the current capacity is zero) until the appended data plus a terminator
 * fits.
 *
 * @param outp In/out: buffer pointer; updated when the buffer is reallocated.
 * @param cap  In/out: current capacity of *outp in bytes.
 * @param olen In/out: number of bytes in use, excluding the terminator.
 * @param s    Bytes to append.
 * @param n    Number of bytes to append.
 * @return 0 on success, -1 if reallocation fails (the existing buffer is
 *         left valid and unchanged in content).
 */
static int apex_html_buf_append(char **outp, size_t *cap, size_t *olen, const char *s, size_t n) {
    const size_t required = *olen + n + 1;

    while (*cap < required) {
        size_t bigger = (*cap == 0) ? 8192 : *cap * 2;
        char *grown = realloc(*outp, bigger);
        if (grown == NULL) {
            return -1;
        }
        *outp = grown;
        *cap = bigger;
    }

    char *dest = *outp + *olen;
    memcpy(dest, s, n);
    dest[n] = '\0';
    *olen += n;
    return 0;
}
4218
+
4219
/**
 * Rewrite HTML void elements to XML self-closing form (e.g. <br> -> <br />).
 *
 * The following spans are copied through verbatim and never rewritten:
 *   - comments (<!-- ... -->) and CDATA sections (<![CDATA[ ... ]]>),
 *   - anything starting with "<script" / "<style" (case-insensitive prefix
 *     match) up to and including the matching "</script>" / "</style>",
 *   - declarations (<! ... >) and closing tags (</ ... >).
 * If a span has no terminator, the rest of the input is copied unchanged.
 * Void elements that already end in "/>" (ignoring trailing whitespace
 * before the slash) are left as they are.
 *
 * @param html NUL-terminated HTML, or NULL.
 * @return Newly allocated, always NUL-terminated string (caller frees), or
 *         NULL when html is NULL or an allocation fails.
 */
static char *apex_html_apply_xhtml_void_tags(const char *html) {
    if (!html) return NULL;

    size_t html_len = strlen(html);
    size_t cap = html_len * 2 + 256;
    if (cap < 8192) cap = 8192;
    char *out = malloc(cap);
    if (!out) return NULL;
    /* Terminate up front: for empty input the loop below never appends, and
     * the buffer would otherwise be returned with indeterminate contents. */
    out[0] = '\0';
    size_t olen = 0;

    const char *r = html;
    const char *end = html + html_len;

    while (r < end) {
        if (*r != '<') {
            /* Copy the whole run of text before the next '<' in one append. */
            const char *run_end = r;
            while (run_end < end && *run_end != '<') run_end++;
            if (apex_html_buf_append(&out, &cap, &olen, r, (size_t)(run_end - r)) != 0) goto fail;
            r = run_end;
            continue;
        }

        /* Spans copied verbatim. span_end is one past the span; when the
         * terminator is missing it is `end`, which copies the remainder and
         * ends the loop. */
        const char *span_end = NULL;
        if (r + 4 <= end && strncmp(r, "<!--", 4) == 0) {
            const char *ce = strstr(r + 4, "-->");
            span_end = ce ? ce + 3 : end;
        } else if (r + 9 <= end && strncmp(r, "<![CDATA[", 9) == 0) {
            const char *ce = strstr(r + 9, "]]>");
            span_end = ce ? ce + 3 : end;
        } else if (r + 7 <= end && strncasecmp(r, "<script", 7) == 0) {
            const char *close = strcasestr(r + 7, "</script>");
            span_end = close ? close + 9 : end;
        } else if (r + 6 <= end && strncasecmp(r, "<style", 6) == 0) {
            const char *close = strcasestr(r + 6, "</style>");
            span_end = close ? close + 8 : end;
        } else if (r + 1 < end && (r[1] == '!' || r[1] == '/')) {
            /* Declaration <!...> or closing tag </...> */
            const char *gt = apex_find_unquoted_gt(r, end);
            span_end = gt ? gt + 1 : end;
        }

        if (span_end) {
            if (apex_html_buf_append(&out, &cap, &olen, r, (size_t)(span_end - r)) != 0) goto fail;
            r = span_end;
            continue;
        }

        /* Opening tag: extract the element name. */
        const char *name_start = r + 1;
        while (name_start < end && isspace((unsigned char)*name_start)) name_start++;
        const char *name_end = name_start;
        while (name_end < end && (isalnum((unsigned char)*name_end) || *name_end == '-' ||
                                  *name_end == '_' || *name_end == ':')) {
            name_end++;
        }
        size_t name_len = (size_t)(name_end - name_start);

        const char *gt = apex_find_unquoted_gt(r, end);
        if (!gt) {
            /* Unterminated tag: pass the remainder through untouched. */
            if (apex_html_buf_append(&out, &cap, &olen, r, (size_t)(end - r)) != 0) goto fail;
            break;
        }

        bool needs_slash = false;
        if (name_len > 0 && apex_html_void_element_name(name_start, name_len)) {
            /* Look at the last non-space character before '>' to see whether
             * the tag is already self-closed. */
            const char *slash = gt - 1;
            while (slash > r && isspace((unsigned char)*slash)) slash--;
            needs_slash = (*slash != '/');
        }

        if (needs_slash) {
            if (apex_html_buf_append(&out, &cap, &olen, r, (size_t)(gt - r)) != 0) goto fail;
            if (apex_html_buf_append(&out, &cap, &olen, " />", 3) != 0) goto fail;
        } else {
            if (apex_html_buf_append(&out, &cap, &olen, r, (size_t)(gt + 1 - r)) != 0) goto fail;
        }
        r = gt + 1;
    }

    return out;

fail:
    free(out);
    return NULL;
}
4351
+
3916
4352
  char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options *options) {
3917
4353
  if (!markdown || len == 0) {
3918
4354
  char *empty = malloc(1);
@@ -3934,6 +4370,15 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
3934
4370
  /* Use local_opts for rest of function (mutable) - shadow the const parameter */
3935
4371
  #define options (&local_opts)
3936
4372
 
4373
+ if (local_opts.strict_xhtml) {
4374
+ local_opts.xhtml = true;
4375
+ }
4376
+
4377
+ /* Man/man-html output: force disable smart typography so option names (e.g. --to) stay as literal -- */
4378
+ if (options->output_format == APEX_OUTPUT_MAN || options->output_format == APEX_OUTPUT_MAN_HTML) {
4379
+ local_opts.enable_smart_typography = false;
4380
+ }
4381
+
3937
4382
  /* Extract metadata if enabled (preprocessing step) */
3938
4383
  /* Safety check: ensure len doesn't exceed actual string length */
3939
4384
  size_t actual_len = strlen(markdown);
@@ -3985,12 +4430,15 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
3985
4430
  len, text_ptr, len > 200 ? "..." : "");
3986
4431
  }
3987
4432
 
4433
+ /* Create deflist debug log as soon as conversion starts (so it exists even if we exit early or deflists are disabled) */
4434
+ apex_deflist_debug_touch(options->enable_definition_lists);
4435
+
3988
4436
  if (options->mode == APEX_MODE_MULTIMARKDOWN ||
3989
4437
  options->mode == APEX_MODE_KRAMDOWN ||
3990
4438
  options->mode == APEX_MODE_UNIFIED) {
3991
4439
  /* Extract metadata FIRST */
3992
4440
  PROFILE_START(metadata);
3993
- metadata = apex_extract_metadata(&text_ptr);
4441
+ metadata = apex_extract_metadata_for_mode(&text_ptr, options->mode);
3994
4442
  PROFILE_END(metadata);
3995
4443
  if (getenv("APEX_DEBUG_PIPELINE")) {
3996
4444
  size_t len = strlen(text_ptr);
@@ -4949,12 +5397,16 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
4949
5397
  /* Register extensions based on mode and options */
4950
5398
  apex_register_extensions(parser, options);
4951
5399
 
5400
+ if (options->cmark_init) {
5401
+ options->cmark_init(parser, options, cmark_opts, options->cmark_user_data);
5402
+ }
5403
+
4952
5404
  /* Feed normalized text to parser */
4953
5405
  if (getenv("APEX_DEBUG_PIPELINE")) {
4954
5406
  fprintf(stderr, "[APEX_DEBUG] markdown to parse (len=%zu): %.350s%s\n",
4955
5407
  text_len, text_ptr, text_len > 350 ? "..." : "");
4956
5408
  }
4957
- cmark_parser_feed(parser, text_ptr, text_len);
5409
+ cmark_parser_feed(parser, text_len ? text_ptr : "", text_len);
4958
5410
  cmark_node *document = cmark_parser_finish(parser);
4959
5411
  PROFILE_END(parsing);
4960
5412
 
@@ -4964,12 +5416,62 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
4964
5416
  }
4965
5417
 
4966
5418
  if (!document) {
5419
+ if (options->cmark_done) {
5420
+ options->cmark_done(parser, options, cmark_opts, options->cmark_user_data);
5421
+ }
4967
5422
  cmark_parser_free(parser);
4968
5423
  free(working_text);
4969
5424
  apex_free_metadata(metadata);
4970
5425
  return NULL;
4971
5426
  }
4972
5427
 
5428
+ /* If output format is JSON, emit JSON right after parsing (before AST filters) */
5429
+ if (options->output_format == APEX_OUTPUT_JSON) {
5430
+ char *json = apex_cmark_to_pandoc_json(document, options);
5431
+ cmark_node_free(document);
5432
+ cmark_parser_free(parser);
5433
+ free(working_text);
5434
+ apex_free_metadata(metadata);
5435
+ /* Note: Preprocessing buffers are conditionally allocated and may not be in scope here.
5436
+ * This is acceptable as JSON output is typically used for debugging/inspection. */
5437
+ return json;
5438
+ }
5439
+
5440
+ /* Run AST-level filters (Pandoc-style JSON filters) before any */
5441
+ /* AST post-processing or rendering. */
5442
+ if (options->ast_filter_commands && options->ast_filter_count > 0) {
5443
+ /* Determine target format string for filters based on output format */
5444
+ const char *target_format = "html";
5445
+ if (options->output_format == APEX_OUTPUT_JSON ||
5446
+ options->output_format == APEX_OUTPUT_JSON_FILTERED) {
5447
+ target_format = "json";
5448
+ } else if (options->output_format == APEX_OUTPUT_MARKDOWN ||
5449
+ options->output_format == APEX_OUTPUT_MMD ||
5450
+ options->output_format == APEX_OUTPUT_COMMONMARK ||
5451
+ options->output_format == APEX_OUTPUT_KRAMDOWN ||
5452
+ options->output_format == APEX_OUTPUT_GFM) {
5453
+ target_format = "markdown";
5454
+ } else if (options->output_format == APEX_OUTPUT_TERMINAL ||
5455
+ options->output_format == APEX_OUTPUT_TERMINAL256) {
5456
+ target_format = "terminal";
5457
+ }
5458
+ cmark_node *filtered = apex_run_ast_filters(document, options, target_format);
5459
+ if (!filtered && options->ast_filter_strict) {
5460
+ cmark_node_free(document);
5461
+ if (options->cmark_done) {
5462
+ options->cmark_done(parser, options, cmark_opts, options->cmark_user_data);
5463
+ }
5464
+ cmark_parser_free(parser);
5465
+ free(working_text);
5466
+ apex_free_metadata(metadata);
5467
+ return NULL;
5468
+ }
5469
+ if (filtered && filtered != document) {
5470
+ cmark_node_free(document);
5471
+ document = filtered;
5472
+ }
5473
+ }
5474
+
4973
5475
  /* Postprocess wiki links if enabled */
4974
5476
  if (options->enable_wiki_links) {
4975
5477
  /* Fast path: skip AST walk if no wiki link markers present */
@@ -4990,20 +5492,10 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
4990
5492
  apex_process_callouts_in_tree(document);
4991
5493
  }
4992
5494
 
4993
- /* Process manual header IDs (MMD [id] and Kramdown {#id}) */
4994
- if (options->generate_header_ids) {
4995
- cmark_iter *iter = cmark_iter_new(document);
4996
- cmark_event_type event;
4997
- while ((event = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
4998
- cmark_node *node = cmark_iter_get_node(iter);
4999
- if (event == CMARK_EVENT_ENTER && cmark_node_get_type(node) == CMARK_NODE_HEADING) {
5000
- apex_process_manual_header_id(node);
5001
- }
5002
- }
5003
- cmark_iter_free(iter);
5004
- }
5005
-
5006
- /* Process IAL (Inline Attribute Lists) if in Kramdown or Unified mode */
5495
+ /* Process IAL (Inline Attribute Lists) BEFORE manual header IDs.
5496
+ IAL handles {: #id}, {#id}, and {.class} - running first ensures these
5497
+ are extracted and removed from heading text before manual header ID
5498
+ looks for MMD [id] or Kramdown {#id}. Avoids duplicate handling. */
5007
5499
  if (alds || options->mode == APEX_MODE_KRAMDOWN || options->mode == APEX_MODE_UNIFIED) {
5008
5500
  /* Fast path: skip AST walk if no IAL markers present */
5009
5501
  /* Check for both Kramdown-style ({:) and Pandoc-style ({# or {.) IALs */
@@ -5016,6 +5508,21 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5016
5508
  }
5017
5509
  }
5018
5510
 
5511
+ /* Process manual header IDs (MMD [id] and Kramdown {#id}) - after IAL
5512
+ so IAL's {#id} handling doesn't conflict; manual ID handles [id] and
5513
+ any {#id} IAL might have missed (e.g. in multi-child headings) */
5514
+ if (options->generate_header_ids) {
5515
+ cmark_iter *iter = cmark_iter_new(document);
5516
+ cmark_event_type event;
5517
+ while ((event = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
5518
+ cmark_node *node = cmark_iter_get_node(iter);
5519
+ if (event == CMARK_EVENT_ENTER && cmark_node_get_type(node) == CMARK_NODE_HEADING) {
5520
+ apex_process_manual_header_id(node);
5521
+ }
5522
+ }
5523
+ cmark_iter_free(iter);
5524
+ }
5525
+
5019
5526
  /* Apply image attributes to image nodes */
5020
5527
  if (img_attrs) {
5021
5528
  PROFILE_START(image_attrs);
@@ -5030,6 +5537,56 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5030
5537
 
5031
5538
  /* Note: Critic Markup is now handled via preprocessing (before parsing) */
5032
5539
 
5540
+ /* If output format is JSON (after filters), serialize AST to JSON and return */
5541
+ if (options->output_format == APEX_OUTPUT_JSON_FILTERED) {
5542
+ char *json = apex_cmark_to_pandoc_json(document, options);
5543
+ /* Note: Cleanup happens at end of function - document and other resources
5544
+ * will be freed there. We return the JSON string here. */
5545
+ return json;
5546
+ }
5547
+
5548
+ /* If output format is Markdown, serialize AST to Markdown and return */
5549
+ if (options->output_format == APEX_OUTPUT_MARKDOWN ||
5550
+ options->output_format == APEX_OUTPUT_MMD ||
5551
+ options->output_format == APEX_OUTPUT_COMMONMARK ||
5552
+ options->output_format == APEX_OUTPUT_KRAMDOWN ||
5553
+ options->output_format == APEX_OUTPUT_GFM) {
5554
+ apex_markdown_dialect_t dialect;
5555
+ if (options->output_format == APEX_OUTPUT_MARKDOWN) {
5556
+ dialect = APEX_MD_DIALECT_UNIFIED;
5557
+ } else if (options->output_format == APEX_OUTPUT_MMD) {
5558
+ dialect = APEX_MD_DIALECT_MMD;
5559
+ } else if (options->output_format == APEX_OUTPUT_COMMONMARK) {
5560
+ dialect = APEX_MD_DIALECT_COMMONMARK;
5561
+ } else if (options->output_format == APEX_OUTPUT_KRAMDOWN) {
5562
+ dialect = APEX_MD_DIALECT_KRAMDOWN;
5563
+ } else { /* APEX_OUTPUT_GFM */
5564
+ dialect = APEX_MD_DIALECT_GFM;
5565
+ }
5566
+ char *markdown = apex_cmark_to_markdown(document, options, dialect);
5567
+ /* Note: Cleanup happens at end of function - document and other resources
5568
+ * will be freed there. We return the markdown string here. */
5569
+ return markdown;
5570
+ }
5571
+
5572
+ /* If output format is terminal/terminal256, serialize AST to ANSI terminal and return */
5573
+ if (options->output_format == APEX_OUTPUT_TERMINAL ||
5574
+ options->output_format == APEX_OUTPUT_TERMINAL256) {
5575
+ bool use_256 = (options->output_format == APEX_OUTPUT_TERMINAL256);
5576
+ char *tty = apex_cmark_to_terminal(document, options, use_256);
5577
+ return tty;
5578
+ }
5579
+
5580
+ /* If output format is man (roff) or man-html, serialize AST and return */
5581
+ if (options->output_format == APEX_OUTPUT_MAN) {
5582
+ char *roff = apex_cmark_to_man_roff(document, options);
5583
+ return roff ? roff : strdup(".TH stub 1 \"\" \"\"\n");
5584
+ }
5585
+ if (options->output_format == APEX_OUTPUT_MAN_HTML) {
5586
+ char *man_html = apex_cmark_to_man_html(document, options);
5587
+ return man_html ? man_html : strdup("<!DOCTYPE html><html><body><p>stub</p></body></html>");
5588
+ }
5589
+
5033
5590
  /* Render to HTML
5034
5591
  * Use custom renderer when we have attributes (IAL, ALDs, or image attributes)
5035
5592
  * Otherwise use standard renderer
@@ -5167,6 +5724,7 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5167
5724
  char *html_footer_metadata = NULL;
5168
5725
  char *language_metadata = NULL;
5169
5726
  char *quotes_lang_metadata = NULL;
5727
+ char *generic_meta_tags = NULL;
5170
5728
  int base_header_level = 1; /* Default is 1 */
5171
5729
 
5172
5730
  if (metadata) {
@@ -5215,6 +5773,9 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5215
5773
  base_header_level = (int)level;
5216
5774
  }
5217
5775
  }
5776
+
5777
+ /* Collect remaining metadata as generic head meta tags. */
5778
+ generic_meta_tags = apex_render_generic_meta_tags(metadata);
5218
5779
  }
5219
5780
 
5220
5781
  /* Adjust header levels and quote language based on metadata */
@@ -5240,6 +5801,21 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5240
5801
  }
5241
5802
  }
5242
5803
 
5804
+ /* Expand auto media (discover formats from filesystem for img with auto attribute).
5805
+ * Use base_directory when set (e.g. from file path or metadata); otherwise use "."
5806
+ * so auto expansion runs when piping stdin (images resolved relative to cwd). */
5807
+ if (html && strstr(html, "data-apex-replace-auto=1")) {
5808
+ PROFILE_START(expand_auto_media);
5809
+ const char *base = options->base_directory && options->base_directory[0]
5810
+ ? options->base_directory : ".";
5811
+ char *expanded = apex_expand_auto_media(html, base);
5812
+ PROFILE_END(expand_auto_media);
5813
+ if (expanded) {
5814
+ free(html);
5815
+ html = expanded;
5816
+ }
5817
+ }
5818
+
5243
5819
  /* Convert images to figures with captions (caption="..." always wraps; otherwise when enable_image_captions) */
5244
5820
  if (html) {
5245
5821
  PROFILE_START(image_captions);
@@ -5251,6 +5827,24 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5251
5827
  }
5252
5828
  }
5253
5829
 
5830
+ /* Strip redundant <p> around single <img> inside <figure> (e.g. from ::: >figure with "< ![Image](...)") */
5831
+ if (html) {
5832
+ char *stripped = apex_strip_figure_paragraph_wrapper(html);
5833
+ if (stripped) {
5834
+ free(html);
5835
+ html = stripped;
5836
+ }
5837
+ }
5838
+
5839
+ /* Strip <p> that wraps only a single block element (figure, video, picture) - invalid HTML5 */
5840
+ if (html) {
5841
+ char *stripped = apex_strip_block_paragraph_wrapper(html);
5842
+ if (stripped) {
5843
+ free(html);
5844
+ html = stripped;
5845
+ }
5846
+ }
5847
+
5254
5848
  /* Inject header IDs if enabled */
5255
5849
  if (options->generate_header_ids && html) {
5256
5850
  PROFILE_START(header_ids);
@@ -5325,7 +5919,13 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5325
5919
  /* Apply external syntax highlighting if requested */
5326
5920
  if (options->code_highlighter && html) {
5327
5921
  PROFILE_START(syntax_highlight);
5328
- char *highlighted = apex_apply_syntax_highlighting(html, options->code_highlighter, options->code_line_numbers, options->highlight_language_only);
5922
+ bool ansi_out = (options->output_format == APEX_OUTPUT_TERMINAL || options->output_format == APEX_OUTPUT_TERMINAL256);
5923
+ char *highlighted = apex_apply_syntax_highlighting(html,
5924
+ options->code_highlighter,
5925
+ options->code_line_numbers,
5926
+ options->highlight_language_only,
5927
+ ansi_out,
5928
+ options->code_highlight_theme);
5329
5929
  PROFILE_END(syntax_highlight);
5330
5930
  if (highlighted && highlighted != html) {
5331
5931
  free(html);
@@ -5480,6 +6080,9 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5480
6080
 
5481
6081
  /* Clean up */
5482
6082
  cmark_node_free(document);
6083
+ if (options->cmark_done) {
6084
+ options->cmark_done(parser, options, cmark_opts, options->cmark_user_data);
6085
+ }
5483
6086
  cmark_parser_free(parser);
5484
6087
  free(working_text);
5485
6088
  if (ial_preprocessed) free(ial_preprocessed);
@@ -5627,10 +6230,36 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5627
6230
 
5628
6231
  const char *footer_to_use = footer_with_scripts ? footer_with_scripts : html_footer_metadata;
5629
6232
 
6233
+ /* Combine generated generic meta tags with any explicit HTML Header metadata. */
6234
+ char *combined_head_metadata = NULL;
6235
+ const char *head_to_use = html_header_metadata;
6236
+ if (generic_meta_tags || html_header_metadata) {
6237
+ size_t generic_len = generic_meta_tags ? strlen(generic_meta_tags) : 0;
6238
+ size_t header_len = html_header_metadata ? strlen(html_header_metadata) : 0;
6239
+ size_t newline_len = (generic_len > 0 && header_len > 0) ? 1 : 0;
6240
+ combined_head_metadata = malloc(generic_len + newline_len + header_len + 1);
6241
+ if (combined_head_metadata) {
6242
+ size_t pos = 0;
6243
+ if (generic_len > 0) {
6244
+ memcpy(combined_head_metadata + pos, generic_meta_tags, generic_len);
6245
+ pos += generic_len;
6246
+ }
6247
+ if (newline_len) {
6248
+ combined_head_metadata[pos++] = '\n';
6249
+ }
6250
+ if (header_len > 0) {
6251
+ memcpy(combined_head_metadata + pos, html_header_metadata, header_len);
6252
+ pos += header_len;
6253
+ }
6254
+ combined_head_metadata[pos] = '\0';
6255
+ head_to_use = combined_head_metadata;
6256
+ }
6257
+ }
6258
+
5630
6259
  PROFILE_START(standalone_wrap);
5631
6260
  char *document = apex_wrap_html_document(html, local_opts.document_title, css_paths, css_count,
5632
- local_opts.code_highlighter, html_header_metadata, footer_to_use,
5633
- language_metadata);
6261
+ local_opts.code_highlighter, head_to_use, footer_to_use,
6262
+ language_metadata, local_opts.strict_xhtml);
5634
6263
  PROFILE_END(standalone_wrap);
5635
6264
 
5636
6265
  /* Free temporary metadata stylesheet array if we allocated it */
@@ -5645,6 +6274,9 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5645
6274
  if (footer_with_scripts) {
5646
6275
  free(footer_with_scripts);
5647
6276
  }
6277
+ if (combined_head_metadata) {
6278
+ free(combined_head_metadata);
6279
+ }
5648
6280
 
5649
6281
  /* If requested, replace stylesheet links with embedded CSS contents */
5650
6282
  if (html && css_paths && css_count > 0 && local_opts.embed_stylesheet) {
@@ -5773,6 +6405,7 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5773
6405
  if (html_footer_metadata) free(html_footer_metadata);
5774
6406
  if (language_metadata) free(language_metadata);
5775
6407
  if (quotes_lang_metadata) free(quotes_lang_metadata);
6408
+ if (generic_meta_tags) free(generic_meta_tags);
5776
6409
  if (h1_title) free(h1_title);
5777
6410
 
5778
6411
  /* Remove blank lines within tables (applies to both pretty and non-pretty) */
@@ -5810,6 +6443,17 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5810
6443
  }
5811
6444
  }
5812
6445
 
6446
+ /* XHTML-style void elements (--xhtml / --strict-xhtml); run after pretty-print (HTML only) */
6447
+ if (local_opts.xhtml && html && options->output_format == APEX_OUTPUT_HTML) {
6448
+ PROFILE_START(xhtml_void_tags);
6449
+ char *xhtml_out = apex_html_apply_xhtml_void_tags(html);
6450
+ PROFILE_END(xhtml_void_tags);
6451
+ if (xhtml_out) {
6452
+ free(html);
6453
+ html = xhtml_out;
6454
+ }
6455
+ }
6456
+
5813
6457
  PROFILE_END(total);
5814
6458
 
5815
6459
  if (profiling_enabled()) {
@@ -5822,7 +6466,7 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
5822
6466
  /**
5823
6467
  * Wrap HTML content in complete HTML5 document structure
5824
6468
  */
5825
- char *apex_wrap_html_document(const char *content, const char *title, const char **stylesheet_paths, size_t stylesheet_count, const char *code_highlighter, const char *html_header, const char *html_footer, const char *language) {
6469
+ char *apex_wrap_html_document(const char *content, const char *title, const char **stylesheet_paths, size_t stylesheet_count, const char *code_highlighter, const char *html_header, const char *html_footer, const char *language, bool strict_xhtml) {
5826
6470
  if (!content) return NULL;
5827
6471
 
5828
6472
  const char *doc_title = title ? title : "Document";
@@ -5906,10 +6550,19 @@ char *apex_wrap_html_document(const char *content, const char *title, const char
5906
6550
  const char *version_str = APEX_VERSION_STRING;
5907
6551
  if (!version_str) version_str = "unknown";
5908
6552
 
5909
- /* HTML5 doctype and opening */
6553
+ /* HTML5 doctype and opening (polyglot XHTML when strict_xhtml) */
5910
6554
  /* Add body class if code highlighting is enabled */
5911
6555
  const char *body_class = code_highlighter ? " class=\"code-highlighted\"" : "";
5912
- int n = snprintf(write, remaining, "<!DOCTYPE html>\n<html lang=\"%s\">\n<head>\n", lang);
6556
+ int n;
6557
+ if (strict_xhtml) {
6558
+ n = snprintf(write, remaining,
6559
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
6560
+ "<!DOCTYPE html>\n"
6561
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"%s\" lang=\"%s\">\n<head>\n",
6562
+ lang, lang);
6563
+ } else {
6564
+ n = snprintf(write, remaining, "<!DOCTYPE html>\n<html lang=\"%s\">\n<head>\n", lang);
6565
+ }
5913
6566
  if (n < 0 || (size_t)n >= remaining) {
5914
6567
  free(output);
5915
6568
  return strdup(content);
@@ -5918,7 +6571,12 @@ char *apex_wrap_html_document(const char *content, const char *title, const char
5918
6571
  remaining -= n;
5919
6572
 
5920
6573
  /* Meta tags */
5921
- n = snprintf(write, remaining, " <meta charset=\"UTF-8\">\n");
6574
+ if (strict_xhtml) {
6575
+ n = snprintf(write, remaining,
6576
+ " <meta http-equiv=\"Content-Type\" content=\"application/xhtml+xml; charset=UTF-8\" />\n");
6577
+ } else {
6578
+ n = snprintf(write, remaining, " <meta charset=\"UTF-8\">\n");
6579
+ }
5922
6580
  if (n < 0 || (size_t)n >= remaining) {
5923
6581
  free(output);
5924
6582
  return strdup(content);