apex-ruby 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/apex_ext/apex_ext.c +6 -0
- data/ext/apex_ext/apex_src/AGENTS.md +41 -0
- data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
- data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
- data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
- data/ext/apex_ext/apex_src/Package.swift +9 -0
- data/ext/apex_ext/apex_src/README.md +31 -9
- data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
- data/ext/apex_ext/apex_src/VERSION +1 -1
- data/ext/apex_ext/apex_src/cli/main.c +1125 -13
- data/ext/apex_ext/apex_src/docs/index.md +459 -0
- data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
- data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
- data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
- data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
- data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
- data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
- data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
- data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
- data/ext/apex_ext/apex_src/man/apex.1 +663 -620
- data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
- data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
- data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
- data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
- data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
- data/ext/apex_ext/apex_src/pages/index.md +459 -0
- data/ext/apex_ext/apex_src/src/_README.md +4 -4
- data/ext/apex_ext/apex_src/src/apex.c +702 -44
- data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
- data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
- data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
- data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
- data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
- data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
- data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
- data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
- data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
- data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
- data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
- data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
- data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
- data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
- data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
- data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
- data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
- data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
- data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
- data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
- data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
- data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
- data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
- data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
- data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
- data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
- data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
- data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
- data/ext/apex_ext/apex_src/tests/README.md +11 -5
- data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
- data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
- data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
- data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
- data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
- data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
- data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
- data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
- data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
- data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
- data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
- data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
- data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
- data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
- data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
- data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
- data/lib/apex/version.rb +1 -1
- metadata +32 -2
- data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
#include "apex/apex.h"
|
|
2
2
|
#include <stdlib.h>
|
|
3
3
|
#include <string.h>
|
|
4
|
+
#include <strings.h>
|
|
4
5
|
#include <stdio.h>
|
|
5
6
|
#include <ctype.h>
|
|
6
7
|
#include <sys/stat.h>
|
|
7
8
|
#include <unistd.h>
|
|
8
|
-
#include <libgen.h>
|
|
9
|
-
#include <time.h>
|
|
10
9
|
#include <sys/time.h>
|
|
11
10
|
|
|
12
11
|
/* cmark-gfm headers */
|
|
@@ -42,6 +41,11 @@
|
|
|
42
41
|
#include "extensions/fenced_divs.h"
|
|
43
42
|
#include "extensions/syntax_highlight.h"
|
|
44
43
|
#include "plugins.h"
|
|
44
|
+
#include "ast_json.h"
|
|
45
|
+
#include "apex/ast_markdown.h"
|
|
46
|
+
#include "apex/ast_terminal.h"
|
|
47
|
+
#include "apex/ast_man.h"
|
|
48
|
+
#include "filters_ast.h"
|
|
45
49
|
|
|
46
50
|
/* Custom renderer */
|
|
47
51
|
#include "html_renderer.h"
|
|
@@ -70,6 +74,185 @@ static char *apex_encode_hex_entities(const char *text, size_t len) {
|
|
|
70
74
|
return out;
|
|
71
75
|
}
|
|
72
76
|
|
|
77
|
+
/**
 * Escape a string for safe use inside an HTML attribute value.
 *
 * The five characters that are significant in attribute context are replaced
 * by character references:
 *   '&' -> "&amp;", '<' -> "&lt;", '>' -> "&gt;", '"' -> "&quot;", '\'' -> "&#39;".
 * Every other byte is copied through unchanged.
 *
 * @param input NUL-terminated string to escape; NULL is treated as "".
 * @return Newly allocated NUL-terminated string that the caller must free(),
 *         or NULL if the allocation fails (or the size would overflow).
 */
static char *apex_escape_html_attr(const char *input) {
    if (!input) input = "";

    size_t len = strlen(input);

    /* Worst case is 6 output bytes per input byte ("&quot;") plus the NUL.
     * Refuse inputs whose worst-case size cannot be represented. */
    if (len > ((size_t)-1 - 1) / 6) return NULL;

    char *out = malloc(len * 6 + 1);
    if (!out) return NULL;

    char *w = out;
    for (const char *p = input; *p; p++) {
        const char *entity = NULL;

        switch (*p) {
            case '&':  entity = "&amp;";  break;
            case '<':  entity = "&lt;";   break;
            case '>':  entity = "&gt;";   break;
            case '"':  entity = "&quot;"; break;
            case '\'': entity = "&#39;";  break;
            default:   break;
        }

        if (entity) {
            /* Length is taken from the literal itself so the copy can never
             * read past the end of the replacement text. */
            size_t entity_len = strlen(entity);
            memcpy(w, entity, entity_len);
            w += entity_len;
        } else {
            *w++ = *p;
        }
    }
    *w = '\0';
    return out;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Produce the canonical form of a metadata key: every whitespace character
 * is dropped and the remaining characters are lowercased.
 *
 * @param key NUL-terminated key, or NULL.
 * @return Newly allocated canonical key (caller frees), or NULL when key is
 *         NULL or the allocation fails.
 */
static char *apex_normalize_meta_key(const char *key) {
    if (key == NULL) {
        return NULL;
    }

    /* The result can never be longer than the input. */
    char *normalized = malloc(strlen(key) + 1);
    if (normalized == NULL) {
        return NULL;
    }

    size_t kept = 0;
    for (size_t i = 0; key[i] != '\0'; i++) {
        unsigned char ch = (unsigned char)key[i];
        if (isspace(ch)) {
            continue;
        }
        normalized[kept++] = (char)tolower(ch);
    }
    normalized[kept] = '\0';

    return normalized;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Decide whether a metadata key must be left out of the generic
 * <meta name="..."> output because it is handled elsewhere
 * (title, css, language, HTML header/footer, header levels, quotes language).
 *
 * The key is compared in its normalized form (whitespace removed, lowercased).
 *
 * @param key Metadata key as written in the document.
 * @return true if the key is one of the reserved keys; false otherwise,
 *         including when the key is NULL or normalization fails.
 */
static bool apex_skip_generic_meta_key(const char *key) {
    static const char *const reserved_keys[] = {
        "title",
        "css",
        "language",
        "htmlheader",
        "htmlfooter",
        "htmlheaderlevel",
        "baseheaderlevel",
        "quoteslanguage"
    };
    const size_t reserved_count = sizeof reserved_keys / sizeof reserved_keys[0];

    char *canonical = apex_normalize_meta_key(key);
    if (canonical == NULL) {
        return false;
    }

    /* Advance until a reserved key matches or the table is exhausted. */
    size_t idx = 0;
    while (idx < reserved_count && strcmp(canonical, reserved_keys[idx]) != 0) {
        idx++;
    }

    free(canonical);
    return idx < reserved_count;
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Render metadata list to newline-separated generic HTML meta tags.
|
|
172
|
+
*/
|
|
173
|
+
static char *apex_render_generic_meta_tags(apex_metadata_item *metadata) {
|
|
174
|
+
if (!metadata) return NULL;
|
|
175
|
+
|
|
176
|
+
size_t capacity = 256;
|
|
177
|
+
size_t used = 0;
|
|
178
|
+
char *out = malloc(capacity);
|
|
179
|
+
if (!out) return NULL;
|
|
180
|
+
out[0] = '\0';
|
|
181
|
+
|
|
182
|
+
/* Metadata entries are prepended during parsing; reverse iteration restores
|
|
183
|
+
* source declaration order in generated head tags. */
|
|
184
|
+
size_t item_count = 0;
|
|
185
|
+
for (apex_metadata_item *it = metadata; it; it = it->next) item_count++;
|
|
186
|
+
if (item_count == 0) {
|
|
187
|
+
free(out);
|
|
188
|
+
return NULL;
|
|
189
|
+
}
|
|
190
|
+
apex_metadata_item **items = malloc(item_count * sizeof(apex_metadata_item *));
|
|
191
|
+
if (!items) {
|
|
192
|
+
free(out);
|
|
193
|
+
return NULL;
|
|
194
|
+
}
|
|
195
|
+
size_t item_index = 0;
|
|
196
|
+
for (apex_metadata_item *it = metadata; it; it = it->next) {
|
|
197
|
+
items[item_index++] = it;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
for (size_t i = item_count; i > 0; i--) {
|
|
201
|
+
apex_metadata_item *item = items[i - 1];
|
|
202
|
+
if (!item->key || !item->value || apex_skip_generic_meta_key(item->key)) {
|
|
203
|
+
continue;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
char *escaped_key = apex_escape_html_attr(item->key);
|
|
207
|
+
char *escaped_value = apex_escape_html_attr(item->value);
|
|
208
|
+
if (!escaped_key || !escaped_value) {
|
|
209
|
+
if (escaped_key) free(escaped_key);
|
|
210
|
+
if (escaped_value) free(escaped_value);
|
|
211
|
+
free(items);
|
|
212
|
+
free(out);
|
|
213
|
+
return NULL;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
size_t needed = strlen(escaped_key) + strlen(escaped_value) + 36;
|
|
217
|
+
if (used + needed + 1 > capacity) {
|
|
218
|
+
size_t new_capacity = capacity * 2;
|
|
219
|
+
while (used + needed + 1 > new_capacity) {
|
|
220
|
+
new_capacity *= 2;
|
|
221
|
+
}
|
|
222
|
+
char *new_out = realloc(out, new_capacity);
|
|
223
|
+
if (!new_out) {
|
|
224
|
+
free(escaped_key);
|
|
225
|
+
free(escaped_value);
|
|
226
|
+
free(items);
|
|
227
|
+
free(out);
|
|
228
|
+
return NULL;
|
|
229
|
+
}
|
|
230
|
+
out = new_out;
|
|
231
|
+
capacity = new_capacity;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
int written = snprintf(out + used, capacity - used,
|
|
235
|
+
" <meta name=\"%s\" content=\"%s\"/>\n",
|
|
236
|
+
escaped_key, escaped_value);
|
|
237
|
+
free(escaped_key);
|
|
238
|
+
free(escaped_value);
|
|
239
|
+
if (written < 0) {
|
|
240
|
+
free(items);
|
|
241
|
+
free(out);
|
|
242
|
+
return NULL;
|
|
243
|
+
}
|
|
244
|
+
used += (size_t)written;
|
|
245
|
+
}
|
|
246
|
+
free(items);
|
|
247
|
+
|
|
248
|
+
if (used == 0) {
|
|
249
|
+
free(out);
|
|
250
|
+
return NULL;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
return out;
|
|
254
|
+
}
|
|
255
|
+
|
|
73
256
|
/**
|
|
74
257
|
* Base64 encode binary data
|
|
75
258
|
* Caller must free the returned buffer.
|
|
@@ -526,9 +709,36 @@ static char *apex_preprocess_autolinks(const char *text, const apex_options *opt
|
|
|
526
709
|
continue;
|
|
527
710
|
}
|
|
528
711
|
|
|
529
|
-
/*
|
|
712
|
+
/* At start of line: handle reference definitions and indented code blocks */
|
|
530
713
|
if (r == text || r[-1] == '\n') {
|
|
531
714
|
const char *line_start = r;
|
|
715
|
+
|
|
716
|
+
/* First: skip indented code blocks (4+ spaces or a leading tab) entirely */
|
|
717
|
+
int indent_spaces = 0;
|
|
718
|
+
while (*line_start == ' ' && indent_spaces < 4) {
|
|
719
|
+
line_start++;
|
|
720
|
+
indent_spaces++;
|
|
721
|
+
}
|
|
722
|
+
if (indent_spaces == 4 || *line_start == '\t') {
|
|
723
|
+
const char *line_end = strchr(r, '\n');
|
|
724
|
+
if (!line_end) line_end = r + strlen(r);
|
|
725
|
+
size_t line_len = line_end - r;
|
|
726
|
+
if ((size_t)(w - out) + line_len + 1 > cap) {
|
|
727
|
+
size_t used = (size_t)(w - out);
|
|
728
|
+
cap = (used + line_len + 1) * 2;
|
|
729
|
+
char *new_out = realloc(out, cap);
|
|
730
|
+
if (!new_out) { free(out); return NULL; }
|
|
731
|
+
out = new_out;
|
|
732
|
+
w = out + used;
|
|
733
|
+
}
|
|
734
|
+
memcpy(w, r, line_len);
|
|
735
|
+
w += line_len;
|
|
736
|
+
r = line_end;
|
|
737
|
+
continue;
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
/* Then: check for reference link definitions: [id]: URL */
|
|
741
|
+
line_start = r;
|
|
532
742
|
/* Skip leading whitespace */
|
|
533
743
|
while (*line_start == ' ' || *line_start == '\t') {
|
|
534
744
|
line_start++;
|
|
@@ -1293,19 +1503,49 @@ static char *apex_preprocess_table_captions(const char *text) {
|
|
|
1293
1503
|
}
|
|
1294
1504
|
table_check++;
|
|
1295
1505
|
}
|
|
1296
|
-
/* Check for : Caption format (Pandoc-style, only
|
|
1297
|
-
|
|
1506
|
+
/* Check for : Caption format (Pandoc-style, only in table context)
|
|
1507
|
+
* Require prev_line_was_table_row or in_table_section - NOT prev_line_was_blank alone.
|
|
1508
|
+
* prev_line_was_blank alone would wrongly convert "Term\n\n: definition 1" (def list) to caption. */
|
|
1509
|
+
if ((prev_line_was_table_row || in_table_section) &&
|
|
1510
|
+
!is_pandoc_caption_line) {
|
|
1511
|
+
const char *check = p;
|
|
1512
|
+
int spaces = 0;
|
|
1513
|
+
while (spaces < 3 && check < line_end && *check == ' ') {
|
|
1514
|
+
spaces++;
|
|
1515
|
+
check++;
|
|
1516
|
+
}
|
|
1517
|
+
if (check < line_end && *check == ':' &&
|
|
1518
|
+
(check + 1) < line_end &&
|
|
1519
|
+
(check[1] == ' ' || check[1] == '\t')) {
|
|
1520
|
+
is_colon_caption_line = true;
|
|
1521
|
+
}
|
|
1522
|
+
}
|
|
1523
|
+
} else {
|
|
1524
|
+
/* Check for : Caption BEFORE table (next non-blank line is a table row) */
|
|
1298
1525
|
const char *check = p;
|
|
1299
1526
|
int spaces = 0;
|
|
1300
1527
|
while (spaces < 3 && check < line_end && *check == ' ') {
|
|
1301
1528
|
spaces++;
|
|
1302
1529
|
check++;
|
|
1303
1530
|
}
|
|
1304
|
-
/* Must start with : followed by space or tab */
|
|
1305
1531
|
if (check < line_end && *check == ':' &&
|
|
1306
1532
|
(check + 1) < line_end &&
|
|
1307
1533
|
(check[1] == ' ' || check[1] == '\t')) {
|
|
1308
|
-
|
|
1534
|
+
/* Peek ahead: is next non-blank line a table row? */
|
|
1535
|
+
const char *next = line_end;
|
|
1536
|
+
if (next < text + len && *next == '\n') next++;
|
|
1537
|
+
if (next < text + len && *next == '\r') next++;
|
|
1538
|
+
while (next < text + len && (*next == '\n' || *next == '\r' || *next == ' ' || *next == '\t')) {
|
|
1539
|
+
if (*next == '\n' || *next == '\r') {
|
|
1540
|
+
next++;
|
|
1541
|
+
if (next < text + len && next[-1] == '\r' && *next == '\n') next++;
|
|
1542
|
+
} else {
|
|
1543
|
+
next++;
|
|
1544
|
+
}
|
|
1545
|
+
}
|
|
1546
|
+
if (next < text + len && *next == '|') {
|
|
1547
|
+
is_colon_caption_line = true;
|
|
1548
|
+
}
|
|
1309
1549
|
}
|
|
1310
1550
|
}
|
|
1311
1551
|
}
|
|
@@ -1427,10 +1667,9 @@ static char *apex_preprocess_table_captions(const char *text) {
|
|
|
1427
1667
|
*write++ = '\n';
|
|
1428
1668
|
}
|
|
1429
1669
|
}
|
|
1430
|
-
} else if (!in_code_block &&
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
/* Case 3: Pandoc-style ': Caption {#id .class}' -> convert to '[Caption {#id .class}]' */
|
|
1670
|
+
} else if (!in_code_block && is_colon_caption_line) {
|
|
1671
|
+
/* Case 3: Pandoc-style ': Caption {#id .class}' -> convert to '[Caption {#id .class}]'
|
|
1672
|
+
* Handles both: (a) after table, (b) before table (next line is | table row) */
|
|
1434
1673
|
/* Skip leading whitespace (up to 3 spaces) */
|
|
1435
1674
|
const char *caption_start = p;
|
|
1436
1675
|
int spaces = 0;
|
|
@@ -2474,11 +2713,14 @@ apex_options apex_options_default(void) {
|
|
|
2474
2713
|
opts.base_directory = NULL;
|
|
2475
2714
|
|
|
2476
2715
|
/* Output options */
|
|
2716
|
+
opts.output_format = APEX_OUTPUT_HTML; /* Default: HTML output */
|
|
2477
2717
|
opts.unsafe = true;
|
|
2478
2718
|
opts.validate_utf8 = true;
|
|
2479
2719
|
opts.github_pre_lang = true;
|
|
2480
2720
|
opts.standalone = false;
|
|
2481
2721
|
opts.pretty = false;
|
|
2722
|
+
opts.xhtml = false;
|
|
2723
|
+
opts.strict_xhtml = false;
|
|
2482
2724
|
opts.stylesheet_paths = NULL;
|
|
2483
2725
|
opts.stylesheet_count = 0;
|
|
2484
2726
|
opts.document_title = NULL;
|
|
@@ -2552,9 +2794,10 @@ apex_options apex_options_default(void) {
|
|
|
2552
2794
|
opts.enable_emoji_autocorrect = true; /* Enabled by default in unified mode */
|
|
2553
2795
|
|
|
2554
2796
|
/* Syntax highlighting options */
|
|
2555
|
-
opts.code_highlighter = NULL;
|
|
2556
|
-
opts.code_line_numbers = false;
|
|
2557
|
-
opts.highlight_language_only = false;
|
|
2797
|
+
opts.code_highlighter = NULL; /* Default: no external syntax highlighting */
|
|
2798
|
+
opts.code_line_numbers = false; /* Default: no line numbers */
|
|
2799
|
+
opts.highlight_language_only = false; /* Default: highlight all code blocks */
|
|
2800
|
+
opts.code_highlight_theme = NULL; /* Default: no explicit theme */
|
|
2558
2801
|
|
|
2559
2802
|
/* Marked / integration-specific options (unified defaults) */
|
|
2560
2803
|
opts.enable_widont = false;
|
|
@@ -2571,10 +2814,25 @@ apex_options apex_options_default(void) {
|
|
|
2571
2814
|
/* Source file information (used by plugins via APEX_FILE_PATH) */
|
|
2572
2815
|
opts.input_file_path = NULL;
|
|
2573
2816
|
|
|
2817
|
+
/* AST filter options (Pandoc-style JSON filters) */
|
|
2818
|
+
opts.ast_filter_commands = NULL;
|
|
2819
|
+
opts.ast_filter_count = 0;
|
|
2820
|
+
opts.ast_filter_strict = true; /* Default: fail fast on filter errors */
|
|
2821
|
+
|
|
2574
2822
|
/* Progress reporting */
|
|
2575
2823
|
opts.progress_callback = NULL;
|
|
2576
2824
|
opts.progress_user_data = NULL;
|
|
2577
2825
|
|
|
2826
|
+
/* Custom cmark extension callback */
|
|
2827
|
+
opts.cmark_init = NULL;
|
|
2828
|
+
opts.cmark_done = NULL;
|
|
2829
|
+
opts.cmark_user_data = NULL;
|
|
2830
|
+
|
|
2831
|
+
/* Terminal theme and width (for -t terminal/terminal256) */
|
|
2832
|
+
opts.theme_name = NULL;
|
|
2833
|
+
opts.terminal_width = 0;
|
|
2834
|
+
opts.paginate = false;
|
|
2835
|
+
|
|
2578
2836
|
return opts;
|
|
2579
2837
|
}
|
|
2580
2838
|
|
|
@@ -2860,13 +3118,7 @@ static void apex_register_extensions(cmark_parser *parser, const apex_options *o
|
|
|
2860
3118
|
}
|
|
2861
3119
|
}
|
|
2862
3120
|
|
|
2863
|
-
/* Definition lists (
|
|
2864
|
-
if (options->enable_definition_lists) {
|
|
2865
|
-
cmark_syntax_extension *deflist_ext = create_definition_list_extension();
|
|
2866
|
-
if (deflist_ext) {
|
|
2867
|
-
cmark_parser_attach_syntax_extension(parser, deflist_ext);
|
|
2868
|
-
}
|
|
2869
|
-
}
|
|
3121
|
+
/* Definition lists (one-line format: Term :: Definition) - handled by preprocessing only */
|
|
2870
3122
|
|
|
2871
3123
|
/* Advanced footnotes (block-level content support) */
|
|
2872
3124
|
if (options->enable_footnotes) {
|
|
@@ -3913,6 +4165,190 @@ static char *apex_apply_widont_to_headings(const char *html) {
|
|
|
3913
4165
|
return output;
|
|
3914
4166
|
}
|
|
3915
4167
|
|
|
4168
|
+
/**
 * Find the first '>' in [p, end) that is not inside a quoted run.
 *
 * A single or double quote opens a quoted run that lasts until the same
 * quote character appears again; a '>' inside such a run is ignored.
 *
 * @param p   Start of the range to scan.
 * @param end One past the last byte to scan.
 * @return Pointer to the unquoted '>', or NULL if the range has none.
 */
static const char *apex_find_unquoted_gt(const char *p, const char *end) {
    char open_quote = '\0';

    for (const char *cur = p; cur < end; cur++) {
        const char ch = *cur;

        if (open_quote != '\0') {
            /* Inside quotes: only the matching quote is significant. */
            if (ch == open_quote) {
                open_quote = '\0';
            }
        } else if (ch == '"' || ch == '\'') {
            open_quote = ch;
        } else if (ch == '>') {
            return cur;
        }
    }

    return NULL;
}
|
|
4186
|
+
|
|
4187
|
+
/**
 * Test whether a tag name is one of the HTML void elements listed below
 * (area, base, br, ...), ignoring letter case.
 *
 * @param name Start of the tag name; it need not be NUL-terminated.
 * @param nlen Number of bytes in the name.
 * @return true when the first nlen bytes of name spell a void element name.
 */
static bool apex_html_void_element_name(const char *name, size_t nlen) {
    static const char *const void_elements[] = {
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr"
    };
    const size_t element_count = sizeof void_elements / sizeof void_elements[0];

    for (size_t idx = 0; idx < element_count; idx++) {
        const char *candidate = void_elements[idx];

        /* Cheap length filter before the character-by-character compare. */
        if (strlen(candidate) != nlen) {
            continue;
        }

        size_t matched = 0;
        while (matched < nlen &&
               tolower((unsigned char)name[matched]) ==
                   tolower((unsigned char)candidate[matched])) {
            matched++;
        }
        if (matched == nlen) {
            return true;
        }
    }

    return false;
}
|
|
4204
|
+
|
|
4205
|
+
/**
 * Append n bytes from s to a growable, NUL-terminated heap buffer.
 *
 * The buffer grows by doubling (starting at 8192 bytes when *cap is 0) until
 * it can hold the existing content, the new bytes and the terminator. The
 * final capacity is computed first so the buffer is reallocated at most once
 * per call.
 *
 * @param outp In/out: buffer pointer; updated if the buffer is reallocated.
 * @param cap  In/out: allocated size of *outp in bytes.
 * @param olen In/out: number of content bytes in *outp (excluding the NUL).
 * @param s    Bytes to append; may be NULL only when n is 0.
 * @param n    Number of bytes to append.
 * @return 0 on success; -1 if the required size overflows or the allocation
 *         fails, in which case *outp, *cap and *olen are left unchanged.
 */
static int apex_html_buf_append(char **outp, size_t *cap, size_t *olen, const char *s, size_t n) {
    const size_t size_max = (size_t)-1;

    /* *olen + n + 1 must be representable. */
    if (n >= size_max - *olen) return -1;
    size_t required = *olen + n + 1;

    if (required > *cap) {
        size_t new_cap = *cap;
        do {
            if (new_cap == 0) {
                new_cap = 8192;
            } else if (new_cap > size_max / 2) {
                /* Doubling would wrap; fall back to the exact size. */
                new_cap = required;
            } else {
                new_cap *= 2;
            }
        } while (new_cap < required);

        char *nbuf = realloc(*outp, new_cap);
        if (!nbuf) return -1;
        *outp = nbuf;
        *cap = new_cap;
    }

    /* memcpy with a NULL source is undefined even for zero bytes. */
    if (n > 0) {
        memcpy(*outp + *olen, s, n);
        *olen += n;
    }
    (*outp)[*olen] = '\0';
    return 0;
}
|
|
4218
|
+
|
|
4219
|
+
/**
 * Append the bytes [*r, stop) to the output buffer and advance *r to stop.
 * A NULL stop means "terminator not found": the rest of the input up to end
 * is copied and *r ends at end. Returns 0 on success, -1 on allocation failure.
 */
static int apex_html_copy_span(char **outp, size_t *cap, size_t *olen,
                               const char **r, const char *end, const char *stop) {
    if (!stop) stop = end;
    if (apex_html_buf_append(outp, cap, olen, *r, (size_t)(stop - *r)) != 0) return -1;
    *r = stop;
    return 0;
}

/**
 * Rewrite HTML void/empty elements to XML self-closing form (e.g. <br> -> <br />).
 *
 * Comments, CDATA sections, <script>...</script> and <style>...</style>
 * blocks, <!...> declarations and closing tags are copied through verbatim.
 * A void element that already ends in "/>" is left unchanged. A '<' that does
 * not begin a tag (not immediately followed by a letter) is copied as literal
 * text, so it cannot swallow the markup that follows it.
 *
 * @param html NUL-terminated HTML, or NULL.
 * @return Newly allocated, NUL-terminated string (caller frees), or NULL when
 *         html is NULL or an allocation fails.
 */
static char *apex_html_apply_xhtml_void_tags(const char *html) {
    if (!html) return NULL;

    size_t html_len = strlen(html);
    size_t cap = html_len * 2 + 256;
    if (cap < 8192) cap = 8192;
    char *out = malloc(cap);
    if (!out) return NULL;
    /* Terminate up front so that empty input yields a valid empty string. */
    out[0] = '\0';
    size_t olen = 0;

    const char *r = html;
    const char *end = html + html_len;

    while (r < end) {
        if (*r != '<') {
            /* Plain text: copy the whole run up to the next '<' at once. */
            const char *next_lt = memchr(r, '<', (size_t)(end - r));
            if (apex_html_copy_span(&out, &cap, &olen, &r, end, next_lt) != 0) goto fail;
            continue;
        }

        size_t left = (size_t)(end - r);
        const char *stop = NULL;
        bool verbatim = true;

        if (left >= 4 && strncmp(r, "<!--", 4) == 0) {
            /* Comment */
            stop = strstr(r + 4, "-->");
            if (stop) stop += 3;
        } else if (left >= 9 && strncmp(r, "<![CDATA[", 9) == 0) {
            /* CDATA */
            stop = strstr(r + 9, "]]>");
            if (stop) stop += 3;
        } else if (left >= 7 && strncasecmp(r, "<script", 7) == 0) {
            /* script: skip to the end of the closing tag */
            stop = strcasestr(r + 7, "</script>");
            if (stop) stop += 9;
        } else if (left >= 6 && strncasecmp(r, "<style", 6) == 0) {
            /* style: skip to the end of the closing tag */
            stop = strcasestr(r + 6, "</style>");
            if (stop) stop += 8;
        } else if (left > 1 && (r[1] == '!' || r[1] == '/')) {
            /* Declaration <!...> or closing tag </...> */
            stop = apex_find_unquoted_gt(r, end);
            if (stop) stop += 1;
        } else {
            verbatim = false;
        }

        if (verbatim) {
            if (apex_html_copy_span(&out, &cap, &olen, &r, end, stop) != 0) goto fail;
            continue;
        }

        /* Opening tag: extract name (leading whitespace tolerated, as before) */
        const char *name_start = r + 1;
        while (name_start < end && isspace((unsigned char)*name_start)) name_start++;
        const char *name_end = name_start;
        while (name_end < end && (isalnum((unsigned char)*name_end) || *name_end == '-' ||
                                  *name_end == '_' || *name_end == ':')) {
            name_end++;
        }
        size_t name_len = (size_t)(name_end - name_start);
        bool is_void = name_len > 0 && apex_html_void_element_name(name_start, name_len);

        /* A '<' that is neither a void element nor immediately followed by a
         * letter does not start a tag (e.g. "a < b"). Emit it literally and
         * rescan from the next byte instead of skipping to the next '>'. */
        if (!is_void && !(left > 1 && isalpha((unsigned char)r[1]))) {
            if (apex_html_buf_append(&out, &cap, &olen, r, 1) != 0) goto fail;
            r++;
            continue;
        }

        const char *gt = apex_find_unquoted_gt(r, end);
        if (!gt) {
            /* Unterminated tag: copy the remainder unchanged. */
            if (apex_html_copy_span(&out, &cap, &olen, &r, end, NULL) != 0) goto fail;
            continue;
        }

        if (is_void) {
            /* Look at the last non-space byte before '>' to see whether the
             * tag is already self-closing. */
            const char *slash = gt - 1;
            while (slash > r && isspace((unsigned char)*slash)) slash--;
            if (*slash != '/') {
                if (apex_html_buf_append(&out, &cap, &olen, r, (size_t)(gt - r)) != 0) goto fail;
                if (apex_html_buf_append(&out, &cap, &olen, " />", 3) != 0) goto fail;
                r = gt + 1;
                continue;
            }
        }

        if (apex_html_copy_span(&out, &cap, &olen, &r, end, gt + 1) != 0) goto fail;
    }

    return out;
fail:
    free(out);
    return NULL;
}
|
|
4351
|
+
|
|
3916
4352
|
char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options *options) {
|
|
3917
4353
|
if (!markdown || len == 0) {
|
|
3918
4354
|
char *empty = malloc(1);
|
|
@@ -3934,6 +4370,15 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
3934
4370
|
/* Use local_opts for rest of function (mutable) - shadow the const parameter */
|
|
3935
4371
|
#define options (&local_opts)
|
|
3936
4372
|
|
|
4373
|
+
if (local_opts.strict_xhtml) {
|
|
4374
|
+
local_opts.xhtml = true;
|
|
4375
|
+
}
|
|
4376
|
+
|
|
4377
|
+
/* Man/man-html output: force disable smart typography so option names (e.g. --to) stay as literal -- */
|
|
4378
|
+
if (options->output_format == APEX_OUTPUT_MAN || options->output_format == APEX_OUTPUT_MAN_HTML) {
|
|
4379
|
+
local_opts.enable_smart_typography = false;
|
|
4380
|
+
}
|
|
4381
|
+
|
|
3937
4382
|
/* Extract metadata if enabled (preprocessing step) */
|
|
3938
4383
|
/* Safety check: ensure len doesn't exceed actual string length */
|
|
3939
4384
|
size_t actual_len = strlen(markdown);
|
|
@@ -3985,12 +4430,15 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
3985
4430
|
len, text_ptr, len > 200 ? "..." : "");
|
|
3986
4431
|
}
|
|
3987
4432
|
|
|
4433
|
+
/* Create deflist debug log as soon as conversion starts (so it exists even if we exit early or deflists are disabled) */
|
|
4434
|
+
apex_deflist_debug_touch(options->enable_definition_lists);
|
|
4435
|
+
|
|
3988
4436
|
if (options->mode == APEX_MODE_MULTIMARKDOWN ||
|
|
3989
4437
|
options->mode == APEX_MODE_KRAMDOWN ||
|
|
3990
4438
|
options->mode == APEX_MODE_UNIFIED) {
|
|
3991
4439
|
/* Extract metadata FIRST */
|
|
3992
4440
|
PROFILE_START(metadata);
|
|
3993
|
-
metadata =
|
|
4441
|
+
metadata = apex_extract_metadata_for_mode(&text_ptr, options->mode);
|
|
3994
4442
|
PROFILE_END(metadata);
|
|
3995
4443
|
if (getenv("APEX_DEBUG_PIPELINE")) {
|
|
3996
4444
|
size_t len = strlen(text_ptr);
|
|
@@ -4949,12 +5397,16 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
4949
5397
|
/* Register extensions based on mode and options */
|
|
4950
5398
|
apex_register_extensions(parser, options);
|
|
4951
5399
|
|
|
5400
|
+
if (options->cmark_init) {
|
|
5401
|
+
options->cmark_init(parser, options, cmark_opts, options->cmark_user_data);
|
|
5402
|
+
}
|
|
5403
|
+
|
|
4952
5404
|
/* Feed normalized text to parser */
|
|
4953
5405
|
if (getenv("APEX_DEBUG_PIPELINE")) {
|
|
4954
5406
|
fprintf(stderr, "[APEX_DEBUG] markdown to parse (len=%zu): %.350s%s\n",
|
|
4955
5407
|
text_len, text_ptr, text_len > 350 ? "..." : "");
|
|
4956
5408
|
}
|
|
4957
|
-
cmark_parser_feed(parser, text_ptr, text_len);
|
|
5409
|
+
cmark_parser_feed(parser, text_len ? text_ptr : "", text_len);
|
|
4958
5410
|
cmark_node *document = cmark_parser_finish(parser);
|
|
4959
5411
|
PROFILE_END(parsing);
|
|
4960
5412
|
|
|
@@ -4964,12 +5416,62 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
4964
5416
|
}
|
|
4965
5417
|
|
|
4966
5418
|
if (!document) {
|
|
5419
|
+
if (options->cmark_done) {
|
|
5420
|
+
options->cmark_done(parser, options, cmark_opts, options->cmark_user_data);
|
|
5421
|
+
}
|
|
4967
5422
|
cmark_parser_free(parser);
|
|
4968
5423
|
free(working_text);
|
|
4969
5424
|
apex_free_metadata(metadata);
|
|
4970
5425
|
return NULL;
|
|
4971
5426
|
}
|
|
4972
5427
|
|
|
5428
|
+
/* If output format is JSON, emit JSON right after parsing (before AST filters) */
|
|
5429
|
+
if (options->output_format == APEX_OUTPUT_JSON) {
|
|
5430
|
+
char *json = apex_cmark_to_pandoc_json(document, options);
|
|
5431
|
+
cmark_node_free(document);
|
|
5432
|
+
cmark_parser_free(parser);
|
|
5433
|
+
free(working_text);
|
|
5434
|
+
apex_free_metadata(metadata);
|
|
5435
|
+
/* Note: Preprocessing buffers are conditionally allocated and may not be in scope here.
|
|
5436
|
+
* This is acceptable as JSON output is typically used for debugging/inspection. */
|
|
5437
|
+
return json;
|
|
5438
|
+
}
|
|
5439
|
+
|
|
5440
|
+
/* Run AST-level filters (Pandoc-style JSON filters) before any */
|
|
5441
|
+
/* AST post-processing or rendering. */
|
|
5442
|
+
if (options->ast_filter_commands && options->ast_filter_count > 0) {
|
|
5443
|
+
/* Determine target format string for filters based on output format */
|
|
5444
|
+
const char *target_format = "html";
|
|
5445
|
+
if (options->output_format == APEX_OUTPUT_JSON ||
|
|
5446
|
+
options->output_format == APEX_OUTPUT_JSON_FILTERED) {
|
|
5447
|
+
target_format = "json";
|
|
5448
|
+
} else if (options->output_format == APEX_OUTPUT_MARKDOWN ||
|
|
5449
|
+
options->output_format == APEX_OUTPUT_MMD ||
|
|
5450
|
+
options->output_format == APEX_OUTPUT_COMMONMARK ||
|
|
5451
|
+
options->output_format == APEX_OUTPUT_KRAMDOWN ||
|
|
5452
|
+
options->output_format == APEX_OUTPUT_GFM) {
|
|
5453
|
+
target_format = "markdown";
|
|
5454
|
+
} else if (options->output_format == APEX_OUTPUT_TERMINAL ||
|
|
5455
|
+
options->output_format == APEX_OUTPUT_TERMINAL256) {
|
|
5456
|
+
target_format = "terminal";
|
|
5457
|
+
}
|
|
5458
|
+
cmark_node *filtered = apex_run_ast_filters(document, options, target_format);
|
|
5459
|
+
if (!filtered && options->ast_filter_strict) {
|
|
5460
|
+
cmark_node_free(document);
|
|
5461
|
+
if (options->cmark_done) {
|
|
5462
|
+
options->cmark_done(parser, options, cmark_opts, options->cmark_user_data);
|
|
5463
|
+
}
|
|
5464
|
+
cmark_parser_free(parser);
|
|
5465
|
+
free(working_text);
|
|
5466
|
+
apex_free_metadata(metadata);
|
|
5467
|
+
return NULL;
|
|
5468
|
+
}
|
|
5469
|
+
if (filtered && filtered != document) {
|
|
5470
|
+
cmark_node_free(document);
|
|
5471
|
+
document = filtered;
|
|
5472
|
+
}
|
|
5473
|
+
}
|
|
5474
|
+
|
|
4973
5475
|
/* Postprocess wiki links if enabled */
|
|
4974
5476
|
if (options->enable_wiki_links) {
|
|
4975
5477
|
/* Fast path: skip AST walk if no wiki link markers present */
|
|
@@ -4990,20 +5492,10 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
4990
5492
|
apex_process_callouts_in_tree(document);
|
|
4991
5493
|
}
|
|
4992
5494
|
|
|
4993
|
-
/* Process
|
|
4994
|
-
|
|
4995
|
-
|
|
4996
|
-
|
|
4997
|
-
while ((event = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
|
4998
|
-
cmark_node *node = cmark_iter_get_node(iter);
|
|
4999
|
-
if (event == CMARK_EVENT_ENTER && cmark_node_get_type(node) == CMARK_NODE_HEADING) {
|
|
5000
|
-
apex_process_manual_header_id(node);
|
|
5001
|
-
}
|
|
5002
|
-
}
|
|
5003
|
-
cmark_iter_free(iter);
|
|
5004
|
-
}
|
|
5005
|
-
|
|
5006
|
-
/* Process IAL (Inline Attribute Lists) if in Kramdown or Unified mode */
|
|
5495
|
+
/* Process IAL (Inline Attribute Lists) BEFORE manual header IDs.
|
|
5496
|
+
IAL handles {: #id}, {#id}, and {.class} - running first ensures these
|
|
5497
|
+
are extracted and removed from heading text before manual header ID
|
|
5498
|
+
looks for MMD [id] or Kramdown {#id}. Avoids duplicate handling. */
|
|
5007
5499
|
if (alds || options->mode == APEX_MODE_KRAMDOWN || options->mode == APEX_MODE_UNIFIED) {
|
|
5008
5500
|
/* Fast path: skip AST walk if no IAL markers present */
|
|
5009
5501
|
/* Check for both Kramdown-style ({:) and Pandoc-style ({# or {.) IALs */
|
|
@@ -5016,6 +5508,21 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5016
5508
|
}
|
|
5017
5509
|
}
|
|
5018
5510
|
|
|
5511
|
+
/* Process manual header IDs (MMD [id] and Kramdown {#id}) - after IAL
|
|
5512
|
+
so IAL's {#id} handling doesn't conflict; manual ID handles [id] and
|
|
5513
|
+
any {#id} IAL might have missed (e.g. in multi-child headings) */
|
|
5514
|
+
if (options->generate_header_ids) {
|
|
5515
|
+
cmark_iter *iter = cmark_iter_new(document);
|
|
5516
|
+
cmark_event_type event;
|
|
5517
|
+
while ((event = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
|
5518
|
+
cmark_node *node = cmark_iter_get_node(iter);
|
|
5519
|
+
if (event == CMARK_EVENT_ENTER && cmark_node_get_type(node) == CMARK_NODE_HEADING) {
|
|
5520
|
+
apex_process_manual_header_id(node);
|
|
5521
|
+
}
|
|
5522
|
+
}
|
|
5523
|
+
cmark_iter_free(iter);
|
|
5524
|
+
}
|
|
5525
|
+
|
|
5019
5526
|
/* Apply image attributes to image nodes */
|
|
5020
5527
|
if (img_attrs) {
|
|
5021
5528
|
PROFILE_START(image_attrs);
|
|
@@ -5030,6 +5537,56 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5030
5537
|
|
|
5031
5538
|
/* Note: Critic Markup is now handled via preprocessing (before parsing) */
|
|
5032
5539
|
|
|
5540
|
+
/* If output format is JSON (after filters), serialize AST to JSON and return */
|
|
5541
|
+
if (options->output_format == APEX_OUTPUT_JSON_FILTERED) {
|
|
5542
|
+
char *json = apex_cmark_to_pandoc_json(document, options);
|
|
5543
|
+
/* Note: Cleanup happens at end of function - document and other resources
|
|
5544
|
+
* will be freed there. We return the JSON string here. */
|
|
5545
|
+
return json;
|
|
5546
|
+
}
|
|
5547
|
+
|
|
5548
|
+
/* If output format is Markdown, serialize AST to Markdown and return */
|
|
5549
|
+
if (options->output_format == APEX_OUTPUT_MARKDOWN ||
|
|
5550
|
+
options->output_format == APEX_OUTPUT_MMD ||
|
|
5551
|
+
options->output_format == APEX_OUTPUT_COMMONMARK ||
|
|
5552
|
+
options->output_format == APEX_OUTPUT_KRAMDOWN ||
|
|
5553
|
+
options->output_format == APEX_OUTPUT_GFM) {
|
|
5554
|
+
apex_markdown_dialect_t dialect;
|
|
5555
|
+
if (options->output_format == APEX_OUTPUT_MARKDOWN) {
|
|
5556
|
+
dialect = APEX_MD_DIALECT_UNIFIED;
|
|
5557
|
+
} else if (options->output_format == APEX_OUTPUT_MMD) {
|
|
5558
|
+
dialect = APEX_MD_DIALECT_MMD;
|
|
5559
|
+
} else if (options->output_format == APEX_OUTPUT_COMMONMARK) {
|
|
5560
|
+
dialect = APEX_MD_DIALECT_COMMONMARK;
|
|
5561
|
+
} else if (options->output_format == APEX_OUTPUT_KRAMDOWN) {
|
|
5562
|
+
dialect = APEX_MD_DIALECT_KRAMDOWN;
|
|
5563
|
+
} else { /* APEX_OUTPUT_GFM */
|
|
5564
|
+
dialect = APEX_MD_DIALECT_GFM;
|
|
5565
|
+
}
|
|
5566
|
+
char *markdown = apex_cmark_to_markdown(document, options, dialect);
|
|
5567
|
+
/* Note: Cleanup happens at end of function - document and other resources
|
|
5568
|
+
* will be freed there. We return the markdown string here. */
|
|
5569
|
+
return markdown;
|
|
5570
|
+
}
|
|
5571
|
+
|
|
5572
|
+
/* If output format is terminal/terminal256, serialize AST to ANSI terminal and return */
|
|
5573
|
+
if (options->output_format == APEX_OUTPUT_TERMINAL ||
|
|
5574
|
+
options->output_format == APEX_OUTPUT_TERMINAL256) {
|
|
5575
|
+
bool use_256 = (options->output_format == APEX_OUTPUT_TERMINAL256);
|
|
5576
|
+
char *tty = apex_cmark_to_terminal(document, options, use_256);
|
|
5577
|
+
return tty;
|
|
5578
|
+
}
|
|
5579
|
+
|
|
5580
|
+
/* If output format is man (roff) or man-html, serialize AST and return */
|
|
5581
|
+
if (options->output_format == APEX_OUTPUT_MAN) {
|
|
5582
|
+
char *roff = apex_cmark_to_man_roff(document, options);
|
|
5583
|
+
return roff ? roff : strdup(".TH stub 1 \"\" \"\"\n");
|
|
5584
|
+
}
|
|
5585
|
+
if (options->output_format == APEX_OUTPUT_MAN_HTML) {
|
|
5586
|
+
char *man_html = apex_cmark_to_man_html(document, options);
|
|
5587
|
+
return man_html ? man_html : strdup("<!DOCTYPE html><html><body><p>stub</p></body></html>");
|
|
5588
|
+
}
|
|
5589
|
+
|
|
5033
5590
|
/* Render to HTML
|
|
5034
5591
|
* Use custom renderer when we have attributes (IAL, ALDs, or image attributes)
|
|
5035
5592
|
* Otherwise use standard renderer
|
|
@@ -5167,6 +5724,7 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5167
5724
|
char *html_footer_metadata = NULL;
|
|
5168
5725
|
char *language_metadata = NULL;
|
|
5169
5726
|
char *quotes_lang_metadata = NULL;
|
|
5727
|
+
char *generic_meta_tags = NULL;
|
|
5170
5728
|
int base_header_level = 1; /* Default is 1 */
|
|
5171
5729
|
|
|
5172
5730
|
if (metadata) {
|
|
@@ -5215,6 +5773,9 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5215
5773
|
base_header_level = (int)level;
|
|
5216
5774
|
}
|
|
5217
5775
|
}
|
|
5776
|
+
|
|
5777
|
+
/* Collect remaining metadata as generic head meta tags. */
|
|
5778
|
+
generic_meta_tags = apex_render_generic_meta_tags(metadata);
|
|
5218
5779
|
}
|
|
5219
5780
|
|
|
5220
5781
|
/* Adjust header levels and quote language based on metadata */
|
|
@@ -5240,6 +5801,21 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5240
5801
|
}
|
|
5241
5802
|
}
|
|
5242
5803
|
|
|
5804
|
+
/* Expand auto media (discover formats from filesystem for img with auto attribute).
|
|
5805
|
+
* Use base_directory when set (e.g. from file path or metadata); otherwise use "."
|
|
5806
|
+
* so auto expansion runs when piping stdin (images resolved relative to cwd). */
|
|
5807
|
+
if (html && strstr(html, "data-apex-replace-auto=1")) {
|
|
5808
|
+
PROFILE_START(expand_auto_media);
|
|
5809
|
+
const char *base = options->base_directory && options->base_directory[0]
|
|
5810
|
+
? options->base_directory : ".";
|
|
5811
|
+
char *expanded = apex_expand_auto_media(html, base);
|
|
5812
|
+
PROFILE_END(expand_auto_media);
|
|
5813
|
+
if (expanded) {
|
|
5814
|
+
free(html);
|
|
5815
|
+
html = expanded;
|
|
5816
|
+
}
|
|
5817
|
+
}
|
|
5818
|
+
|
|
5243
5819
|
/* Convert images to figures with captions (caption="..." always wraps; otherwise when enable_image_captions) */
|
|
5244
5820
|
if (html) {
|
|
5245
5821
|
PROFILE_START(image_captions);
|
|
@@ -5251,6 +5827,24 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5251
5827
|
}
|
|
5252
5828
|
}
|
|
5253
5829
|
|
|
5830
|
+
/* Strip redundant <p> around single <img> inside <figure> (e.g. from ::: >figure with "< ") */
|
|
5831
|
+
if (html) {
|
|
5832
|
+
char *stripped = apex_strip_figure_paragraph_wrapper(html);
|
|
5833
|
+
if (stripped) {
|
|
5834
|
+
free(html);
|
|
5835
|
+
html = stripped;
|
|
5836
|
+
}
|
|
5837
|
+
}
|
|
5838
|
+
|
|
5839
|
+
/* Strip <p> that wraps only a single block element (figure, video, picture) - invalid HTML5 */
|
|
5840
|
+
if (html) {
|
|
5841
|
+
char *stripped = apex_strip_block_paragraph_wrapper(html);
|
|
5842
|
+
if (stripped) {
|
|
5843
|
+
free(html);
|
|
5844
|
+
html = stripped;
|
|
5845
|
+
}
|
|
5846
|
+
}
|
|
5847
|
+
|
|
5254
5848
|
/* Inject header IDs if enabled */
|
|
5255
5849
|
if (options->generate_header_ids && html) {
|
|
5256
5850
|
PROFILE_START(header_ids);
|
|
@@ -5325,7 +5919,13 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5325
5919
|
/* Apply external syntax highlighting if requested */
|
|
5326
5920
|
if (options->code_highlighter && html) {
|
|
5327
5921
|
PROFILE_START(syntax_highlight);
|
|
5328
|
-
|
|
5922
|
+
bool ansi_out = (options->output_format == APEX_OUTPUT_TERMINAL || options->output_format == APEX_OUTPUT_TERMINAL256);
|
|
5923
|
+
char *highlighted = apex_apply_syntax_highlighting(html,
|
|
5924
|
+
options->code_highlighter,
|
|
5925
|
+
options->code_line_numbers,
|
|
5926
|
+
options->highlight_language_only,
|
|
5927
|
+
ansi_out,
|
|
5928
|
+
options->code_highlight_theme);
|
|
5329
5929
|
PROFILE_END(syntax_highlight);
|
|
5330
5930
|
if (highlighted && highlighted != html) {
|
|
5331
5931
|
free(html);
|
|
@@ -5480,6 +6080,9 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5480
6080
|
|
|
5481
6081
|
/* Clean up */
|
|
5482
6082
|
cmark_node_free(document);
|
|
6083
|
+
if (options->cmark_done) {
|
|
6084
|
+
options->cmark_done(parser, options, cmark_opts, options->cmark_user_data);
|
|
6085
|
+
}
|
|
5483
6086
|
cmark_parser_free(parser);
|
|
5484
6087
|
free(working_text);
|
|
5485
6088
|
if (ial_preprocessed) free(ial_preprocessed);
|
|
@@ -5627,10 +6230,36 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5627
6230
|
|
|
5628
6231
|
const char *footer_to_use = footer_with_scripts ? footer_with_scripts : html_footer_metadata;
|
|
5629
6232
|
|
|
6233
|
+
/* Combine generated generic meta tags with any explicit HTML Header metadata. */
|
|
6234
|
+
char *combined_head_metadata = NULL;
|
|
6235
|
+
const char *head_to_use = html_header_metadata;
|
|
6236
|
+
if (generic_meta_tags || html_header_metadata) {
|
|
6237
|
+
size_t generic_len = generic_meta_tags ? strlen(generic_meta_tags) : 0;
|
|
6238
|
+
size_t header_len = html_header_metadata ? strlen(html_header_metadata) : 0;
|
|
6239
|
+
size_t newline_len = (generic_len > 0 && header_len > 0) ? 1 : 0;
|
|
6240
|
+
combined_head_metadata = malloc(generic_len + newline_len + header_len + 1);
|
|
6241
|
+
if (combined_head_metadata) {
|
|
6242
|
+
size_t pos = 0;
|
|
6243
|
+
if (generic_len > 0) {
|
|
6244
|
+
memcpy(combined_head_metadata + pos, generic_meta_tags, generic_len);
|
|
6245
|
+
pos += generic_len;
|
|
6246
|
+
}
|
|
6247
|
+
if (newline_len) {
|
|
6248
|
+
combined_head_metadata[pos++] = '\n';
|
|
6249
|
+
}
|
|
6250
|
+
if (header_len > 0) {
|
|
6251
|
+
memcpy(combined_head_metadata + pos, html_header_metadata, header_len);
|
|
6252
|
+
pos += header_len;
|
|
6253
|
+
}
|
|
6254
|
+
combined_head_metadata[pos] = '\0';
|
|
6255
|
+
head_to_use = combined_head_metadata;
|
|
6256
|
+
}
|
|
6257
|
+
}
|
|
6258
|
+
|
|
5630
6259
|
PROFILE_START(standalone_wrap);
|
|
5631
6260
|
char *document = apex_wrap_html_document(html, local_opts.document_title, css_paths, css_count,
|
|
5632
|
-
local_opts.code_highlighter,
|
|
5633
|
-
language_metadata);
|
|
6261
|
+
local_opts.code_highlighter, head_to_use, footer_to_use,
|
|
6262
|
+
language_metadata, local_opts.strict_xhtml);
|
|
5634
6263
|
PROFILE_END(standalone_wrap);
|
|
5635
6264
|
|
|
5636
6265
|
/* Free temporary metadata stylesheet array if we allocated it */
|
|
@@ -5645,6 +6274,9 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5645
6274
|
if (footer_with_scripts) {
|
|
5646
6275
|
free(footer_with_scripts);
|
|
5647
6276
|
}
|
|
6277
|
+
if (combined_head_metadata) {
|
|
6278
|
+
free(combined_head_metadata);
|
|
6279
|
+
}
|
|
5648
6280
|
|
|
5649
6281
|
/* If requested, replace stylesheet links with embedded CSS contents */
|
|
5650
6282
|
if (html && css_paths && css_count > 0 && local_opts.embed_stylesheet) {
|
|
@@ -5773,6 +6405,7 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5773
6405
|
if (html_footer_metadata) free(html_footer_metadata);
|
|
5774
6406
|
if (language_metadata) free(language_metadata);
|
|
5775
6407
|
if (quotes_lang_metadata) free(quotes_lang_metadata);
|
|
6408
|
+
if (generic_meta_tags) free(generic_meta_tags);
|
|
5776
6409
|
if (h1_title) free(h1_title);
|
|
5777
6410
|
|
|
5778
6411
|
/* Remove blank lines within tables (applies to both pretty and non-pretty) */
|
|
@@ -5810,6 +6443,17 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5810
6443
|
}
|
|
5811
6444
|
}
|
|
5812
6445
|
|
|
6446
|
+
/* XHTML-style void elements (--xhtml / --strict-xhtml); run after pretty-print (HTML only) */
|
|
6447
|
+
if (local_opts.xhtml && html && options->output_format == APEX_OUTPUT_HTML) {
|
|
6448
|
+
PROFILE_START(xhtml_void_tags);
|
|
6449
|
+
char *xhtml_out = apex_html_apply_xhtml_void_tags(html);
|
|
6450
|
+
PROFILE_END(xhtml_void_tags);
|
|
6451
|
+
if (xhtml_out) {
|
|
6452
|
+
free(html);
|
|
6453
|
+
html = xhtml_out;
|
|
6454
|
+
}
|
|
6455
|
+
}
|
|
6456
|
+
|
|
5813
6457
|
PROFILE_END(total);
|
|
5814
6458
|
|
|
5815
6459
|
if (profiling_enabled()) {
|
|
@@ -5822,7 +6466,7 @@ char *apex_markdown_to_html(const char *markdown, size_t len, const apex_options
|
|
|
5822
6466
|
/**
|
|
5823
6467
|
* Wrap HTML content in complete HTML5 document structure
|
|
5824
6468
|
*/
|
|
5825
|
-
char *apex_wrap_html_document(const char *content, const char *title, const char **stylesheet_paths, size_t stylesheet_count, const char *code_highlighter, const char *html_header, const char *html_footer, const char *language) {
|
|
6469
|
+
char *apex_wrap_html_document(const char *content, const char *title, const char **stylesheet_paths, size_t stylesheet_count, const char *code_highlighter, const char *html_header, const char *html_footer, const char *language, bool strict_xhtml) {
|
|
5826
6470
|
if (!content) return NULL;
|
|
5827
6471
|
|
|
5828
6472
|
const char *doc_title = title ? title : "Document";
|
|
@@ -5906,10 +6550,19 @@ char *apex_wrap_html_document(const char *content, const char *title, const char
|
|
|
5906
6550
|
const char *version_str = APEX_VERSION_STRING;
|
|
5907
6551
|
if (!version_str) version_str = "unknown";
|
|
5908
6552
|
|
|
5909
|
-
/* HTML5 doctype and opening */
|
|
6553
|
+
/* HTML5 doctype and opening (polyglot XHTML when strict_xhtml) */
|
|
5910
6554
|
/* Add body class if code highlighting is enabled */
|
|
5911
6555
|
const char *body_class = code_highlighter ? " class=\"code-highlighted\"" : "";
|
|
5912
|
-
int n
|
|
6556
|
+
int n;
|
|
6557
|
+
if (strict_xhtml) {
|
|
6558
|
+
n = snprintf(write, remaining,
|
|
6559
|
+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
|
|
6560
|
+
"<!DOCTYPE html>\n"
|
|
6561
|
+
"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"%s\" lang=\"%s\">\n<head>\n",
|
|
6562
|
+
lang, lang);
|
|
6563
|
+
} else {
|
|
6564
|
+
n = snprintf(write, remaining, "<!DOCTYPE html>\n<html lang=\"%s\">\n<head>\n", lang);
|
|
6565
|
+
}
|
|
5913
6566
|
if (n < 0 || (size_t)n >= remaining) {
|
|
5914
6567
|
free(output);
|
|
5915
6568
|
return strdup(content);
|
|
@@ -5918,7 +6571,12 @@ char *apex_wrap_html_document(const char *content, const char *title, const char
|
|
|
5918
6571
|
remaining -= n;
|
|
5919
6572
|
|
|
5920
6573
|
/* Meta tags */
|
|
5921
|
-
|
|
6574
|
+
if (strict_xhtml) {
|
|
6575
|
+
n = snprintf(write, remaining,
|
|
6576
|
+
" <meta http-equiv=\"Content-Type\" content=\"application/xhtml+xml; charset=UTF-8\" />\n");
|
|
6577
|
+
} else {
|
|
6578
|
+
n = snprintf(write, remaining, " <meta charset=\"UTF-8\">\n");
|
|
6579
|
+
}
|
|
5922
6580
|
if (n < 0 || (size_t)n >= remaining) {
|
|
5923
6581
|
free(output);
|
|
5924
6582
|
return strdup(content);
|