apex-ruby 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/apex_ext/apex_ext.c +6 -0
- data/ext/apex_ext/apex_src/AGENTS.md +41 -0
- data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
- data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
- data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
- data/ext/apex_ext/apex_src/Package.swift +9 -0
- data/ext/apex_ext/apex_src/README.md +31 -9
- data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
- data/ext/apex_ext/apex_src/VERSION +1 -1
- data/ext/apex_ext/apex_src/cli/main.c +1125 -13
- data/ext/apex_ext/apex_src/docs/index.md +459 -0
- data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
- data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
- data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
- data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
- data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
- data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
- data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
- data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
- data/ext/apex_ext/apex_src/man/apex.1 +663 -620
- data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
- data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
- data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
- data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
- data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
- data/ext/apex_ext/apex_src/pages/index.md +459 -0
- data/ext/apex_ext/apex_src/src/_README.md +4 -4
- data/ext/apex_ext/apex_src/src/apex.c +702 -44
- data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
- data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
- data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
- data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
- data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
- data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
- data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
- data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
- data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
- data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
- data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
- data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
- data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
- data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
- data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
- data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
- data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
- data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
- data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
- data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
- data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
- data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
- data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
- data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
- data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
- data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
- data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
- data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
- data/ext/apex_ext/apex_src/tests/README.md +11 -5
- data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
- data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
- data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
- data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
- data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
- data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
- data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
- data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
- data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
- data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
- data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
- data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
- data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
- data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
- data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
- data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
- data/lib/apex/version.rb +1 -1
- metadata +32 -2
- data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
|
@@ -1,19 +1,24 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Definition List Extension for Apex
|
|
3
|
-
* Implementation
|
|
4
3
|
*
|
|
5
|
-
* Supports
|
|
6
|
-
* Term
|
|
7
|
-
* : Definition 1
|
|
8
|
-
* : Definition 2
|
|
4
|
+
* Supports four formats (all produce <dl><dt>term</dt><dd>definition</dd></dl>):
|
|
9
5
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
6
|
+
* 1. Kramdown single colon:
|
|
7
|
+
* term
|
|
8
|
+
* : definition
|
|
13
9
|
*
|
|
14
|
-
*
|
|
10
|
+
* 2. Kramdown double colon:
|
|
11
|
+
* term
|
|
12
|
+
* :: definition
|
|
15
13
|
*
|
|
16
|
-
*
|
|
14
|
+
* 3. One-line no space: term::definition
|
|
15
|
+
*
|
|
16
|
+
* 4. One-line with space: term :: definition
|
|
17
|
+
*
|
|
18
|
+
* For one-line format, :: must NOT be at line start (that's Kramdown).
|
|
19
|
+
* Whitespace around :: is allowed in one-line format.
|
|
20
|
+
*
|
|
21
|
+
* Both formats enabled by default in unified mode.
|
|
17
22
|
*/
|
|
18
23
|
|
|
19
24
|
#include "definition_list.h"
|
|
@@ -24,1647 +29,619 @@
|
|
|
24
29
|
#include "render.h"
|
|
25
30
|
#include <string.h>
|
|
26
31
|
#include <stdlib.h>
|
|
27
|
-
#include <ctype.h>
|
|
28
32
|
#include <stdbool.h>
|
|
29
|
-
#include <stdio.h>
|
|
30
|
-
|
|
31
|
-
/* Node type IDs */
|
|
32
|
-
cmark_node_type APEX_NODE_DEFINITION_LIST;
|
|
33
|
-
cmark_node_type APEX_NODE_DEFINITION_TERM;
|
|
34
|
-
cmark_node_type APEX_NODE_DEFINITION_DATA;
|
|
35
33
|
|
|
36
34
|
/**
|
|
37
|
-
* Check if a line
|
|
35
|
+
* Check if a line matches the one-line definition format: Term :: Definition
|
|
36
|
+
* The line must contain :: with optional whitespace around it.
|
|
37
|
+
* Uses the last :: to avoid splitting URLs (e.g. http://example.com).
|
|
38
|
+
* Returns the position of :: or -1 if not a match.
|
|
38
39
|
*/
|
|
39
|
-
static
|
|
40
|
-
if (!
|
|
41
|
-
|
|
42
|
-
int
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
40
|
+
static int find_def_separator(const unsigned char *line, int len) {
|
|
41
|
+
if (!line || len < 3) return -1;
|
|
42
|
+
|
|
43
|
+
int last_sep = -1;
|
|
44
|
+
for (int i = 0; i < len - 1; i++) {
|
|
45
|
+
if (line[i] == ':' && line[i + 1] == ':') {
|
|
46
|
+
/* Skip :: that's part of URL (://) */
|
|
47
|
+
if (i + 3 <= len && line[i + 2] == '/') continue;
|
|
48
|
+
/* Skip :: that's part of div/custom element (::: or more) */
|
|
49
|
+
if (i > 0 && line[i - 1] == ':') continue;
|
|
50
|
+
if (i + 2 < len && line[i + 2] == ':') continue;
|
|
51
|
+
last_sep = i;
|
|
52
|
+
}
|
|
47
53
|
}
|
|
54
|
+
if (last_sep < 0) return -1;
|
|
48
55
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
56
|
+
/* Ensure we have content before (at least one non-space) */
|
|
57
|
+
int before = 0;
|
|
58
|
+
for (int j = 0; j < last_sep; j++) {
|
|
59
|
+
if (line[j] != ' ' && line[j] != '\t') {
|
|
60
|
+
before = 1;
|
|
61
|
+
break;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
/* After: at least one character */
|
|
65
|
+
int after = (last_sep + 2 < len);
|
|
66
|
+
if (before && after) return last_sep;
|
|
67
|
+
return -1;
|
|
68
|
+
}
|
|
57
69
|
|
|
58
|
-
|
|
70
|
+
/**
|
|
71
|
+
* Check if a line is a Kramdown-style definition line (starts with : or :: after optional spaces).
|
|
72
|
+
* Reject ::: or more - those are div/custom element fences, not definition lists.
|
|
73
|
+
*/
|
|
74
|
+
static bool is_kramdown_def_line(const char *line, size_t len) {
|
|
75
|
+
if (!line || len == 0) return false;
|
|
76
|
+
size_t i = 0;
|
|
77
|
+
while (i < len && (line[i] == ' ' || line[i] == '\t')) i++;
|
|
78
|
+
if (i >= len) return false;
|
|
79
|
+
if (line[i] != ':') return false;
|
|
80
|
+
int colon_len = 1;
|
|
81
|
+
if (i + 2 <= len && line[i + 1] == ':') colon_len = 2;
|
|
82
|
+
/* Reject 3+ colons (::: is div fence) */
|
|
83
|
+
if (i + 3 <= len && line[i + 2] == ':') return false;
|
|
84
|
+
if (i + (size_t)colon_len >= len) return false;
|
|
85
|
+
if (line[i + colon_len] != ' ' && line[i + colon_len] != '\t') return false;
|
|
59
86
|
return true;
|
|
60
87
|
}
|
|
61
88
|
|
|
62
89
|
/**
|
|
63
|
-
*
|
|
64
|
-
*
|
|
65
|
-
* Caller must free each string and the array itself
|
|
90
|
+
* Check if a line looks like a table row (starts with | after optional indent).
|
|
91
|
+
* Used to avoid treating : Caption as a definition when it's a table caption.
|
|
66
92
|
*/
|
|
67
|
-
static
|
|
68
|
-
if (!
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
char **ids = malloc(capacity * sizeof(char*));
|
|
73
|
-
if (!ids) return NULL;
|
|
74
|
-
|
|
75
|
-
const char *p = text;
|
|
76
|
-
while (*p) {
|
|
77
|
-
if (*p == '[') {
|
|
78
|
-
const char *text_start = p + 1;
|
|
79
|
-
const char *text_end = strchr(text_start, ']');
|
|
80
|
-
if (text_end) {
|
|
81
|
-
if (text_end[1] == '[' && text_end[2] == ']') {
|
|
82
|
-
/* Found shortcut reference [text][] - use text as the ID */
|
|
83
|
-
size_t text_len = text_end - text_start;
|
|
84
|
-
if (text_len > 0) {
|
|
85
|
-
char *id = malloc(text_len + 1);
|
|
86
|
-
if (id) {
|
|
87
|
-
memcpy(id, text_start, text_len);
|
|
88
|
-
id[text_len] = '\0';
|
|
89
|
-
|
|
90
|
-
/* Check if we already have this ID */
|
|
91
|
-
bool found = false;
|
|
92
|
-
for (size_t i = 0; i < *count; i++) {
|
|
93
|
-
if (strcmp(ids[i], id) == 0) {
|
|
94
|
-
found = true;
|
|
95
|
-
free(id);
|
|
96
|
-
break;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
if (!found) {
|
|
101
|
-
/* Add to array */
|
|
102
|
-
if (*count >= capacity) {
|
|
103
|
-
capacity *= 2;
|
|
104
|
-
char **new_ids = realloc(ids, capacity * sizeof(char*));
|
|
105
|
-
if (!new_ids) {
|
|
106
|
-
free(id);
|
|
107
|
-
break;
|
|
108
|
-
}
|
|
109
|
-
ids = new_ids;
|
|
110
|
-
}
|
|
111
|
-
ids[*count] = id;
|
|
112
|
-
(*count)++;
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
p = text_end + 3; /* Skip past ]] */
|
|
117
|
-
continue;
|
|
118
|
-
} else if (text_end[1] == '[') {
|
|
119
|
-
/* Found [text][ref] pattern */
|
|
120
|
-
const char *ref_start = text_end + 2;
|
|
121
|
-
const char *ref_end = strchr(ref_start, ']');
|
|
122
|
-
if (ref_end) {
|
|
123
|
-
/* Extract the reference ID */
|
|
124
|
-
size_t ref_len = ref_end - ref_start;
|
|
125
|
-
if (ref_len > 0) {
|
|
126
|
-
char *id = malloc(ref_len + 1);
|
|
127
|
-
if (id) {
|
|
128
|
-
memcpy(id, ref_start, ref_len);
|
|
129
|
-
id[ref_len] = '\0';
|
|
130
|
-
|
|
131
|
-
/* Check if we already have this ID */
|
|
132
|
-
bool found = false;
|
|
133
|
-
for (size_t i = 0; i < *count; i++) {
|
|
134
|
-
if (strcmp(ids[i], id) == 0) {
|
|
135
|
-
found = true;
|
|
136
|
-
free(id);
|
|
137
|
-
break;
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
if (!found) {
|
|
142
|
-
/* Add to array */
|
|
143
|
-
if (*count >= capacity) {
|
|
144
|
-
capacity *= 2;
|
|
145
|
-
char **new_ids = realloc(ids, capacity * sizeof(char*));
|
|
146
|
-
if (!new_ids) {
|
|
147
|
-
free(id);
|
|
148
|
-
break;
|
|
149
|
-
}
|
|
150
|
-
ids = new_ids;
|
|
151
|
-
}
|
|
152
|
-
ids[*count] = id;
|
|
153
|
-
(*count)++;
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
p = ref_end + 1;
|
|
158
|
-
continue;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
p++;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
/* Add NULL terminator */
|
|
167
|
-
if (*count >= capacity) {
|
|
168
|
-
char **new_ids = realloc(ids, (capacity + 1) * sizeof(char*));
|
|
169
|
-
if (new_ids) ids = new_ids;
|
|
170
|
-
}
|
|
171
|
-
if (ids) ids[*count] = NULL;
|
|
172
|
-
|
|
173
|
-
return ids;
|
|
93
|
+
static bool is_table_row_line(const char *line, size_t len) {
|
|
94
|
+
if (!line || len == 0) return false;
|
|
95
|
+
size_t i = 0;
|
|
96
|
+
while (i < len && (line[i] == ' ' || line[i] == '\t')) i++;
|
|
97
|
+
return i < len && line[i] == '|';
|
|
174
98
|
}
|
|
175
99
|
|
|
176
100
|
/**
|
|
177
|
-
*
|
|
178
|
-
* based on a list of reference IDs
|
|
179
|
-
* Returns a string containing only the needed definitions, or NULL if none found
|
|
180
|
-
* Caller must free the returned string
|
|
101
|
+
* Check if the next non-blank line after pos is a table row. Used for "caption before table".
|
|
181
102
|
*/
|
|
182
|
-
static
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
const char *p = all_refs;
|
|
192
|
-
while (*p) {
|
|
193
|
-
const char *line_start = p;
|
|
194
|
-
const char *line_end = strchr(p, '\n');
|
|
195
|
-
if (!line_end) line_end = p + strlen(p);
|
|
196
|
-
|
|
197
|
-
/* Skip leading whitespace */
|
|
198
|
-
const char *content_start = line_start;
|
|
199
|
-
while (content_start < line_end && (*content_start == ' ' || *content_start == '\t')) {
|
|
200
|
-
content_start++;
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
/* Check if this is a reference link definition: [id]: URL */
|
|
204
|
-
if (content_start < line_end && *content_start == '[') {
|
|
205
|
-
const char *id_end = strchr(content_start + 1, ']');
|
|
206
|
-
if (id_end && id_end < line_end && id_end[1] == ':') {
|
|
207
|
-
/* Extract the ID from this definition */
|
|
208
|
-
size_t def_id_len = id_end - (content_start + 1);
|
|
209
|
-
char *def_id = malloc(def_id_len + 1);
|
|
210
|
-
if (def_id) {
|
|
211
|
-
memcpy(def_id, content_start + 1, def_id_len);
|
|
212
|
-
def_id[def_id_len] = '\0';
|
|
213
|
-
|
|
214
|
-
/* Check if this ID is in our needed list */
|
|
215
|
-
bool needed = false;
|
|
216
|
-
for (size_t i = 0; needed_ids[i]; i++) {
|
|
217
|
-
if (strcmp(needed_ids[i], def_id) == 0) {
|
|
218
|
-
needed = true;
|
|
219
|
-
break;
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
if (needed) {
|
|
224
|
-
/* Include this definition */
|
|
225
|
-
size_t line_len = line_end - line_start;
|
|
226
|
-
if (line_end < p + strlen(p) && *line_end == '\n') {
|
|
227
|
-
line_len++; /* Include newline */
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
/* Expand buffer if needed */
|
|
231
|
-
if (result_len + line_len + 1 >= result_capacity) {
|
|
232
|
-
result_capacity = (result_len + line_len + 1) * 2;
|
|
233
|
-
char *new_result = realloc(result, result_capacity);
|
|
234
|
-
if (!new_result) {
|
|
235
|
-
free(def_id);
|
|
236
|
-
break;
|
|
237
|
-
}
|
|
238
|
-
result = new_result;
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
/* Copy the line */
|
|
242
|
-
memcpy(result + result_len, line_start, line_len);
|
|
243
|
-
result_len += line_len;
|
|
244
|
-
result[result_len] = '\0';
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
free(def_id);
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
/* Move to next line */
|
|
253
|
-
p = line_end;
|
|
254
|
-
if (*p == '\n') p++;
|
|
103
|
+
static bool next_nonblank_line_is_table(const char *pos, const char *text_end) {
|
|
104
|
+
while (pos < text_end) {
|
|
105
|
+
if (*pos == '\n') { pos++; continue; }
|
|
106
|
+
const char *line_end = strchr(pos, '\n');
|
|
107
|
+
if (!line_end) line_end = text_end;
|
|
108
|
+
const char *p = pos;
|
|
109
|
+
while (p < line_end && (*p == ' ' || *p == '\t')) p++;
|
|
110
|
+
if (p < line_end) return *p == '|';
|
|
111
|
+
pos = line_end + (line_end < text_end && *line_end == '\n' ? 1 : 0);
|
|
255
112
|
}
|
|
113
|
+
return false;
|
|
114
|
+
}
|
|
256
115
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
116
|
+
/** True if content at p looks like a list marker (- , * , + , or digit+. ) */
|
|
117
|
+
static bool looks_like_list_marker(const char *p) {
|
|
118
|
+
if (*p == '-' || *p == '*' || *p == '+')
|
|
119
|
+
return (p[1] == ' ' || p[1] == '\t');
|
|
120
|
+
if (isdigit((unsigned char)*p)) {
|
|
121
|
+
while (isdigit((unsigned char)*p)) p++;
|
|
122
|
+
return (*p == '.' && (p[1] == ' ' || p[1] == '\t'));
|
|
260
123
|
}
|
|
261
|
-
|
|
262
|
-
return result;
|
|
124
|
+
return false;
|
|
263
125
|
}
|
|
264
126
|
|
|
265
127
|
/**
|
|
266
|
-
*
|
|
267
|
-
*
|
|
268
|
-
* Caller must free the returned string
|
|
128
|
+
* True if line is an indented code block (4+ spaces or tab at start, not a list line).
|
|
129
|
+
* Used to skip definition list processing inside indented code blocks.
|
|
269
130
|
*/
|
|
270
|
-
static
|
|
271
|
-
if (
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
size_t refs_len = 0;
|
|
281
|
-
|
|
282
|
-
while (*p) {
|
|
283
|
-
const char *line_start = p;
|
|
284
|
-
const char *line_end = strchr(p, '\n');
|
|
285
|
-
if (!line_end) line_end = p + strlen(p);
|
|
286
|
-
|
|
287
|
-
/* Skip leading whitespace */
|
|
288
|
-
const char *content_start = line_start;
|
|
289
|
-
while (content_start < line_end && (*content_start == ' ' || *content_start == '\t')) {
|
|
290
|
-
content_start++;
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
/* Check if this is a reference link definition: [id]: URL */
|
|
294
|
-
if (content_start < line_end && *content_start == '[') {
|
|
295
|
-
const char *id_end = strchr(content_start + 1, ']');
|
|
296
|
-
if (id_end && id_end < line_end && id_end[1] == ':') {
|
|
297
|
-
/* This is a reference definition - extract the entire line */
|
|
298
|
-
size_t line_len = line_end - line_start;
|
|
299
|
-
if (line_end < p + strlen(p) && *line_end == '\n') {
|
|
300
|
-
line_len++; /* Include newline */
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
/* Check if we need to expand the buffer */
|
|
304
|
-
if (refs_len + line_len + 1 >= refs_capacity) {
|
|
305
|
-
refs_capacity = (refs_len + line_len + 1) * 2;
|
|
306
|
-
char *new_refs = realloc(refs, refs_capacity);
|
|
307
|
-
if (!new_refs) {
|
|
308
|
-
free(refs);
|
|
309
|
-
return NULL;
|
|
310
|
-
}
|
|
311
|
-
refs = new_refs;
|
|
312
|
-
}
|
|
313
|
-
|
|
314
|
-
/* Copy the line */
|
|
315
|
-
memcpy(refs + refs_len, line_start, line_len);
|
|
316
|
-
refs_len += line_len;
|
|
317
|
-
refs[refs_len] = '\0';
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
/* Move to next line */
|
|
322
|
-
p = line_end;
|
|
323
|
-
if (*p == '\n') p++;
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
if (refs_len == 0) {
|
|
327
|
-
free(refs);
|
|
328
|
-
return NULL;
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
return refs;
|
|
131
|
+
static bool line_is_indented_code_block(const char *line, size_t len) {
|
|
132
|
+
if (len == 0) return false;
|
|
133
|
+
if (line[0] == '\t') {
|
|
134
|
+
return len > 1 && !looks_like_list_marker(line + 1);
|
|
135
|
+
}
|
|
136
|
+
if (len < 4 || line[0] != ' ' || line[1] != ' ' || line[2] != ' ' || line[3] != ' ')
|
|
137
|
+
return false;
|
|
138
|
+
const char *content = line + 4;
|
|
139
|
+
while (content < line + len && *content == ' ') content++;
|
|
140
|
+
return (content < line + len) && !looks_like_list_marker(content);
|
|
332
141
|
}
|
|
333
142
|
|
|
334
143
|
/**
|
|
335
|
-
*
|
|
144
|
+
* Scans line for inline code backticks, updates state for next line, and returns
|
|
145
|
+
* whether sep_pos is inside an inline code span. Single backticks toggle; 3+ are
|
|
146
|
+
* fenced blocks (handled elsewhere). Used to skip definition processing inside
|
|
147
|
+
* inline code spans, including multi-line spans like `term::def\n :more:`.
|
|
336
148
|
*/
|
|
337
|
-
static
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
/* Check for 4+ leading spaces - these are table captions, not definition lists */
|
|
349
|
-
int leading_spaces = 0;
|
|
350
|
-
while (leading_spaces < len && leading_spaces < 10 && input[leading_spaces] == ' ') {
|
|
351
|
-
leading_spaces++;
|
|
352
|
-
}
|
|
353
|
-
if (leading_spaces >= 4) {
|
|
354
|
-
return NULL; /* Table caption, not definition list */
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
int def_indent;
|
|
358
|
-
if (!is_definition_line(input, len, &def_indent)) {
|
|
359
|
-
return NULL;
|
|
360
|
-
}
|
|
361
|
-
|
|
362
|
-
/* Check if the line contains an IAL (Inline Attribute List) like {#id .class} */
|
|
363
|
-
/* Lines with IALs are almost always table captions, not definition lists */
|
|
364
|
-
const unsigned char *p = input;
|
|
365
|
-
const unsigned char *end = input + len;
|
|
366
|
-
while (p < end) {
|
|
367
|
-
if (*p == '{') {
|
|
368
|
-
p++;
|
|
369
|
-
/* Check if it looks like an IAL: {# or {. or {: */
|
|
370
|
-
if (p < end && (*p == '#' || *p == '.' || *p == ':')) {
|
|
371
|
-
/* Look for closing } */
|
|
372
|
-
while (p < end && *p != '}') {
|
|
373
|
-
p++;
|
|
374
|
-
}
|
|
375
|
-
if (p < end && *p == '}') {
|
|
376
|
-
return NULL; /* This is a table caption, not a definition list */
|
|
377
|
-
}
|
|
378
|
-
}
|
|
149
|
+
static bool scan_inline_code_for_sep(const char *line, size_t len, int sep_pos,
|
|
150
|
+
bool in_span_at_start, bool *out_in_span_at_end) {
|
|
151
|
+
bool in = in_span_at_start;
|
|
152
|
+
bool sep_inside = false;
|
|
153
|
+
for (size_t i = 0; i < len; i++) {
|
|
154
|
+
if ((int)i == sep_pos) sep_inside = in;
|
|
155
|
+
if (line[i] == '`') {
|
|
156
|
+
int count = 1;
|
|
157
|
+
while (i + (size_t)count < len && line[i + count] == '`') count++;
|
|
158
|
+
if (count == 1) in = !in;
|
|
159
|
+
i += (size_t)(count - 1);
|
|
379
160
|
}
|
|
380
|
-
p++;
|
|
381
|
-
}
|
|
382
|
-
|
|
383
|
-
/* Safety check: parent_container must be valid */
|
|
384
|
-
if (!parent_container) {
|
|
385
|
-
return NULL;
|
|
386
161
|
}
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
cmark_node_type parent_type = cmark_node_get_type(parent_container);
|
|
390
|
-
|
|
391
|
-
/* Check if previous block was a paragraph (term) */
|
|
392
|
-
/* Only try to get last child for node types that support children */
|
|
393
|
-
cmark_node *prev = NULL;
|
|
394
|
-
if (parent_type == CMARK_NODE_DOCUMENT ||
|
|
395
|
-
parent_type == CMARK_NODE_BLOCK_QUOTE ||
|
|
396
|
-
parent_type == CMARK_NODE_LIST ||
|
|
397
|
-
parent_type == CMARK_NODE_ITEM ||
|
|
398
|
-
parent_type == APEX_NODE_DEFINITION_LIST ||
|
|
399
|
-
parent_type == APEX_NODE_DEFINITION_DATA) {
|
|
400
|
-
prev = cmark_node_last_child(parent_container);
|
|
401
|
-
} else {
|
|
402
|
-
/* For other node types, don't try to get last child */
|
|
403
|
-
return NULL;
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
if (!prev) {
|
|
407
|
-
return NULL;
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
cmark_node_type prev_type = cmark_node_get_type(prev);
|
|
411
|
-
if (prev_type != CMARK_NODE_PARAGRAPH) {
|
|
412
|
-
return NULL;
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
/* Additional safety: verify prev is still valid and attached to parent */
|
|
416
|
-
cmark_node *prev_parent = cmark_node_parent(prev);
|
|
417
|
-
if (prev_parent != parent_container) {
|
|
418
|
-
return NULL;
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
/* Create definition list container */
|
|
422
|
-
cmark_node *def_list = cmark_node_new_with_mem(APEX_NODE_DEFINITION_LIST, parser->mem);
|
|
423
|
-
if (!def_list) return NULL;
|
|
424
|
-
|
|
425
|
-
/* Convert previous paragraph to term */
|
|
426
|
-
cmark_node *term = cmark_node_new_with_mem(APEX_NODE_DEFINITION_TERM, parser->mem);
|
|
427
|
-
if (!term) {
|
|
428
|
-
cmark_node_free(def_list);
|
|
429
|
-
return NULL;
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
/* Move paragraph children to term - but DON'T unlink prev itself */
|
|
433
|
-
/* Unlinking prev during parsing causes segfaults because the parser is still using it */
|
|
434
|
-
cmark_node *child;
|
|
435
|
-
while ((child = cmark_node_first_child(prev))) {
|
|
436
|
-
cmark_node_unlink(child);
|
|
437
|
-
cmark_node_append_child(term, child);
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
cmark_node_append_child(def_list, term);
|
|
441
|
-
return def_list;
|
|
162
|
+
*out_in_span_at_end = in;
|
|
163
|
+
return (sep_pos >= 0 && (size_t)sep_pos < len) ? sep_inside : false;
|
|
442
164
|
}
|
|
443
165
|
|
|
444
166
|
/**
|
|
445
|
-
*
|
|
167
|
+
* Check if we're inside a fenced code block (```) - don't process definition lists there
|
|
446
168
|
*/
|
|
447
|
-
static
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
(void)ext;
|
|
453
|
-
(void)parser;
|
|
454
|
-
if (cmark_node_get_type(container) != APEX_NODE_DEFINITION_LIST &&
|
|
455
|
-
cmark_node_get_type(container) != APEX_NODE_DEFINITION_DATA) {
|
|
456
|
-
return 0;
|
|
169
|
+
static bool is_code_fence_line(const char *line, size_t len) {
|
|
170
|
+
const char *p = line;
|
|
171
|
+
while (p < line + len && (*p == ' ' || *p == '\t')) p++;
|
|
172
|
+
if (p + 3 <= line + len && p[0] == '`' && p[1] == '`' && p[2] == '`') {
|
|
173
|
+
return true;
|
|
457
174
|
}
|
|
458
|
-
|
|
459
|
-
int def_indent;
|
|
460
|
-
if (is_definition_line(input, len, &def_indent)) {
|
|
461
|
-
return 1; /* This line continues the definition list */
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
/* Also continue if line is blank or indented (block content in definition) */
|
|
465
|
-
if (len == 0 || (len > 0 && (input[0] == ' ' || input[0] == '\t'))) {
|
|
466
|
-
if (cmark_node_get_type(container) == APEX_NODE_DEFINITION_DATA) {
|
|
467
|
-
return 1; /* Block content in definition */
|
|
468
|
-
}
|
|
469
|
-
}
|
|
470
|
-
|
|
471
|
-
return 0;
|
|
175
|
+
return false;
|
|
472
176
|
}
|
|
473
177
|
|
|
474
178
|
/**
|
|
475
|
-
*
|
|
179
|
+
* Render inline content (term or definition) with full document context so cmark
|
|
180
|
+
* can resolve reference links. Parses full_doc + "\n\n" + content so ref defs are
|
|
181
|
+
* available. Returns HTML of the last block (our content), stripping <p></p>.
|
|
182
|
+
* Caller must free the returned string.
|
|
476
183
|
*/
|
|
477
|
-
static
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
(
|
|
481
|
-
if (
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
184
|
+
static char *render_inline_with_doc(const char *content, size_t content_len,
|
|
185
|
+
const char *full_doc, size_t full_doc_len, bool unsafe) {
|
|
186
|
+
size_t buf_len = full_doc_len + 2 + content_len + 1;
|
|
187
|
+
char *buf = malloc(buf_len);
|
|
188
|
+
if (!buf) return NULL;
|
|
189
|
+
memcpy(buf, full_doc, full_doc_len);
|
|
190
|
+
buf[full_doc_len] = '\n';
|
|
191
|
+
buf[full_doc_len + 1] = '\n';
|
|
192
|
+
memcpy(buf + full_doc_len + 2, content, content_len);
|
|
193
|
+
buf[buf_len - 1] = '\0';
|
|
194
|
+
|
|
195
|
+
int opts = CMARK_OPT_DEFAULT | CMARK_OPT_SMART;
|
|
196
|
+
if (unsafe) opts |= CMARK_OPT_UNSAFE | CMARK_OPT_LIBERAL_HTML_TAG;
|
|
197
|
+
cmark_parser *cp = cmark_parser_new(opts);
|
|
198
|
+
if (!cp) { free(buf); return NULL; }
|
|
199
|
+
cmark_parser_feed(cp, buf, (int)(buf_len - 1));
|
|
200
|
+
free(buf);
|
|
201
|
+
cmark_node *doc = cmark_parser_finish(cp);
|
|
202
|
+
cmark_parser_free(cp);
|
|
203
|
+
if (!doc) return NULL;
|
|
204
|
+
|
|
205
|
+
cmark_node *last = cmark_node_last_child(doc);
|
|
206
|
+
if (!last) {
|
|
207
|
+
cmark_node_free(doc);
|
|
208
|
+
return NULL;
|
|
489
209
|
}
|
|
490
|
-
|
|
210
|
+
char *html = cmark_render_html(last, opts, NULL);
|
|
211
|
+
cmark_node_free(doc);
|
|
212
|
+
if (!html) return NULL;
|
|
213
|
+
|
|
214
|
+
/* Strip <p> and </p> wrapper, return inner content */
|
|
215
|
+
char *content_start = html;
|
|
216
|
+
if (strncmp(html, "<p>", 3) == 0) content_start = html + 3;
|
|
217
|
+
size_t html_len = strlen(content_start);
|
|
218
|
+
if (html_len > 5 && strcmp(content_start + html_len - 5, "</p>\n") == 0)
|
|
219
|
+
html_len -= 5;
|
|
220
|
+
else if (html_len > 4 && strcmp(content_start + html_len - 4, "</p>") == 0)
|
|
221
|
+
html_len -= 4;
|
|
222
|
+
char *result = malloc(html_len + 1);
|
|
223
|
+
if (result) {
|
|
224
|
+
memcpy(result, content_start, html_len);
|
|
225
|
+
result[html_len] = '\0';
|
|
226
|
+
}
|
|
227
|
+
free(html);
|
|
228
|
+
return result;
|
|
491
229
|
}
|
|
492
230
|
|
|
493
231
|
/**
|
|
494
|
-
* Process definition lists
|
|
495
|
-
*
|
|
232
|
+
* Process one-line definition lists: Term :: Definition -> <dl><dt>Term</dt><dd>Definition</dd></dl>
|
|
233
|
+
* Returns newly allocated string with HTML, or NULL if no changes (caller uses original).
|
|
496
234
|
*/
|
|
497
235
|
char *apex_process_definition_lists(const char *text, bool unsafe) {
|
|
498
236
|
if (!text) return NULL;
|
|
499
237
|
|
|
500
238
|
size_t text_len = strlen(text);
|
|
501
239
|
|
|
502
|
-
/* Quick scan: check
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
while (check < end && *check == '>') {
|
|
522
|
-
check++;
|
|
523
|
-
/* Skip optional space after > */
|
|
524
|
-
if (check < end && (*check == ' ' || *check == '\t')) {
|
|
525
|
-
check++;
|
|
526
|
-
}
|
|
527
|
-
}
|
|
528
|
-
|
|
529
|
-
/* Check if this line starts with : followed by space/tab */
|
|
530
|
-
if (check < end && *check == ':' && (check + 1) < end && (check[1] == ' ' || check[1] == '\t')) {
|
|
531
|
-
has_def_list_pattern = true;
|
|
240
|
+
/* Quick scan: check for :: or : at line start (skip reference defs [id]: url) */
|
|
241
|
+
bool has_pattern = false;
|
|
242
|
+
const char *scan = text;
|
|
243
|
+
while (*scan) {
|
|
244
|
+
if (scan[0] == ':' && scan[1] == ':') {
|
|
245
|
+
/* Skip ::: or more (div/custom element fence) - only match exactly :: */
|
|
246
|
+
if (scan > text && scan[-1] == ':') { scan++; continue; }
|
|
247
|
+
if (scan[2] == ':') { scan++; continue; }
|
|
248
|
+
const char *line_start = scan;
|
|
249
|
+
while (line_start > text && line_start[-1] != '\n') line_start--;
|
|
250
|
+
const char *p = line_start;
|
|
251
|
+
while (p < scan && (*p == ' ' || *p == '\t')) p++;
|
|
252
|
+
if (p >= scan || *p != '[') { has_pattern = true; break; }
|
|
253
|
+
}
|
|
254
|
+
if ((scan == text || scan[-1] == '\n') && *scan) {
|
|
255
|
+
const char *p = scan;
|
|
256
|
+
while (*p == ' ' || *p == '\t') p++;
|
|
257
|
+
if (*p == ':' && (p[1] == ' ' || p[1] == '\t' || (p[1] == ':' && (p[2] == ' ' || p[2] == '\t')))) {
|
|
258
|
+
has_pattern = true;
|
|
532
259
|
break;
|
|
533
260
|
}
|
|
534
|
-
|
|
535
|
-
/* Move to next line */
|
|
536
|
-
while (p < end && *p != '\n') {
|
|
537
|
-
p++;
|
|
538
|
-
}
|
|
539
|
-
if (p < end) p++; /* Skip the newline */
|
|
540
|
-
} else {
|
|
541
|
-
p++;
|
|
542
261
|
}
|
|
262
|
+
scan++;
|
|
543
263
|
}
|
|
264
|
+
if (!has_pattern) return NULL;
|
|
544
265
|
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
return NULL;
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
size_t output_capacity = text_len * 3; /* Generous for HTML tags */
|
|
551
|
-
char *output = malloc(output_capacity + 1); /* +1 for null terminator */
|
|
266
|
+
size_t output_capacity = text_len * 3;
|
|
267
|
+
char *output = malloc(output_capacity + 1);
|
|
552
268
|
if (!output) return NULL;
|
|
553
269
|
|
|
554
|
-
/* Extract all reference link definitions from the document */
|
|
555
|
-
char *ref_definitions = extract_reference_definitions(text);
|
|
556
|
-
|
|
557
270
|
const char *read = text;
|
|
558
271
|
char *write = output;
|
|
559
|
-
/* Reserve 1 byte for null terminator, so we have output_capacity bytes to write */
|
|
560
272
|
size_t remaining = output_capacity;
|
|
561
273
|
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
free(output); \
|
|
575
|
-
free(ref_definitions); \
|
|
576
|
-
return NULL; \
|
|
577
|
-
} \
|
|
578
|
-
output = new_output; \
|
|
579
|
-
write = output + used; \
|
|
580
|
-
remaining = output_capacity - used; \
|
|
581
|
-
} \
|
|
582
|
-
} while(0)
|
|
274
|
+
#define ENSURE_SPACE(needed) do { \
|
|
275
|
+
if (remaining <= (needed)) { \
|
|
276
|
+
size_t used = write - output; \
|
|
277
|
+
size_t min_capacity = used + (needed) + 1; \
|
|
278
|
+
output_capacity = (min_capacity < 1024) ? 2048 : min_capacity * 2; \
|
|
279
|
+
char *new_output = realloc(output, output_capacity + 1); \
|
|
280
|
+
if (!new_output) { free(output); return NULL; } \
|
|
281
|
+
output = new_output; \
|
|
282
|
+
write = output + used; \
|
|
283
|
+
remaining = output_capacity - used; \
|
|
284
|
+
} \
|
|
285
|
+
} while(0)
|
|
583
286
|
|
|
584
287
|
bool in_def_list = false;
|
|
585
|
-
bool
|
|
586
|
-
|
|
288
|
+
bool in_code_block = false;
|
|
289
|
+
bool in_indented_code_block = false;
|
|
290
|
+
bool in_inline_code_span = false;
|
|
587
291
|
char term_buffer[4096];
|
|
588
292
|
int term_len = 0;
|
|
589
|
-
bool
|
|
590
|
-
|
|
591
|
-
bool found_any_def_list = false; /* Track if we actually created any definition lists */
|
|
592
|
-
bool in_code_block = false; /* Track if we're inside a fenced code block */
|
|
593
|
-
bool skipped_blank_after_term = false; /* Track if we skipped a blank line after a buffered term */
|
|
594
|
-
|
|
595
|
-
const char *prev_read_pos = NULL;
|
|
596
|
-
int iteration_count = 0;
|
|
597
|
-
const int MAX_ITERATIONS = 1000000; /* Safety limit */
|
|
293
|
+
bool dd_open = false; /* True when we output <dd> but not yet </dd> (for Kramdown continuation) */
|
|
294
|
+
bool prev_line_was_table_row = false;
|
|
598
295
|
|
|
599
296
|
while (*read) {
|
|
600
|
-
/* Safety: prevent infinite loops */
|
|
601
|
-
if (++iteration_count > MAX_ITERATIONS) {
|
|
602
|
-
/* Something is wrong - return original text to avoid hanging */
|
|
603
|
-
free(output);
|
|
604
|
-
free(ref_definitions);
|
|
605
|
-
return strdup(text);
|
|
606
|
-
}
|
|
607
|
-
|
|
608
|
-
/* Safety: if we haven't advanced, break to prevent infinite loop */
|
|
609
|
-
if (prev_read_pos == read) {
|
|
610
|
-
break;
|
|
611
|
-
}
|
|
612
|
-
prev_read_pos = read;
|
|
613
|
-
|
|
614
297
|
const char *line_start = read;
|
|
615
298
|
const char *line_end = strchr(read, '\n');
|
|
616
|
-
if (!line_end)
|
|
617
|
-
/* No newline found - we're at the last line */
|
|
618
|
-
/* Find the end by looking for null terminator */
|
|
619
|
-
line_end = read;
|
|
620
|
-
while (*line_end != '\0') line_end++;
|
|
621
|
-
/* If line_end == read, we're at the end - break */
|
|
622
|
-
if (line_end == read) {
|
|
623
|
-
break;
|
|
624
|
-
}
|
|
625
|
-
}
|
|
299
|
+
if (!line_end) line_end = read + strlen(read);
|
|
626
300
|
|
|
627
|
-
size_t line_length = line_end - line_start;
|
|
301
|
+
size_t line_length = (size_t)(line_end - line_start);
|
|
302
|
+
int sep = -1; /* One-line def separator pos; -1 = none (used for inline code state update) */
|
|
628
303
|
|
|
629
|
-
/*
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
while (after_fence < line_end && (*after_fence == ' ' || *after_fence == '\t')) after_fence++;
|
|
640
|
-
is_closing_fence = (after_fence >= line_end || *after_fence == '\n' || *after_fence == '\r');
|
|
304
|
+
/* Track indented code blocks (4+ spaces or tab, not list continuation) */
|
|
305
|
+
if (read == text || read[-1] == '\n') {
|
|
306
|
+
bool this_line_indented = line_is_indented_code_block(line_start, line_length);
|
|
307
|
+
if (this_line_indented) {
|
|
308
|
+
in_indented_code_block = true;
|
|
309
|
+
} else {
|
|
310
|
+
/* Non-blank line without indent ends the block */
|
|
311
|
+
bool is_blank = (line_length == 0 || (line_length == 1 && (*line_start == '\r' || *line_start == '\n')));
|
|
312
|
+
if (!is_blank) in_indented_code_block = false;
|
|
313
|
+
}
|
|
641
314
|
}
|
|
642
315
|
|
|
643
|
-
/*
|
|
644
|
-
if (
|
|
645
|
-
|
|
646
|
-
if (
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
}
|
|
654
|
-
/* If it's an opening fence with language identifier inside a code block, treat as content (don't change state) */
|
|
655
|
-
} else {
|
|
656
|
-
in_code_block = true;
|
|
657
|
-
}
|
|
658
|
-
/* If we're entering a code block, clear any pending definition list state */
|
|
659
|
-
if (in_code_block && !was_in_code_block) {
|
|
660
|
-
if (in_def_list) {
|
|
661
|
-
/* Close any open definition list */
|
|
662
|
-
const char *dl_end = "</dl>\n";
|
|
663
|
-
size_t dl_end_len = strlen(dl_end);
|
|
664
|
-
ENSURE_SPACE(dl_end_len + 1);
|
|
665
|
-
memcpy(write, dl_end, dl_end_len);
|
|
666
|
-
write += dl_end_len;
|
|
667
|
-
remaining -= dl_end_len;
|
|
668
|
-
}
|
|
669
|
-
in_def_list = false;
|
|
670
|
-
term_len = 0;
|
|
671
|
-
term_has_blockquote = false;
|
|
672
|
-
term_blockquote_depth = 0;
|
|
673
|
-
skipped_blank_after_term = false;
|
|
674
|
-
}
|
|
675
|
-
/* If we're exiting a code block, clear any pending definition list state */
|
|
676
|
-
if (!in_code_block && was_in_code_block) {
|
|
677
|
-
in_def_list = false;
|
|
678
|
-
term_len = 0;
|
|
679
|
-
term_has_blockquote = false;
|
|
680
|
-
term_blockquote_depth = 0;
|
|
681
|
-
skipped_blank_after_term = false;
|
|
682
|
-
}
|
|
316
|
+
/* Track code blocks */
|
|
317
|
+
if (is_code_fence_line(line_start, line_length)) {
|
|
318
|
+
in_code_block = !in_code_block;
|
|
319
|
+
if (in_def_list && in_code_block) {
|
|
320
|
+
/* Close def list before code block */
|
|
321
|
+
ENSURE_SPACE(10);
|
|
322
|
+
memcpy(write, "</dl>\n", 6);
|
|
323
|
+
write += 6;
|
|
324
|
+
remaining -= 6;
|
|
325
|
+
in_def_list = false;
|
|
683
326
|
}
|
|
684
|
-
ENSURE_SPACE(line_length +
|
|
327
|
+
ENSURE_SPACE(line_length + 2);
|
|
685
328
|
memcpy(write, line_start, line_length);
|
|
686
329
|
write += line_length;
|
|
687
330
|
remaining -= line_length;
|
|
688
331
|
*write++ = '\n';
|
|
689
332
|
remaining--;
|
|
690
|
-
read = line_end;
|
|
691
|
-
if (*read == '\n') {
|
|
692
|
-
read++;
|
|
693
|
-
}
|
|
333
|
+
read = line_end + (line_end < text + text_len && *line_end == '\n' ? 1 : 0);
|
|
694
334
|
continue;
|
|
695
335
|
}
|
|
696
336
|
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
list_spaces++;
|
|
707
|
-
list_check++;
|
|
708
|
-
}
|
|
709
|
-
bool is_list_item = false;
|
|
710
|
-
if (list_check < line_end) {
|
|
711
|
-
if (*list_check == '-' || *list_check == '*' || *list_check == '+') {
|
|
712
|
-
/* Check if followed by space or tab */
|
|
713
|
-
if (list_check + 1 < line_end && (list_check[1] == ' ' || list_check[1] == '\t')) {
|
|
714
|
-
is_list_item = true;
|
|
715
|
-
}
|
|
716
|
-
} else if (*list_check >= '0' && *list_check <= '9') {
|
|
717
|
-
/* Check for numbered list (digit followed by . and space) */
|
|
718
|
-
const char *num_check = list_check;
|
|
719
|
-
while (num_check < line_end && *num_check >= '0' && *num_check <= '9') {
|
|
720
|
-
num_check++;
|
|
721
|
-
}
|
|
722
|
-
if (num_check < line_end && *num_check == '.' &&
|
|
723
|
-
num_check + 1 < line_end && (num_check[1] == ' ' || num_check[1] == '\t')) {
|
|
724
|
-
is_list_item = true;
|
|
725
|
-
}
|
|
726
|
-
}
|
|
337
|
+
if (in_code_block) {
|
|
338
|
+
ENSURE_SPACE(line_length + 2);
|
|
339
|
+
memcpy(write, line_start, line_length);
|
|
340
|
+
write += line_length;
|
|
341
|
+
remaining -= line_length;
|
|
342
|
+
*write++ = '\n';
|
|
343
|
+
remaining--;
|
|
344
|
+
read = line_end + (line_end < text + text_len && *line_end == '\n' ? 1 : 0);
|
|
345
|
+
continue;
|
|
727
346
|
}
|
|
728
347
|
|
|
729
|
-
/*
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
348
|
+
/* Skip definition processing inside indented code blocks */
|
|
349
|
+
if (in_indented_code_block) {
|
|
350
|
+
ENSURE_SPACE(line_length + 2);
|
|
351
|
+
memcpy(write, line_start, line_length);
|
|
352
|
+
write += line_length;
|
|
353
|
+
remaining -= line_length;
|
|
354
|
+
*write++ = '\n';
|
|
355
|
+
remaining--;
|
|
356
|
+
read = line_end + (line_end < text + text_len && *line_end == '\n' ? 1 : 0);
|
|
357
|
+
continue;
|
|
736
358
|
}
|
|
737
359
|
|
|
738
|
-
/* Check for
|
|
739
|
-
bool
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
p
|
|
745
|
-
|
|
746
|
-
if (p < line_end && (*p == ' ' || *p == '\t')) {
|
|
747
|
-
p++;
|
|
360
|
+
/* Check for Kramdown-style definition: : Definition (requires buffered term) */
|
|
361
|
+
bool is_kramdown_def = !in_code_block && !in_inline_code_span && is_kramdown_def_line(line_start, line_length);
|
|
362
|
+
if (is_kramdown_def) {
|
|
363
|
+
/* Skip reference link definitions [id]: url */
|
|
364
|
+
const char *p = line_start;
|
|
365
|
+
while (p < line_end && (*p == ' ' || *p == '\t')) p++;
|
|
366
|
+
if (p < line_end && *p == '[') {
|
|
367
|
+
is_kramdown_def = false; /* Reference def, not a definition line */
|
|
748
368
|
}
|
|
749
369
|
}
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
/* Definition lines must start with : (after whitespace/blockquote), not contain : */
|
|
754
|
-
/* Also skip if we're inside a code block (shouldn't happen due to early continue, but be safe) */
|
|
755
|
-
if (!in_code_block && !is_table_row && !is_list_item && p < line_end && *p == ':' && (p + 1) < line_end &&
|
|
756
|
-
(p[1] == ' ' || p[1] == '\t')) {
|
|
757
|
-
/* Double-check: make sure : is at the start of the line content (after whitespace/blockquote) */
|
|
758
|
-
/* p already points after whitespace and blockquote, so if *p == ':', it's a definition line */
|
|
759
|
-
is_def_line = true;
|
|
760
|
-
|
|
761
|
-
/* Check if this : Caption line is followed by a table */
|
|
762
|
-
/* If so, skip processing it as a definition list - let table caption detection handle it */
|
|
763
|
-
/* Calculate end of text buffer safely - use original text parameter */
|
|
764
|
-
const char *text_end = text + text_len; /* End of entire text buffer */
|
|
765
|
-
const char *next_line_start = line_end;
|
|
766
|
-
if (next_line_start < text_end && *next_line_start == '\n') {
|
|
767
|
-
next_line_start++; /* Skip the newline */
|
|
768
|
-
}
|
|
769
|
-
|
|
770
|
-
/* Skip blank lines to find the next non-blank line */
|
|
771
|
-
/* Safety: limit look-ahead to prevent infinite loops or buffer overruns */
|
|
772
|
-
int look_ahead_count = 0;
|
|
773
|
-
const int MAX_LOOK_AHEAD = 100;
|
|
774
|
-
bool found_table = false;
|
|
775
|
-
while (next_line_start < text_end && *next_line_start != '\0' && look_ahead_count < MAX_LOOK_AHEAD) {
|
|
776
|
-
look_ahead_count++;
|
|
777
|
-
const char *check_line = next_line_start;
|
|
778
|
-
|
|
779
|
-
/* Find end of line - only search within remaining buffer */
|
|
780
|
-
const char *check_line_end = NULL;
|
|
781
|
-
if (check_line < text_end) {
|
|
782
|
-
/* Search for newline only within remaining buffer */
|
|
783
|
-
const char *search_start = check_line;
|
|
784
|
-
while (search_start < text_end && *search_start != '\n' && *search_start != '\0') {
|
|
785
|
-
search_start++;
|
|
786
|
-
}
|
|
787
|
-
if (search_start < text_end && *search_start == '\n') {
|
|
788
|
-
check_line_end = search_start;
|
|
789
|
-
} else {
|
|
790
|
-
/* No newline found - this is the last line */
|
|
791
|
-
check_line_end = text_end;
|
|
792
|
-
}
|
|
793
|
-
} else {
|
|
794
|
-
/* Already past end */
|
|
795
|
-
check_line_end = text_end;
|
|
796
|
-
}
|
|
797
|
-
|
|
798
|
-
/* Skip whitespace on this line - ensure we don't go past check_line_end */
|
|
799
|
-
while (check_line < check_line_end && check_line < text_end &&
|
|
800
|
-
(*check_line == ' ' || *check_line == '\t')) {
|
|
801
|
-
check_line++;
|
|
802
|
-
}
|
|
803
|
-
|
|
804
|
-
/* If line is empty (just whitespace), continue to next line */
|
|
805
|
-
if (check_line >= check_line_end || check_line >= text_end ||
|
|
806
|
-
*check_line == '\r' || *check_line == '\0') {
|
|
807
|
-
next_line_start = check_line_end;
|
|
808
|
-
if (next_line_start < text_end && *next_line_start == '\n') {
|
|
809
|
-
next_line_start++;
|
|
810
|
-
}
|
|
811
|
-
continue;
|
|
812
|
-
}
|
|
813
|
-
|
|
814
|
-
/* Check if this line starts with | (table row) - ensure we're within bounds */
|
|
815
|
-
if (check_line < text_end && *check_line == '|') {
|
|
816
|
-
/* This : Caption is followed by a table - treat it as a table caption */
|
|
817
|
-
/* Add 4 spaces to prevent definition list processing */
|
|
818
|
-
is_def_line = false;
|
|
819
|
-
found_table = true;
|
|
820
|
-
/* Output line with 4 spaces added at the very beginning to prevent definition list matching */
|
|
821
|
-
/* Calculate how many spaces we already have at the start */
|
|
822
|
-
int existing_spaces = spaces;
|
|
823
|
-
/* We need 4 total spaces at the start, so add (4 - existing_spaces) more */
|
|
824
|
-
int spaces_to_add = 4 - existing_spaces;
|
|
825
|
-
if (spaces_to_add < 0) spaces_to_add = 0;
|
|
826
|
-
|
|
827
|
-
ENSURE_SPACE(spaces_to_add + line_length + 1);
|
|
828
|
-
/* Add extra spaces at the very beginning */
|
|
829
|
-
for (int i = 0; i < spaces_to_add; i++) {
|
|
830
|
-
*write++ = ' ';
|
|
831
|
-
remaining--;
|
|
832
|
-
}
|
|
833
|
-
/* Copy the entire original line */
|
|
834
|
-
memcpy(write, line_start, line_length);
|
|
835
|
-
write += line_length;
|
|
836
|
-
remaining -= line_length;
|
|
837
|
-
*write++ = '\n';
|
|
838
|
-
remaining--;
|
|
839
|
-
read = line_end;
|
|
840
|
-
if (*read == '\n') {
|
|
841
|
-
read++;
|
|
842
|
-
}
|
|
843
|
-
/* Break out of look-ahead loop since we found the table */
|
|
844
|
-
break;
|
|
845
|
-
}
|
|
846
|
-
|
|
847
|
-
/* Found a non-blank line, stop looking */
|
|
848
|
-
break;
|
|
849
|
-
}
|
|
850
|
-
|
|
851
|
-
/* If we found a table, skip the rest of the line processing */
|
|
852
|
-
if (found_table) {
|
|
853
|
-
continue; /* Skip to next iteration of main while loop */
|
|
854
|
-
}
|
|
855
|
-
} else if (in_code_block && p < line_end && *p == ':') {
|
|
856
|
-
/* Found ':' in code block, skip definition list processing */
|
|
370
|
+
if (is_kramdown_def && prev_line_was_table_row) {
|
|
371
|
+
/* : Caption after table is a table caption, not a definition */
|
|
372
|
+
is_kramdown_def = false;
|
|
857
373
|
}
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
if (
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
in_blockquote_context = has_blockquote_prefix || term_has_blockquote;
|
|
865
|
-
/* Use the maximum depth from current line or buffered term */
|
|
866
|
-
blockquote_depth = term_has_blockquote ? term_blockquote_depth : current_blockquote_depth;
|
|
867
|
-
if (has_blockquote_prefix && current_blockquote_depth > blockquote_depth) {
|
|
868
|
-
blockquote_depth = current_blockquote_depth;
|
|
869
|
-
}
|
|
870
|
-
|
|
871
|
-
/* Clear the skipped blank flag - we're using the term now, blank line is ignored */
|
|
872
|
-
skipped_blank_after_term = false;
|
|
873
|
-
|
|
874
|
-
/* Start new definition list */
|
|
875
|
-
const char *dl_start = "<dl>\n";
|
|
876
|
-
size_t dl_len = strlen(dl_start);
|
|
877
|
-
if (in_blockquote_context) {
|
|
878
|
-
/* Add > prefix(es) at start of line for blockquote context */
|
|
879
|
-
size_t prefix_needed = blockquote_depth * 2;
|
|
880
|
-
/* Need prefix_needed + 1 for null terminator */
|
|
881
|
-
ENSURE_SPACE(prefix_needed + 1);
|
|
882
|
-
for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
|
|
883
|
-
*write++ = '>';
|
|
884
|
-
*write++ = ' ';
|
|
885
|
-
remaining -= 2;
|
|
886
|
-
}
|
|
887
|
-
}
|
|
888
|
-
/* Need dl_len + 1 for null terminator */
|
|
889
|
-
ENSURE_SPACE(dl_len + 1);
|
|
890
|
-
memcpy(write, dl_start, dl_len);
|
|
891
|
-
write += dl_len;
|
|
892
|
-
remaining -= dl_len;
|
|
893
|
-
|
|
894
|
-
/* Write term from buffer */
|
|
895
|
-
if (term_len > 0) {
|
|
896
|
-
/* Strip blockquote prefix from term if present */
|
|
897
|
-
const char *term_content = term_buffer;
|
|
898
|
-
int term_content_len = term_len;
|
|
899
|
-
if (term_has_blockquote) {
|
|
900
|
-
/* Skip > and optional space */
|
|
901
|
-
term_content = term_buffer;
|
|
902
|
-
while (term_content < term_buffer + term_len &&
|
|
903
|
-
(*term_content == '>' || *term_content == ' ' || *term_content == '\t')) {
|
|
904
|
-
term_content++;
|
|
905
|
-
term_content_len--;
|
|
906
|
-
}
|
|
907
|
-
}
|
|
908
|
-
|
|
909
|
-
if (in_blockquote_context) {
|
|
910
|
-
/* Add > prefix(es) at start of line for blockquote context */
|
|
911
|
-
size_t prefix_needed = blockquote_depth * 2;
|
|
912
|
-
/* Need prefix_needed + 1 for null terminator */
|
|
913
|
-
ENSURE_SPACE(prefix_needed + 1);
|
|
914
|
-
for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
|
|
915
|
-
*write++ = '>';
|
|
916
|
-
*write++ = ' ';
|
|
917
|
-
remaining -= 2;
|
|
918
|
-
}
|
|
919
|
-
}
|
|
920
|
-
|
|
921
|
-
const char *dt_start = "<dt>";
|
|
922
|
-
size_t dt_start_len = strlen(dt_start);
|
|
923
|
-
/* Need dt_start_len + 1 for null terminator */
|
|
924
|
-
ENSURE_SPACE(dt_start_len + 1);
|
|
925
|
-
memcpy(write, dt_start, dt_start_len);
|
|
926
|
-
write += dt_start_len;
|
|
927
|
-
remaining -= dt_start_len;
|
|
928
|
-
|
|
929
|
-
/* Parse term text as inline Markdown */
|
|
930
|
-
char *term_html = NULL;
|
|
931
|
-
if (term_content_len > 0) {
|
|
932
|
-
/* Quick check: does this text contain any markdown syntax? */
|
|
933
|
-
bool has_markdown = false;
|
|
934
|
-
const char *p = term_content;
|
|
935
|
-
const char *end = term_content + term_content_len;
|
|
936
|
-
while (p < end) {
|
|
937
|
-
char c = *p++;
|
|
938
|
-
/* Check for common markdown patterns */
|
|
939
|
-
if (c == '*' || c == '_' || c == '[' || c == ']' || c == '!' ||
|
|
940
|
-
c == '`' || c == '\\' || (c == '<' && p < end && (*p == '!' || isalnum((unsigned char)*p)))) {
|
|
941
|
-
has_markdown = true;
|
|
942
|
-
break;
|
|
943
|
-
}
|
|
944
|
-
}
|
|
945
|
-
|
|
946
|
-
if (!has_markdown) {
|
|
947
|
-
/* Plain text - just HTML escape */
|
|
948
|
-
size_t escaped_len = 0;
|
|
949
|
-
for (const char *p = term_content; p < term_content + term_content_len; p++) {
|
|
950
|
-
if (*p == '&') escaped_len += 5; /* & */
|
|
951
|
-
else if (*p == '<' || *p == '>') escaped_len += 4; /* < > */
|
|
952
|
-
else if (*p == '"') escaped_len += 6; /* " */
|
|
953
|
-
else escaped_len += 1;
|
|
954
|
-
}
|
|
955
|
-
term_html = malloc(escaped_len + 1);
|
|
956
|
-
if (term_html) {
|
|
957
|
-
char *out = term_html;
|
|
958
|
-
for (const char *p = term_content; p < term_content + term_content_len; p++) {
|
|
959
|
-
if (*p == '&') { memcpy(out, "&", 5); out += 5; }
|
|
960
|
-
else if (*p == '<') { memcpy(out, "<", 4); out += 4; }
|
|
961
|
-
else if (*p == '>') { memcpy(out, ">", 4); out += 4; }
|
|
962
|
-
else if (*p == '"') { memcpy(out, """, 6); out += 6; }
|
|
963
|
-
else *out++ = *p;
|
|
964
|
-
}
|
|
965
|
-
*out = '\0';
|
|
966
|
-
}
|
|
967
|
-
} else {
|
|
968
|
-
/* Note: We don't prepend reference definitions here because:
|
|
969
|
-
* 1. It causes cmark to hang on large files with many references
|
|
970
|
-
* 2. Reference definitions are already available in the main document context
|
|
971
|
-
* 3. Inline parsing should work with references from the main document
|
|
972
|
-
*/
|
|
973
|
-
char *term_text = malloc(term_content_len + 1);
|
|
974
|
-
if (term_text) {
|
|
975
|
-
memcpy(term_text, term_content, term_content_len);
|
|
976
|
-
term_text[term_content_len] = '\0';
|
|
977
|
-
|
|
978
|
-
/* Parse as Markdown and render to HTML */
|
|
979
|
-
int parser_opts = CMARK_OPT_DEFAULT | CMARK_OPT_SMART; /* Enable smart typography */
|
|
980
|
-
int render_opts = CMARK_OPT_DEFAULT;
|
|
981
|
-
if (unsafe) {
|
|
982
|
-
parser_opts |= CMARK_OPT_UNSAFE;
|
|
983
|
-
parser_opts |= CMARK_OPT_LIBERAL_HTML_TAG; /* Be liberal in interpreting inline HTML tags */
|
|
984
|
-
render_opts |= CMARK_OPT_UNSAFE;
|
|
985
|
-
}
|
|
986
|
-
|
|
987
|
-
/* Extract only the reference definitions actually used in this term */
|
|
988
|
-
size_t final_term_len = term_content_len;
|
|
989
|
-
char *final_term_text = term_text;
|
|
990
|
-
if (ref_definitions) {
|
|
991
|
-
size_t id_count = 0;
|
|
992
|
-
char **needed_ids = extract_reference_link_ids(term_text, &id_count);
|
|
993
|
-
if (needed_ids && id_count > 0) {
|
|
994
|
-
char *selected_refs = extract_specific_reference_definitions(ref_definitions, needed_ids);
|
|
995
|
-
if (selected_refs) {
|
|
996
|
-
size_t ref_len = strlen(selected_refs);
|
|
997
|
-
size_t new_size = ref_len + term_content_len + 2; /* +2 for newline and null */
|
|
998
|
-
char *new_term_text = malloc(new_size);
|
|
999
|
-
if (new_term_text) {
|
|
1000
|
-
size_t offset = 0;
|
|
1001
|
-
memcpy(new_term_text, selected_refs, ref_len);
|
|
1002
|
-
offset = ref_len;
|
|
1003
|
-
if (ref_len > 0 && selected_refs[ref_len - 1] != '\n') {
|
|
1004
|
-
new_term_text[offset++] = '\n';
|
|
1005
|
-
}
|
|
1006
|
-
memcpy(new_term_text + offset, term_text, term_content_len);
|
|
1007
|
-
new_term_text[offset + term_content_len] = '\0';
|
|
1008
|
-
free(term_text);
|
|
1009
|
-
final_term_text = new_term_text;
|
|
1010
|
-
final_term_len = offset + term_content_len;
|
|
1011
|
-
}
|
|
1012
|
-
free(selected_refs);
|
|
1013
|
-
}
|
|
1014
|
-
/* Free the IDs array */
|
|
1015
|
-
for (size_t i = 0; i < id_count; i++) {
|
|
1016
|
-
free(needed_ids[i]);
|
|
1017
|
-
}
|
|
1018
|
-
free(needed_ids);
|
|
1019
|
-
}
|
|
1020
|
-
}
|
|
1021
|
-
cmark_parser *temp_parser = cmark_parser_new(parser_opts);
|
|
1022
|
-
if (temp_parser) {
|
|
1023
|
-
cmark_parser_feed(temp_parser, final_term_text, final_term_len);
|
|
1024
|
-
cmark_node *doc = cmark_parser_finish(temp_parser);
|
|
1025
|
-
if (doc) {
|
|
1026
|
-
/* Render and extract just the content (strip <p> tags) */
|
|
1027
|
-
char *full_html = cmark_render_html(doc, render_opts, NULL);
|
|
1028
|
-
if (full_html) {
|
|
1029
|
-
/* Strip <p> and </p> tags if present */
|
|
1030
|
-
char *content_start = full_html;
|
|
1031
|
-
if (strncmp(content_start, "<p>", 3) == 0) {
|
|
1032
|
-
content_start += 3;
|
|
1033
|
-
}
|
|
1034
|
-
char *content_end = content_start + strlen(content_start);
|
|
1035
|
-
if (content_end > content_start + 4 &&
|
|
1036
|
-
strcmp(content_end - 5, "</p>\n") == 0) {
|
|
1037
|
-
content_end -= 5;
|
|
1038
|
-
*content_end = '\0';
|
|
1039
|
-
}
|
|
1040
|
-
term_html = strdup(content_start);
|
|
1041
|
-
free(full_html);
|
|
1042
|
-
}
|
|
1043
|
-
cmark_node_free(doc);
|
|
1044
|
-
}
|
|
1045
|
-
cmark_parser_free(temp_parser);
|
|
1046
|
-
}
|
|
1047
|
-
free(final_term_text);
|
|
1048
|
-
}
|
|
1049
|
-
}
|
|
1050
|
-
}
|
|
1051
|
-
|
|
1052
|
-
/* Write processed HTML or original text */
|
|
1053
|
-
if (term_html) {
|
|
1054
|
-
size_t html_len = strlen(term_html);
|
|
1055
|
-
/* Need html_len + 1 for null terminator */
|
|
1056
|
-
ENSURE_SPACE(html_len + 1);
|
|
1057
|
-
memcpy(write, term_html, html_len);
|
|
1058
|
-
write += html_len;
|
|
1059
|
-
remaining -= html_len;
|
|
1060
|
-
free(term_html);
|
|
1061
|
-
} else {
|
|
1062
|
-
/* Need term_content_len + 1 for null terminator */
|
|
1063
|
-
ENSURE_SPACE((size_t)term_content_len + 1);
|
|
1064
|
-
memcpy(write, term_content, term_content_len);
|
|
1065
|
-
write += term_content_len;
|
|
1066
|
-
remaining -= (size_t)term_content_len;
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
const char *dt_end = "</dt>\n";
|
|
1070
|
-
size_t dt_end_len = strlen(dt_end);
|
|
1071
|
-
/* Need dt_end_len + 1 for null terminator */
|
|
1072
|
-
ENSURE_SPACE(dt_end_len + 1);
|
|
1073
|
-
memcpy(write, dt_end, dt_end_len);
|
|
1074
|
-
write += dt_end_len;
|
|
1075
|
-
remaining -= dt_end_len;
|
|
1076
|
-
|
|
1077
|
-
term_len = 0;
|
|
1078
|
-
term_has_blockquote = false;
|
|
1079
|
-
skipped_blank_after_term = false; /* Clear flag - we used the term */
|
|
1080
|
-
}
|
|
1081
|
-
|
|
1082
|
-
in_def_list = true;
|
|
1083
|
-
}
|
|
1084
|
-
|
|
1085
|
-
/* Write definition */
|
|
1086
|
-
if (in_blockquote_context) {
|
|
1087
|
-
/* Add > prefix(es) at start of line for blockquote context */
|
|
1088
|
-
size_t prefix_needed = blockquote_depth * 2;
|
|
1089
|
-
/* Need prefix_needed + 1 for null terminator */
|
|
1090
|
-
ENSURE_SPACE(prefix_needed + 1);
|
|
1091
|
-
for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
|
|
1092
|
-
*write++ = '>';
|
|
1093
|
-
*write++ = ' ';
|
|
1094
|
-
remaining -= 2;
|
|
1095
|
-
}
|
|
1096
|
-
}
|
|
1097
|
-
|
|
1098
|
-
const char *dd_start = "<dd>";
|
|
1099
|
-
size_t dd_start_len = strlen(dd_start);
|
|
1100
|
-
/* Need dd_start_len + 1 for null terminator */
|
|
1101
|
-
ENSURE_SPACE(dd_start_len + 1);
|
|
1102
|
-
memcpy(write, dd_start, dd_start_len);
|
|
1103
|
-
write += dd_start_len;
|
|
1104
|
-
remaining -= dd_start_len;
|
|
1105
|
-
|
|
1106
|
-
/* Extract definition text (after : and space) */
|
|
1107
|
-
p++; /* Skip : */
|
|
1108
|
-
while (p < line_end && (*p == ' ' || *p == '\t')) p++;
|
|
1109
|
-
|
|
1110
|
-
size_t def_text_len = line_end - p;
|
|
1111
|
-
|
|
1112
|
-
/* Parse definition text as inline Markdown */
|
|
1113
|
-
char *def_html = NULL;
|
|
1114
|
-
if (def_text_len > 0) {
|
|
1115
|
-
/* Quick check: does this text contain any markdown syntax? */
|
|
1116
|
-
bool has_markdown = false;
|
|
1117
|
-
const char *check_p = p;
|
|
1118
|
-
const char *check_end = p + def_text_len;
|
|
1119
|
-
while (check_p < check_end) {
|
|
1120
|
-
char c = *check_p++;
|
|
1121
|
-
/* Check for common markdown patterns */
|
|
1122
|
-
if (c == '*' || c == '_' || c == '[' || c == ']' || c == '!' ||
|
|
1123
|
-
c == '`' || c == '\\' || (c == '<' && check_p < check_end && (*check_p == '!' || isalnum((unsigned char)*check_p)))) {
|
|
1124
|
-
has_markdown = true;
|
|
1125
|
-
break;
|
|
1126
|
-
}
|
|
1127
|
-
}
|
|
1128
|
-
|
|
1129
|
-
if (!has_markdown) {
|
|
1130
|
-
/* Plain text - just HTML escape */
|
|
1131
|
-
size_t escaped_len = 0;
|
|
1132
|
-
for (const char *esc_p = p; esc_p < p + def_text_len; esc_p++) {
|
|
1133
|
-
if (*esc_p == '&') escaped_len += 5; /* & */
|
|
1134
|
-
else if (*esc_p == '<' || *esc_p == '>') escaped_len += 4; /* < > */
|
|
1135
|
-
else if (*esc_p == '"') escaped_len += 6; /* " */
|
|
1136
|
-
else escaped_len += 1;
|
|
1137
|
-
}
|
|
1138
|
-
def_html = malloc(escaped_len + 1);
|
|
1139
|
-
if (def_html) {
|
|
1140
|
-
char *out = def_html;
|
|
1141
|
-
for (const char *esc_p = p; esc_p < p + def_text_len; esc_p++) {
|
|
1142
|
-
if (*esc_p == '&') { memcpy(out, "&", 5); out += 5; }
|
|
1143
|
-
else if (*esc_p == '<') { memcpy(out, "<", 4); out += 4; }
|
|
1144
|
-
else if (*esc_p == '>') { memcpy(out, ">", 4); out += 4; }
|
|
1145
|
-
else if (*esc_p == '"') { memcpy(out, """, 6); out += 6; }
|
|
1146
|
-
else *out++ = *esc_p;
|
|
1147
|
-
}
|
|
1148
|
-
*out = '\0';
|
|
1149
|
-
}
|
|
1150
|
-
} else {
|
|
1151
|
-
char *def_text = malloc(def_text_len + 1);
|
|
1152
|
-
if (def_text) {
|
|
1153
|
-
memcpy(def_text, p, def_text_len);
|
|
1154
|
-
def_text[def_text_len] = '\0';
|
|
1155
|
-
|
|
1156
|
-
/* Parse as Markdown and render to HTML */
|
|
1157
|
-
int parser_opts = CMARK_OPT_DEFAULT | CMARK_OPT_SMART; /* Enable smart typography */
|
|
1158
|
-
int render_opts = CMARK_OPT_DEFAULT;
|
|
1159
|
-
if (unsafe) {
|
|
1160
|
-
parser_opts |= CMARK_OPT_UNSAFE;
|
|
1161
|
-
parser_opts |= CMARK_OPT_LIBERAL_HTML_TAG; /* Be liberal in interpreting inline HTML tags */
|
|
1162
|
-
render_opts |= CMARK_OPT_UNSAFE;
|
|
1163
|
-
}
|
|
1164
|
-
|
|
1165
|
-
/* Extract only the reference definitions actually used in this definition */
|
|
1166
|
-
size_t final_def_len = def_text_len;
|
|
1167
|
-
char *final_def_text = def_text;
|
|
1168
|
-
if (ref_definitions) {
|
|
1169
|
-
size_t id_count = 0;
|
|
1170
|
-
char **needed_ids = extract_reference_link_ids(def_text, &id_count);
|
|
1171
|
-
if (needed_ids && id_count > 0) {
|
|
1172
|
-
char *selected_refs = extract_specific_reference_definitions(ref_definitions, needed_ids);
|
|
1173
|
-
if (selected_refs) {
|
|
1174
|
-
size_t ref_len = strlen(selected_refs);
|
|
1175
|
-
size_t new_size = ref_len + def_text_len + 2; /* +2 for newline and null */
|
|
1176
|
-
char *new_def_text = malloc(new_size);
|
|
1177
|
-
if (new_def_text) {
|
|
1178
|
-
size_t offset = 0;
|
|
1179
|
-
memcpy(new_def_text, selected_refs, ref_len);
|
|
1180
|
-
offset = ref_len;
|
|
1181
|
-
if (ref_len > 0 && selected_refs[ref_len - 1] != '\n') {
|
|
1182
|
-
new_def_text[offset++] = '\n';
|
|
1183
|
-
}
|
|
1184
|
-
memcpy(new_def_text + offset, def_text, def_text_len);
|
|
1185
|
-
new_def_text[offset + def_text_len] = '\0';
|
|
1186
|
-
free(def_text);
|
|
1187
|
-
final_def_text = new_def_text;
|
|
1188
|
-
final_def_len = offset + def_text_len;
|
|
1189
|
-
}
|
|
1190
|
-
free(selected_refs);
|
|
1191
|
-
}
|
|
1192
|
-
/* Free the IDs array */
|
|
1193
|
-
for (size_t i = 0; i < id_count; i++) {
|
|
1194
|
-
free(needed_ids[i]);
|
|
1195
|
-
}
|
|
1196
|
-
free(needed_ids);
|
|
1197
|
-
}
|
|
1198
|
-
}
|
|
1199
|
-
|
|
1200
|
-
cmark_parser *temp_parser = cmark_parser_new(parser_opts);
|
|
1201
|
-
if (temp_parser) {
|
|
1202
|
-
cmark_parser_feed(temp_parser, final_def_text, final_def_len);
|
|
1203
|
-
cmark_node *doc = cmark_parser_finish(temp_parser);
|
|
1204
|
-
if (doc) {
|
|
1205
|
-
/* Render and extract just the content (strip <p> tags) */
|
|
1206
|
-
char *full_html = cmark_render_html(doc, render_opts, NULL);
|
|
1207
|
-
if (full_html) {
|
|
1208
|
-
/* Strip <p> and </p> tags if present */
|
|
1209
|
-
char *content_start = full_html;
|
|
1210
|
-
if (strncmp(content_start, "<p>", 3) == 0) {
|
|
1211
|
-
content_start += 3;
|
|
1212
|
-
}
|
|
1213
|
-
char *content_end = content_start + strlen(content_start);
|
|
1214
|
-
if (content_end > content_start + 4 &&
|
|
1215
|
-
strcmp(content_end - 5, "</p>\n") == 0) {
|
|
1216
|
-
content_end -= 5;
|
|
1217
|
-
*content_end = '\0';
|
|
1218
|
-
}
|
|
1219
|
-
def_html = strdup(content_start);
|
|
1220
|
-
free(full_html);
|
|
1221
|
-
}
|
|
1222
|
-
cmark_node_free(doc);
|
|
1223
|
-
}
|
|
1224
|
-
cmark_parser_free(temp_parser);
|
|
1225
|
-
}
|
|
1226
|
-
free(final_def_text);
|
|
1227
|
-
}
|
|
1228
|
-
}
|
|
374
|
+
if (is_kramdown_def) {
|
|
375
|
+
const char *next_start = line_end;
|
|
376
|
+
if (next_start < text + text_len && *next_start == '\n') next_start++;
|
|
377
|
+
if (next_nonblank_line_is_table(next_start, text + text_len)) {
|
|
378
|
+
/* : Caption before table is a table caption, not a definition */
|
|
379
|
+
is_kramdown_def = false;
|
|
1229
380
|
}
|
|
381
|
+
}
|
|
1230
382
|
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
free(def_html);
|
|
1240
|
-
} else {
|
|
1241
|
-
/* Need def_text_len + 1 for null terminator */
|
|
1242
|
-
ENSURE_SPACE(def_text_len + 1);
|
|
1243
|
-
memcpy(write, p, def_text_len);
|
|
1244
|
-
write += def_text_len;
|
|
1245
|
-
remaining -= def_text_len;
|
|
383
|
+
if (is_kramdown_def) {
|
|
384
|
+
/* Extract definition text (after : or :: and space) */
|
|
385
|
+
const char *def_start = line_start;
|
|
386
|
+
while (def_start < line_end && (*def_start == ' ' || *def_start == '\t')) def_start++;
|
|
387
|
+
if (def_start < line_end && *def_start == ':') {
|
|
388
|
+
def_start++;
|
|
389
|
+
if (def_start < line_end && *def_start == ':') def_start++;
|
|
390
|
+
while (def_start < line_end && (*def_start == ' ' || *def_start == '\t')) def_start++;
|
|
1246
391
|
}
|
|
392
|
+
size_t def_len = (size_t)(line_end - def_start);
|
|
1247
393
|
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
ENSURE_SPACE(
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
394
|
+
if (term_len > 0) {
|
|
395
|
+
/* We have a buffered term - output <dl><dt>term</dt><dd>def</dd> */
|
|
396
|
+
if (!in_def_list) {
|
|
397
|
+
ENSURE_SPACE(20);
|
|
398
|
+
memcpy(write, "<dl>\n", 5);
|
|
399
|
+
write += 5;
|
|
400
|
+
remaining -= 5;
|
|
401
|
+
in_def_list = true;
|
|
402
|
+
}
|
|
403
|
+
if (dd_open) {
|
|
404
|
+
memcpy(write, "</dd>\n", 6);
|
|
405
|
+
write += 6;
|
|
406
|
+
remaining -= 6;
|
|
407
|
+
dd_open = false;
|
|
408
|
+
}
|
|
409
|
+
/* <dt>term</dt> */
|
|
410
|
+
ENSURE_SPACE(10);
|
|
411
|
+
memcpy(write, "<dt>", 4);
|
|
412
|
+
write += 4;
|
|
413
|
+
remaining -= 4;
|
|
414
|
+
char *term_html = render_inline_with_doc(term_buffer, (size_t)term_len, text, text_len, unsafe);
|
|
415
|
+
if (term_html) {
|
|
416
|
+
size_t html_len = strlen(term_html);
|
|
417
|
+
ENSURE_SPACE(html_len + 20);
|
|
418
|
+
memcpy(write, term_html, html_len);
|
|
419
|
+
write += html_len;
|
|
420
|
+
remaining -= html_len;
|
|
421
|
+
free(term_html);
|
|
422
|
+
}
|
|
423
|
+
memcpy(write, "</dt>\n", 6);
|
|
424
|
+
write += 6;
|
|
425
|
+
remaining -= 6;
|
|
426
|
+
term_len = 0;
|
|
427
|
+
} else if (in_def_list) {
|
|
428
|
+
/* Another : definition for same term */
|
|
429
|
+
if (dd_open) {
|
|
430
|
+
memcpy(write, "</dd>\n", 6);
|
|
431
|
+
write += 6;
|
|
432
|
+
remaining -= 6;
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
/* <dd>definition</dd> */
|
|
437
|
+
ENSURE_SPACE(20 + def_len * 2);
|
|
438
|
+
memcpy(write, "<dd>", 4);
|
|
439
|
+
write += 4;
|
|
440
|
+
remaining -= 4;
|
|
441
|
+
dd_open = true;
|
|
442
|
+
|
|
443
|
+
if (def_len > 0) {
|
|
444
|
+
char *def_html = render_inline_with_doc(def_start, def_len, text, text_len, unsafe);
|
|
445
|
+
if (def_html) {
|
|
446
|
+
size_t html_len = strlen(def_html);
|
|
447
|
+
ENSURE_SPACE(html_len + 20);
|
|
448
|
+
memcpy(write, def_html, html_len);
|
|
449
|
+
write += html_len;
|
|
450
|
+
remaining -= html_len;
|
|
451
|
+
free(def_html);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
/* Don't close </dd> yet - allow indented continuation lines */
|
|
455
|
+
}
|
|
456
|
+
/* Check for one-line definition: Term :: Definition */
|
|
457
|
+
else if (!in_code_block) {
|
|
458
|
+
sep = find_def_separator((const unsigned char *)line_start, (int)line_length);
|
|
459
|
+
bool sep_inside_inline = false;
|
|
460
|
+
if (sep >= 0) {
|
|
461
|
+
sep_inside_inline = scan_inline_code_for_sep(line_start, line_length, sep, in_inline_code_span, &in_inline_code_span);
|
|
462
|
+
}
|
|
463
|
+
if (sep >= 0 && !sep_inside_inline) {
|
|
464
|
+
/* Close any open Kramdown dd and flush unused term buffer */
|
|
465
|
+
if (dd_open) {
|
|
466
|
+
memcpy(write, "</dd>\n", 6);
|
|
467
|
+
write += 6;
|
|
468
|
+
remaining -= 6;
|
|
469
|
+
dd_open = false;
|
|
470
|
+
}
|
|
471
|
+
if (in_def_list && term_len > 0) {
|
|
472
|
+
/* Buffered term wasn't used - output as regular line, close list */
|
|
473
|
+
memcpy(write, "</dl>\n\n", 7);
|
|
474
|
+
write += 7;
|
|
475
|
+
remaining -= 7;
|
|
1314
476
|
in_def_list = false;
|
|
1315
|
-
in_blockquote_context = false;
|
|
1316
|
-
blockquote_depth = 0;
|
|
1317
477
|
}
|
|
1318
|
-
|
|
1319
|
-
/* If we have a buffered term that wasn't used, write it first */
|
|
1320
478
|
if (term_len > 0) {
|
|
1321
|
-
/* Need term_len bytes + 1 for newline + 1 for null terminator */
|
|
1322
479
|
ENSURE_SPACE((size_t)term_len + 2);
|
|
1323
|
-
memcpy(write, term_buffer, term_len);
|
|
480
|
+
memcpy(write, term_buffer, (size_t)term_len);
|
|
1324
481
|
write += term_len;
|
|
1325
482
|
remaining -= (size_t)term_len;
|
|
1326
483
|
*write++ = '\n';
|
|
1327
484
|
remaining--;
|
|
1328
|
-
/* If we skipped a blank line after the term, output it now */
|
|
1329
|
-
if (skipped_blank_after_term) {
|
|
1330
|
-
ENSURE_SPACE(1);
|
|
1331
|
-
*write++ = '\n';
|
|
1332
|
-
remaining--;
|
|
1333
|
-
skipped_blank_after_term = false;
|
|
1334
|
-
}
|
|
1335
485
|
term_len = 0;
|
|
1336
486
|
}
|
|
487
|
+
/* Extract term (before ::) and definition (after ::) */
|
|
488
|
+
const char *term_start = line_start;
|
|
489
|
+
const char *term_end = line_start + sep;
|
|
490
|
+
const char *def_start = line_start + sep + 2;
|
|
491
|
+
const char *def_end = line_end;
|
|
492
|
+
|
|
493
|
+
/* Trim term */
|
|
494
|
+
while (term_start < term_end && (*term_start == ' ' || *term_start == '\t')) term_start++;
|
|
495
|
+
while (term_end > term_start && (term_end[-1] == ' ' || term_end[-1] == '\t')) term_end--;
|
|
496
|
+
|
|
497
|
+
/* Trim definition */
|
|
498
|
+
while (def_start < def_end && (*def_start == ' ' || *def_start == '\t')) def_start++;
|
|
1337
499
|
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
500
|
+
size_t term_len = (size_t)(term_end - term_start);
|
|
501
|
+
size_t def_len = (size_t)(def_end - def_start);
|
|
502
|
+
|
|
503
|
+
if (!in_def_list) {
|
|
504
|
+
ENSURE_SPACE(10);
|
|
505
|
+
memcpy(write, "<dl>\n", 5);
|
|
506
|
+
write += 5;
|
|
507
|
+
remaining -= 5;
|
|
508
|
+
in_def_list = true;
|
|
1342
509
|
}
|
|
1343
|
-
bool is_header = (header_check < line_end && *header_check == '#');
|
|
1344
510
|
|
|
1345
|
-
/*
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
511
|
+
/* <dt>term</dt> */
|
|
512
|
+
ENSURE_SPACE(20 + term_len * 2);
|
|
513
|
+
memcpy(write, "<dt>", 4);
|
|
514
|
+
write += 4;
|
|
515
|
+
remaining -= 4;
|
|
516
|
+
|
|
517
|
+
/* Parse term as inline markdown */
|
|
518
|
+
if (term_len > 0) {
|
|
519
|
+
char *term_html = render_inline_with_doc(term_start, term_len, text, text_len, unsafe);
|
|
520
|
+
if (term_html) {
|
|
521
|
+
size_t html_len = strlen(term_html);
|
|
522
|
+
ENSURE_SPACE(html_len + 20);
|
|
523
|
+
memcpy(write, term_html, html_len);
|
|
524
|
+
write += html_len;
|
|
525
|
+
remaining -= html_len;
|
|
526
|
+
free(term_html);
|
|
527
|
+
}
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
memcpy(write, "</dt>\n", 6);
|
|
531
|
+
write += 6;
|
|
532
|
+
remaining -= 6;
|
|
533
|
+
|
|
534
|
+
/* <dd>definition</dd> */
|
|
535
|
+
ENSURE_SPACE(20 + def_len * 2);
|
|
536
|
+
memcpy(write, "<dd>", 4);
|
|
537
|
+
write += 4;
|
|
538
|
+
remaining -= 4;
|
|
539
|
+
|
|
540
|
+
if (def_len > 0) {
|
|
541
|
+
char *def_html = render_inline_with_doc(def_start, def_len, text, text_len, unsafe);
|
|
542
|
+
if (def_html) {
|
|
543
|
+
size_t html_len = strlen(def_html);
|
|
544
|
+
ENSURE_SPACE(html_len + 20);
|
|
545
|
+
memcpy(write, def_html, html_len);
|
|
546
|
+
write += html_len;
|
|
547
|
+
remaining -= html_len;
|
|
548
|
+
free(def_html);
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
memcpy(write, "</dd>\n", 6);
|
|
553
|
+
write += 6;
|
|
554
|
+
remaining -= 6;
|
|
555
|
+
} else {
|
|
556
|
+
/* Not one-line def (sep < 0) - buffer as potential Kramdown term */
|
|
557
|
+
if (dd_open) {
|
|
558
|
+
memcpy(write, "</dd>\n", 6);
|
|
559
|
+
write += 6;
|
|
560
|
+
remaining -= 6;
|
|
561
|
+
dd_open = false;
|
|
562
|
+
}
|
|
563
|
+
bool is_blank = (line_length == 0 || (line_length == 1 && (*line_start == '\r' || *line_start == '\n')));
|
|
564
|
+
if (is_blank) {
|
|
565
|
+
/* Blank line: keep def list open (next line might be : definition for same term) */
|
|
566
|
+
if (term_len > 0) {
|
|
567
|
+
/* Skip blank, keep term buffered */
|
|
568
|
+
} else if (!in_def_list) {
|
|
569
|
+
ENSURE_SPACE(2);
|
|
1354
570
|
*write++ = '\n';
|
|
1355
571
|
remaining--;
|
|
1356
572
|
}
|
|
1357
|
-
/*
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
break;
|
|
573
|
+
/* else: in_def_list, skip blank, list stays open */
|
|
574
|
+
} else {
|
|
575
|
+
if (in_def_list) {
|
|
576
|
+
memcpy(write, "</dl>\n\n", 7);
|
|
577
|
+
write += 7;
|
|
578
|
+
remaining -= 7;
|
|
579
|
+
in_def_list = false;
|
|
1365
580
|
}
|
|
1366
|
-
continue;
|
|
1367
|
-
}
|
|
1368
|
-
/* If this is a header, write it through immediately without buffering */
|
|
1369
|
-
else if (is_header) {
|
|
1370
|
-
/* But first, flush any buffered term that wasn't used */
|
|
1371
581
|
if (term_len > 0) {
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
memcpy(write, term_buffer, term_len);
|
|
582
|
+
ENSURE_SPACE((size_t)term_len + 2);
|
|
583
|
+
memcpy(write, term_buffer, (size_t)term_len);
|
|
1375
584
|
write += term_len;
|
|
1376
585
|
remaining -= (size_t)term_len;
|
|
1377
586
|
*write++ = '\n';
|
|
1378
587
|
remaining--;
|
|
1379
|
-
/* If we skipped a blank line after the term, output it now */
|
|
1380
|
-
if (skipped_blank_after_term) {
|
|
1381
|
-
ENSURE_SPACE(1);
|
|
1382
|
-
*write++ = '\n';
|
|
1383
|
-
remaining--;
|
|
1384
|
-
skipped_blank_after_term = false;
|
|
1385
|
-
}
|
|
1386
588
|
term_len = 0;
|
|
1387
589
|
}
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
remaining--;
|
|
1397
|
-
}
|
|
1398
|
-
/* Move to next line and continue */
|
|
1399
|
-
const char *old_read_continue = read;
|
|
1400
|
-
read = line_end;
|
|
1401
|
-
if (*read == '\n') read++;
|
|
1402
|
-
/* Safety: ensure we advanced */
|
|
1403
|
-
if (read <= old_read_continue) {
|
|
1404
|
-
/* We're stuck - break instead of continue */
|
|
1405
|
-
break;
|
|
1406
|
-
}
|
|
1407
|
-
continue;
|
|
1408
|
-
}
|
|
1409
|
-
/* If this is a list item, write it through immediately without buffering */
|
|
1410
|
-
else if (is_list_item) {
|
|
1411
|
-
size_t needed = line_length + (*line_end == '\n' ? 1 : 0);
|
|
1412
|
-
/* Need needed + 1 for null terminator */
|
|
1413
|
-
ENSURE_SPACE(needed + 1);
|
|
1414
|
-
memcpy(write, line_start, line_length);
|
|
1415
|
-
write += line_length;
|
|
1416
|
-
remaining -= line_length;
|
|
1417
|
-
if (*line_end == '\n' && remaining > 0) {
|
|
590
|
+
const char *p = line_start;
|
|
591
|
+
while (p < line_end && (*p == ' ' || *p == '\t')) p++;
|
|
592
|
+
bool is_ref_def = (p < line_end && *p == '[' && memchr(p, ':', (size_t)(line_end - p)) != NULL);
|
|
593
|
+
if (is_ref_def || line_length >= sizeof(term_buffer) - 1) {
|
|
594
|
+
ENSURE_SPACE(line_length + 2);
|
|
595
|
+
memcpy(write, line_start, line_length);
|
|
596
|
+
write += line_length;
|
|
597
|
+
remaining -= line_length;
|
|
1418
598
|
*write++ = '\n';
|
|
1419
599
|
remaining--;
|
|
600
|
+
} else {
|
|
601
|
+
memcpy(term_buffer, line_start, line_length);
|
|
602
|
+
term_len = (int)line_length;
|
|
603
|
+
term_buffer[term_len] = '\0';
|
|
1420
604
|
}
|
|
1421
|
-
/* Move to next line and continue */
|
|
1422
|
-
const char *old_read_continue = read;
|
|
1423
|
-
read = line_end;
|
|
1424
|
-
if (*read == '\n') read++;
|
|
1425
|
-
/* Safety: ensure we advanced */
|
|
1426
|
-
if (read <= old_read_continue) {
|
|
1427
|
-
/* We're stuck - break instead of continue */
|
|
1428
|
-
break;
|
|
1429
|
-
}
|
|
1430
|
-
continue;
|
|
1431
605
|
}
|
|
1432
|
-
/* Check if line contains IAL syntax - if so, write immediately without buffering */
|
|
1433
|
-
else if (strstr(line_start, "{:") != NULL) {
|
|
1434
|
-
/* Contains IAL - don't buffer it */
|
|
1435
|
-
size_t needed = line_length + (*line_end == '\n' ? 1 : 0);
|
|
1436
|
-
/* Need needed + 1 for null terminator */
|
|
1437
|
-
ENSURE_SPACE(needed + 1);
|
|
1438
|
-
memcpy(write, line_start, line_length);
|
|
1439
|
-
write += line_length;
|
|
1440
|
-
remaining -= line_length;
|
|
1441
|
-
if (*line_end == '\n' && remaining > 0) {
|
|
1442
|
-
*write++ = '\n';
|
|
1443
|
-
remaining--;
|
|
1444
|
-
}
|
|
1445
|
-
/* Move to next line and continue */
|
|
1446
|
-
const char *old_read_continue = read;
|
|
1447
|
-
read = line_end;
|
|
1448
|
-
if (*read == '\n') read++;
|
|
1449
|
-
/* Safety: ensure we advanced */
|
|
1450
|
-
if (read <= old_read_continue) {
|
|
1451
|
-
/* We're stuck - break instead of continue */
|
|
1452
|
-
break;
|
|
1453
|
-
}
|
|
1454
|
-
continue;
|
|
1455
606
|
}
|
|
1456
|
-
|
|
1457
|
-
if (line_length < sizeof(term_buffer) - 1) {
|
|
1458
|
-
/* Check if line has blockquote prefix and count depth */
|
|
1459
|
-
const char *term_check = line_start;
|
|
1460
|
-
while (term_check < line_end && (*term_check == ' ' || *term_check == '\t')) term_check++;
|
|
1461
|
-
term_has_blockquote = false;
|
|
1462
|
-
term_blockquote_depth = 0;
|
|
1463
|
-
const char *depth_check = term_check;
|
|
1464
|
-
while (depth_check < line_end && *depth_check == '>') {
|
|
1465
|
-
term_has_blockquote = true;
|
|
1466
|
-
term_blockquote_depth++;
|
|
1467
|
-
depth_check++;
|
|
1468
|
-
/* Skip optional space after > */
|
|
1469
|
-
if (depth_check < line_end && (*depth_check == ' ' || *depth_check == '\t')) {
|
|
1470
|
-
depth_check++;
|
|
1471
|
-
}
|
|
1472
|
-
}
|
|
607
|
+
}
|
|
1473
608
|
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
/* Need needed + 1 for null terminator */
|
|
1482
|
-
ENSURE_SPACE(needed + 1);
|
|
1483
|
-
memcpy(write, line_start, line_length);
|
|
1484
|
-
write += line_length;
|
|
1485
|
-
remaining -= line_length;
|
|
1486
|
-
if (*line_end == '\n' && remaining > 0) {
|
|
1487
|
-
*write++ = '\n';
|
|
1488
|
-
remaining--;
|
|
1489
|
-
}
|
|
1490
|
-
}
|
|
609
|
+
/* Track if this line was a table row (for : Caption after table detection) */
|
|
610
|
+
bool is_blank = (line_length == 0 || (line_length == 1 && (*line_start == '\r' || *line_start == '\n')));
|
|
611
|
+
if (!is_blank) prev_line_was_table_row = is_table_row_line(line_start, line_length);
|
|
612
|
+
|
|
613
|
+
/* Update inline code span state for next line (if not already updated in one-line def path) */
|
|
614
|
+
if (sep < 0) {
|
|
615
|
+
scan_inline_code_for_sep(line_start, line_length, -1, in_inline_code_span, &in_inline_code_span);
|
|
1491
616
|
}
|
|
1492
617
|
|
|
1493
|
-
/* Move to next line - ensure we always advance */
|
|
1494
|
-
const char *old_read = read;
|
|
1495
618
|
read = line_end;
|
|
1496
|
-
if (*read == '\n')
|
|
1497
|
-
read++;
|
|
1498
|
-
}
|
|
1499
|
-
/* If we're at the end of the string, break */
|
|
1500
|
-
if (*read == '\0') {
|
|
1501
|
-
break;
|
|
1502
|
-
}
|
|
1503
|
-
/* Critical safety check: if we haven't advanced, break immediately */
|
|
1504
|
-
if (read <= old_read) {
|
|
1505
|
-
/* We're stuck - this should never happen, but break to prevent infinite loop */
|
|
1506
|
-
/* Force advance one character as last resort */
|
|
1507
|
-
if (*read != '\0') {
|
|
1508
|
-
read++;
|
|
1509
|
-
} else {
|
|
1510
|
-
break;
|
|
1511
|
-
}
|
|
1512
|
-
}
|
|
1513
|
-
/* Additional safety: if we've processed more than the text length, something is wrong */
|
|
1514
|
-
if (read > text + text_len) {
|
|
1515
|
-
break;
|
|
1516
|
-
}
|
|
619
|
+
if (read < text + text_len && *read == '\n') read++;
|
|
1517
620
|
}
|
|
1518
621
|
|
|
1519
|
-
|
|
622
|
+
|
|
623
|
+
if (dd_open) {
|
|
624
|
+
memcpy(write, "</dd>\n", 6);
|
|
625
|
+
write += 6;
|
|
626
|
+
}
|
|
1520
627
|
if (in_def_list) {
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
if (in_blockquote_context) {
|
|
1524
|
-
/* Add > prefix(es) at start of line for blockquote context */
|
|
1525
|
-
size_t prefix_needed = blockquote_depth * 2;
|
|
1526
|
-
/* Need prefix_needed + 1 for null terminator */
|
|
1527
|
-
ENSURE_SPACE(prefix_needed + 1);
|
|
1528
|
-
for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
|
|
1529
|
-
*write++ = '>';
|
|
1530
|
-
*write++ = ' ';
|
|
1531
|
-
remaining -= 2;
|
|
1532
|
-
}
|
|
1533
|
-
}
|
|
1534
|
-
/* Need dl_end_len + 1 for null terminator */
|
|
1535
|
-
ENSURE_SPACE(dl_end_len + 1);
|
|
1536
|
-
memcpy(write, dl_end, dl_end_len);
|
|
1537
|
-
write += dl_end_len;
|
|
1538
|
-
remaining -= dl_end_len;
|
|
628
|
+
memcpy(write, "</dl>\n", 6);
|
|
629
|
+
write += 6;
|
|
1539
630
|
}
|
|
1540
|
-
|
|
1541
|
-
/* Write any remaining term */
|
|
1542
631
|
if (term_len > 0) {
|
|
1543
|
-
/* Need term_len bytes + 1 for newline + 1 for null terminator */
|
|
1544
632
|
ENSURE_SPACE((size_t)term_len + 2);
|
|
1545
|
-
memcpy(write, term_buffer, term_len);
|
|
633
|
+
memcpy(write, term_buffer, (size_t)term_len);
|
|
1546
634
|
write += term_len;
|
|
1547
|
-
remaining -= (size_t)term_len;
|
|
1548
635
|
*write++ = '\n';
|
|
1549
|
-
remaining--;
|
|
1550
|
-
/* If we skipped a blank line after the term, output it now */
|
|
1551
|
-
if (skipped_blank_after_term) {
|
|
1552
|
-
ENSURE_SPACE(1);
|
|
1553
|
-
*write++ = '\n';
|
|
1554
|
-
remaining--;
|
|
1555
|
-
skipped_blank_after_term = false;
|
|
1556
|
-
}
|
|
1557
|
-
}
|
|
1558
|
-
|
|
1559
|
-
/* Free reference definitions if we extracted them */
|
|
1560
|
-
if (ref_definitions) {
|
|
1561
|
-
free(ref_definitions);
|
|
1562
|
-
ref_definitions = NULL; /* Prevent double-free */
|
|
1563
|
-
}
|
|
1564
|
-
|
|
1565
|
-
/* Ensure space for null terminator */
|
|
1566
|
-
if (remaining < 1) {
|
|
1567
|
-
size_t used = write - output;
|
|
1568
|
-
output_capacity = (used + 2) * 2;
|
|
1569
|
-
char *new_output = realloc(output, output_capacity + 1);
|
|
1570
|
-
if (!new_output) {
|
|
1571
|
-
free(output);
|
|
1572
|
-
return NULL;
|
|
1573
|
-
}
|
|
1574
|
-
output = new_output;
|
|
1575
|
-
write = output + used;
|
|
1576
|
-
remaining = output_capacity - used;
|
|
1577
|
-
}
|
|
1578
|
-
/* Safety check: if we processed but didn't actually create any definition lists,
|
|
1579
|
-
* return NULL to use original text. This handles cases where the early exit
|
|
1580
|
-
* incorrectly detected a pattern but no definition lists were actually created. */
|
|
1581
|
-
if (!found_any_def_list) {
|
|
1582
|
-
/* No definition lists were created - if we processed but didn't create any DLs,
|
|
1583
|
-
* something went wrong. Return NULL to use original text. */
|
|
1584
|
-
free(output);
|
|
1585
|
-
if (ref_definitions) {
|
|
1586
|
-
free(ref_definitions);
|
|
1587
|
-
}
|
|
1588
|
-
return NULL;
|
|
1589
636
|
}
|
|
1590
637
|
|
|
1591
638
|
*write = '\0';
|
|
1592
|
-
|
|
1593
|
-
#undef ENSURE_SPACE
|
|
1594
|
-
|
|
1595
|
-
/* If we didn't write anything, return original text to avoid empty output */
|
|
1596
|
-
if (write == output) {
|
|
1597
|
-
free(output);
|
|
1598
|
-
if (ref_definitions) {
|
|
1599
|
-
free(ref_definitions);
|
|
1600
|
-
}
|
|
1601
|
-
return NULL; /* Return NULL to indicate no processing was done */
|
|
1602
|
-
}
|
|
639
|
+
#undef ENSURE_SPACE
|
|
1603
640
|
|
|
1604
641
|
return output;
|
|
1605
642
|
}
|
|
1606
643
|
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
*/
|
|
1610
|
-
static cmark_node *postprocess(cmark_syntax_extension *ext,
|
|
1611
|
-
cmark_parser *parser,
|
|
1612
|
-
cmark_node *root) {
|
|
1613
|
-
(void)ext;
|
|
1614
|
-
(void)parser;
|
|
1615
|
-
/* Definition lists are now handled via preprocessing */
|
|
1616
|
-
return root;
|
|
1617
|
-
}
|
|
1618
|
-
|
|
1619
|
-
/**
|
|
1620
|
-
* Render definition list to HTML
|
|
1621
|
-
*/
|
|
1622
|
-
static void html_render(cmark_syntax_extension *ext,
|
|
1623
|
-
struct cmark_html_renderer *renderer,
|
|
1624
|
-
cmark_node *node,
|
|
1625
|
-
cmark_event_type ev_type,
|
|
1626
|
-
int options) {
|
|
1627
|
-
(void)ext;
|
|
1628
|
-
(void)options;
|
|
1629
|
-
cmark_strbuf *html = renderer->html;
|
|
1630
|
-
|
|
1631
|
-
if (ev_type == CMARK_EVENT_ENTER) {
|
|
1632
|
-
if (node->type == APEX_NODE_DEFINITION_LIST) {
|
|
1633
|
-
cmark_strbuf_puts(html, "<dl>\n");
|
|
1634
|
-
} else if (node->type == APEX_NODE_DEFINITION_TERM) {
|
|
1635
|
-
cmark_strbuf_puts(html, "<dt>");
|
|
1636
|
-
} else if (node->type == APEX_NODE_DEFINITION_DATA) {
|
|
1637
|
-
cmark_strbuf_puts(html, "<dd>");
|
|
1638
|
-
}
|
|
1639
|
-
} else if (ev_type == CMARK_EVENT_EXIT) {
|
|
1640
|
-
if (node->type == APEX_NODE_DEFINITION_LIST) {
|
|
1641
|
-
cmark_strbuf_puts(html, "</dl>\n");
|
|
1642
|
-
} else if (node->type == APEX_NODE_DEFINITION_TERM) {
|
|
1643
|
-
cmark_strbuf_puts(html, "</dt>\n");
|
|
1644
|
-
} else if (node->type == APEX_NODE_DEFINITION_DATA) {
|
|
1645
|
-
cmark_strbuf_puts(html, "</dd>\n");
|
|
1646
|
-
}
|
|
1647
|
-
}
|
|
1648
|
-
}
|
|
1649
|
-
|
|
1650
|
-
/**
|
|
1651
|
-
* Create definition list extension
|
|
1652
|
-
*/
|
|
1653
|
-
cmark_syntax_extension *create_definition_list_extension(void) {
|
|
1654
|
-
cmark_syntax_extension *ext = cmark_syntax_extension_new("definition_list");
|
|
1655
|
-
if (!ext) return NULL;
|
|
1656
|
-
|
|
1657
|
-
/* Register node types */
|
|
1658
|
-
APEX_NODE_DEFINITION_LIST = cmark_syntax_extension_add_node(0);
|
|
1659
|
-
APEX_NODE_DEFINITION_TERM = cmark_syntax_extension_add_node(0);
|
|
1660
|
-
APEX_NODE_DEFINITION_DATA = cmark_syntax_extension_add_node(0);
|
|
1661
|
-
|
|
1662
|
-
/* Set callbacks */
|
|
1663
|
-
cmark_syntax_extension_set_open_block_func(ext, open_block);
|
|
1664
|
-
cmark_syntax_extension_set_match_block_func(ext, match_block);
|
|
1665
|
-
cmark_syntax_extension_set_can_contain_func(ext, can_contain);
|
|
1666
|
-
cmark_syntax_extension_set_html_render_func(ext, html_render);
|
|
1667
|
-
cmark_syntax_extension_set_postprocess_func(ext, postprocess);
|
|
1668
|
-
|
|
1669
|
-
return ext;
|
|
644
|
+
void apex_deflist_debug_touch(int enable_definition_lists) {
|
|
645
|
+
(void)enable_definition_lists;
|
|
646
|
+
/* No-op for one-line format - debug was for old Kramdown format */
|
|
1670
647
|
}
|