apex-ruby 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. checksums.yaml +4 -4
  2. data/ext/apex_ext/apex_ext.c +6 -0
  3. data/ext/apex_ext/apex_src/AGENTS.md +41 -0
  4. data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
  5. data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
  6. data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
  7. data/ext/apex_ext/apex_src/Package.swift +9 -0
  8. data/ext/apex_ext/apex_src/README.md +31 -9
  9. data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
  10. data/ext/apex_ext/apex_src/VERSION +1 -1
  11. data/ext/apex_ext/apex_src/cli/main.c +1125 -13
  12. data/ext/apex_ext/apex_src/docs/index.md +459 -0
  13. data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
  14. data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
  15. data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
  16. data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
  17. data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
  18. data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
  19. data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
  20. data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
  21. data/ext/apex_ext/apex_src/man/apex.1 +663 -620
  22. data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
  23. data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
  24. data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
  25. data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
  26. data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
  27. data/ext/apex_ext/apex_src/pages/index.md +459 -0
  28. data/ext/apex_ext/apex_src/src/_README.md +4 -4
  29. data/ext/apex_ext/apex_src/src/apex.c +702 -44
  30. data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
  31. data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
  32. data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
  33. data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
  34. data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
  35. data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
  36. data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
  37. data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
  38. data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
  39. data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
  40. data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
  41. data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
  42. data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
  43. data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
  44. data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
  45. data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
  46. data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
  47. data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
  48. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
  49. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
  50. data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
  51. data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
  52. data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
  53. data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
  54. data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
  55. data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
  56. data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
  57. data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
  58. data/ext/apex_ext/apex_src/tests/README.md +11 -5
  59. data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
  60. data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
  61. data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
  62. data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
  63. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
  64. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
  65. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
  66. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
  67. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
  68. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
  69. data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
  70. data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
  71. data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
  72. data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
  73. data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
  74. data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
  75. data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
  76. data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
  77. data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
  78. data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
  79. data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
  80. data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
  81. data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
  82. data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
  83. data/lib/apex/version.rb +1 -1
  84. metadata +32 -2
  85. data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
@@ -1,19 +1,24 @@
1
1
  /**
2
2
  * Definition List Extension for Apex
3
- * Implementation
4
3
  *
5
- * Supports Kramdown/PHP Markdown Extra style definition lists:
6
- * Term
7
- * : Definition 1
8
- * : Definition 2
4
+ * Supports four formats (all produce <dl><dt>term</dt><dd>definition</dd></dl>):
9
5
  *
10
- * With block-level content in definitions:
11
- * Term
12
- * : Definition with paragraphs
6
+ * 1. Kramdown single colon:
7
+ * term
8
+ * : definition
13
9
  *
14
- * And code blocks
10
+ * 2. Kramdown double colon:
11
+ * term
12
+ * :: definition
15
13
  *
16
- * code here
14
+ * 3. One-line no space: term::definition
15
+ *
16
+ * 4. One-line with space: term :: definition
17
+ *
18
+ * For one-line format, :: must NOT be at line start (that's Kramdown).
19
+ * Whitespace around :: is allowed in one-line format.
20
+ *
21
+ * Both styles (Kramdown and one-line) are enabled by default in unified mode.
17
22
  */
18
23
 
19
24
  #include "definition_list.h"
@@ -24,1647 +29,619 @@
24
29
  #include "render.h"
25
30
  #include <string.h>
26
31
  #include <stdlib.h>
27
- #include <ctype.h>
28
32
  #include <stdbool.h>
29
- #include <stdio.h>
30
-
31
- /* Node type IDs */
32
- cmark_node_type APEX_NODE_DEFINITION_LIST;
33
- cmark_node_type APEX_NODE_DEFINITION_TERM;
34
- cmark_node_type APEX_NODE_DEFINITION_DATA;
35
33
 
36
34
  /**
37
- * Check if a line starts a definition (starts with : optionally indented up to 3 spaces)
35
+ * Check if a line matches the one-line definition format: Term :: Definition
36
+ * The line must contain :: with optional whitespace around it.
37
+ * Uses the last :: to avoid splitting URLs (e.g. http://example.com).
38
+ * Returns the position of :: or -1 if not a match.
38
39
  */
39
- static bool is_definition_line(const unsigned char *input, int len, int *indent) {
40
- if (!input || len == 0) return false;
41
-
42
- int spaces = 0;
43
-
44
- /* Count leading spaces (up to 3 allowed) */
45
- while (spaces < 3 && spaces < len && input[spaces] == ' ') {
46
- spaces++;
40
+ static int find_def_separator(const unsigned char *line, int len) {
41
+ if (!line || len < 3) return -1;
42
+
43
+ int last_sep = -1;
44
+ for (int i = 0; i < len - 1; i++) {
45
+ if (line[i] == ':' && line[i + 1] == ':') {
46
+ /* Skip :: that is immediately followed by '/' (treated as URL-like, e.g. "::/") */
47
+ if (i + 3 <= len && line[i + 2] == '/') continue;
48
+ /* Skip :: that's part of div/custom element (::: or more) */
49
+ if (i > 0 && line[i - 1] == ':') continue;
50
+ if (i + 2 < len && line[i + 2] == ':') continue;
51
+ last_sep = i;
52
+ }
47
53
  }
54
+ if (last_sep < 0) return -1;
48
55
 
49
- if (spaces >= len) return false;
50
-
51
- /* Must start with : */
52
- if (input[spaces] != ':') return false;
53
-
54
- /* Must be followed by space or tab */
55
- if (spaces + 1 >= len) return false;
56
- if (input[spaces + 1] != ' ' && input[spaces + 1] != '\t') return false;
56
+ /* Ensure we have content before (at least one non-space) */
57
+ int before = 0;
58
+ for (int j = 0; j < last_sep; j++) {
59
+ if (line[j] != ' ' && line[j] != '\t') {
60
+ before = 1;
61
+ break;
62
+ }
63
+ }
64
+ /* After: at least one character */
65
+ int after = (last_sep + 2 < len);
66
+ if (before && after) return last_sep;
67
+ return -1;
68
+ }
57
69
 
58
- *indent = spaces;
70
+ /**
71
+ * Check if a line is a Kramdown-style definition line (starts with : or :: after optional spaces or tabs).
72
+ * Reject ::: or more - those are div/custom element fences, not definition lists.
73
+ */
74
+ static bool is_kramdown_def_line(const char *line, size_t len) {
75
+ if (!line || len == 0) return false;
76
+ size_t i = 0;
77
+ while (i < len && (line[i] == ' ' || line[i] == '\t')) i++;
78
+ if (i >= len) return false;
79
+ if (line[i] != ':') return false;
80
+ int colon_len = 1;
81
+ if (i + 2 <= len && line[i + 1] == ':') colon_len = 2;
82
+ /* Reject 3+ colons (::: is div fence) */
83
+ if (i + 3 <= len && line[i + 2] == ':') return false;
84
+ if (i + (size_t)colon_len >= len) return false;
85
+ if (line[i + colon_len] != ' ' && line[i + colon_len] != '\t') return false;
59
86
  return true;
60
87
  }
61
88
 
62
89
  /**
63
- * Extract reference link IDs from text (e.g., "1670-042" from "[KeyRemap4MacBook][1670-042]")
64
- * Returns a dynamically allocated array of strings (IDs), with NULL terminator
65
- * Caller must free each string and the array itself
90
+ * Check if a line looks like a table row (starts with | after optional indent).
91
+ * Used to avoid treating : Caption as a definition when it's a table caption.
66
92
  */
67
- static char **extract_reference_link_ids(const char *text, size_t *count) {
68
- if (!text || !count) return NULL;
69
-
70
- *count = 0;
71
- size_t capacity = 16;
72
- char **ids = malloc(capacity * sizeof(char*));
73
- if (!ids) return NULL;
74
-
75
- const char *p = text;
76
- while (*p) {
77
- if (*p == '[') {
78
- const char *text_start = p + 1;
79
- const char *text_end = strchr(text_start, ']');
80
- if (text_end) {
81
- if (text_end[1] == '[' && text_end[2] == ']') {
82
- /* Found shortcut reference [text][] - use text as the ID */
83
- size_t text_len = text_end - text_start;
84
- if (text_len > 0) {
85
- char *id = malloc(text_len + 1);
86
- if (id) {
87
- memcpy(id, text_start, text_len);
88
- id[text_len] = '\0';
89
-
90
- /* Check if we already have this ID */
91
- bool found = false;
92
- for (size_t i = 0; i < *count; i++) {
93
- if (strcmp(ids[i], id) == 0) {
94
- found = true;
95
- free(id);
96
- break;
97
- }
98
- }
99
-
100
- if (!found) {
101
- /* Add to array */
102
- if (*count >= capacity) {
103
- capacity *= 2;
104
- char **new_ids = realloc(ids, capacity * sizeof(char*));
105
- if (!new_ids) {
106
- free(id);
107
- break;
108
- }
109
- ids = new_ids;
110
- }
111
- ids[*count] = id;
112
- (*count)++;
113
- }
114
- }
115
- }
116
- p = text_end + 3; /* Skip past ]] */
117
- continue;
118
- } else if (text_end[1] == '[') {
119
- /* Found [text][ref] pattern */
120
- const char *ref_start = text_end + 2;
121
- const char *ref_end = strchr(ref_start, ']');
122
- if (ref_end) {
123
- /* Extract the reference ID */
124
- size_t ref_len = ref_end - ref_start;
125
- if (ref_len > 0) {
126
- char *id = malloc(ref_len + 1);
127
- if (id) {
128
- memcpy(id, ref_start, ref_len);
129
- id[ref_len] = '\0';
130
-
131
- /* Check if we already have this ID */
132
- bool found = false;
133
- for (size_t i = 0; i < *count; i++) {
134
- if (strcmp(ids[i], id) == 0) {
135
- found = true;
136
- free(id);
137
- break;
138
- }
139
- }
140
-
141
- if (!found) {
142
- /* Add to array */
143
- if (*count >= capacity) {
144
- capacity *= 2;
145
- char **new_ids = realloc(ids, capacity * sizeof(char*));
146
- if (!new_ids) {
147
- free(id);
148
- break;
149
- }
150
- ids = new_ids;
151
- }
152
- ids[*count] = id;
153
- (*count)++;
154
- }
155
- }
156
- }
157
- p = ref_end + 1;
158
- continue;
159
- }
160
- }
161
- }
162
- }
163
- p++;
164
- }
165
-
166
- /* Add NULL terminator */
167
- if (*count >= capacity) {
168
- char **new_ids = realloc(ids, (capacity + 1) * sizeof(char*));
169
- if (new_ids) ids = new_ids;
170
- }
171
- if (ids) ids[*count] = NULL;
172
-
173
- return ids;
93
+ static bool is_table_row_line(const char *line, size_t len) {
94
+ if (!line || len == 0) return false;
95
+ size_t i = 0;
96
+ while (i < len && (line[i] == ' ' || line[i] == '\t')) i++;
97
+ return i < len && line[i] == '|';
174
98
  }
175
99
 
176
100
  /**
177
- * Extract specific reference definitions from the full reference definitions string
178
- * based on a list of reference IDs
179
- * Returns a string containing only the needed definitions, or NULL if none found
180
- * Caller must free the returned string
101
+ * Check if the next non-blank line after pos is a table row. Used for "caption before table".
181
102
  */
182
- static char *extract_specific_reference_definitions(const char *all_refs, char **needed_ids) {
183
- if (!all_refs || !needed_ids || !needed_ids[0]) return NULL;
184
-
185
- size_t result_capacity = 1024;
186
- size_t result_len = 0;
187
- char *result = malloc(result_capacity);
188
- if (!result) return NULL;
189
- result[0] = '\0';
190
-
191
- const char *p = all_refs;
192
- while (*p) {
193
- const char *line_start = p;
194
- const char *line_end = strchr(p, '\n');
195
- if (!line_end) line_end = p + strlen(p);
196
-
197
- /* Skip leading whitespace */
198
- const char *content_start = line_start;
199
- while (content_start < line_end && (*content_start == ' ' || *content_start == '\t')) {
200
- content_start++;
201
- }
202
-
203
- /* Check if this is a reference link definition: [id]: URL */
204
- if (content_start < line_end && *content_start == '[') {
205
- const char *id_end = strchr(content_start + 1, ']');
206
- if (id_end && id_end < line_end && id_end[1] == ':') {
207
- /* Extract the ID from this definition */
208
- size_t def_id_len = id_end - (content_start + 1);
209
- char *def_id = malloc(def_id_len + 1);
210
- if (def_id) {
211
- memcpy(def_id, content_start + 1, def_id_len);
212
- def_id[def_id_len] = '\0';
213
-
214
- /* Check if this ID is in our needed list */
215
- bool needed = false;
216
- for (size_t i = 0; needed_ids[i]; i++) {
217
- if (strcmp(needed_ids[i], def_id) == 0) {
218
- needed = true;
219
- break;
220
- }
221
- }
222
-
223
- if (needed) {
224
- /* Include this definition */
225
- size_t line_len = line_end - line_start;
226
- if (line_end < p + strlen(p) && *line_end == '\n') {
227
- line_len++; /* Include newline */
228
- }
229
-
230
- /* Expand buffer if needed */
231
- if (result_len + line_len + 1 >= result_capacity) {
232
- result_capacity = (result_len + line_len + 1) * 2;
233
- char *new_result = realloc(result, result_capacity);
234
- if (!new_result) {
235
- free(def_id);
236
- break;
237
- }
238
- result = new_result;
239
- }
240
-
241
- /* Copy the line */
242
- memcpy(result + result_len, line_start, line_len);
243
- result_len += line_len;
244
- result[result_len] = '\0';
245
- }
246
-
247
- free(def_id);
248
- }
249
- }
250
- }
251
-
252
- /* Move to next line */
253
- p = line_end;
254
- if (*p == '\n') p++;
103
+ static bool next_nonblank_line_is_table(const char *pos, const char *text_end) {
104
+ while (pos < text_end) {
105
+ if (*pos == '\n') { pos++; continue; }
106
+ const char *line_end = strchr(pos, '\n');
107
+ if (!line_end) line_end = text_end;
108
+ const char *p = pos;
109
+ while (p < line_end && (*p == ' ' || *p == '\t')) p++;
110
+ if (p < line_end) return *p == '|';
111
+ pos = line_end + (line_end < text_end && *line_end == '\n' ? 1 : 0);
255
112
  }
113
+ return false;
114
+ }
256
115
 
257
- if (result_len == 0) {
258
- free(result);
259
- return NULL;
116
+ /** True if content at p looks like a list marker (- , * , + , or digit+. ) */
117
+ static bool looks_like_list_marker(const char *p) {
118
+ if (*p == '-' || *p == '*' || *p == '+')
119
+ return (p[1] == ' ' || p[1] == '\t');
120
+ if (isdigit((unsigned char)*p)) {
121
+ while (isdigit((unsigned char)*p)) p++;
122
+ return (*p == '.' && (p[1] == ' ' || p[1] == '\t'));
260
123
  }
261
-
262
- return result;
124
+ return false;
263
125
  }
264
126
 
265
127
  /**
266
- * Extract all reference link definitions from the document
267
- * Returns a string containing all reference definitions, or NULL if none found
268
- * Caller must free the returned string
128
+ * True if line is an indented code block (4+ spaces or tab at start, not a list line).
129
+ * Used to skip definition list processing inside indented code blocks.
269
130
  */
270
- static char *extract_reference_definitions(const char *text) {
271
- if (!text) return NULL;
272
-
273
- size_t text_len = strlen(text);
274
- size_t refs_capacity = text_len + 1;
275
- char *refs = malloc(refs_capacity);
276
- if (!refs) return NULL;
277
- refs[0] = '\0';
278
-
279
- const char *p = text;
280
- size_t refs_len = 0;
281
-
282
- while (*p) {
283
- const char *line_start = p;
284
- const char *line_end = strchr(p, '\n');
285
- if (!line_end) line_end = p + strlen(p);
286
-
287
- /* Skip leading whitespace */
288
- const char *content_start = line_start;
289
- while (content_start < line_end && (*content_start == ' ' || *content_start == '\t')) {
290
- content_start++;
291
- }
292
-
293
- /* Check if this is a reference link definition: [id]: URL */
294
- if (content_start < line_end && *content_start == '[') {
295
- const char *id_end = strchr(content_start + 1, ']');
296
- if (id_end && id_end < line_end && id_end[1] == ':') {
297
- /* This is a reference definition - extract the entire line */
298
- size_t line_len = line_end - line_start;
299
- if (line_end < p + strlen(p) && *line_end == '\n') {
300
- line_len++; /* Include newline */
301
- }
302
-
303
- /* Check if we need to expand the buffer */
304
- if (refs_len + line_len + 1 >= refs_capacity) {
305
- refs_capacity = (refs_len + line_len + 1) * 2;
306
- char *new_refs = realloc(refs, refs_capacity);
307
- if (!new_refs) {
308
- free(refs);
309
- return NULL;
310
- }
311
- refs = new_refs;
312
- }
313
-
314
- /* Copy the line */
315
- memcpy(refs + refs_len, line_start, line_len);
316
- refs_len += line_len;
317
- refs[refs_len] = '\0';
318
- }
319
- }
320
-
321
- /* Move to next line */
322
- p = line_end;
323
- if (*p == '\n') p++;
324
- }
325
-
326
- if (refs_len == 0) {
327
- free(refs);
328
- return NULL;
329
- }
330
-
331
- return refs;
131
+ static bool line_is_indented_code_block(const char *line, size_t len) {
132
+ if (len == 0) return false;
133
+ if (line[0] == '\t') {
134
+ return len > 1 && !looks_like_list_marker(line + 1);
135
+ }
136
+ if (len < 4 || line[0] != ' ' || line[1] != ' ' || line[2] != ' ' || line[3] != ' ')
137
+ return false;
138
+ const char *content = line + 4;
139
+ while (content < line + len && *content == ' ') content++;
140
+ return (content < line + len) && !looks_like_list_marker(content);
332
141
  }
333
142
 
334
143
  /**
335
- * Open block - called when we see a ':' character that might start a definition
144
+ * Scans line for inline code backticks, updates state for next line, and returns
145
+ * whether sep_pos is inside an inline code span. Single backticks toggle; 3+ are
146
+ * fenced blocks (handled elsewhere). Used to skip definition processing inside
147
+ * inline code spans, including multi-line spans like `term::def\n :more:`.
336
148
  */
337
- static cmark_node *open_block(cmark_syntax_extension *ext,
338
- int indented,
339
- cmark_parser *parser,
340
- cmark_node *parent_container,
341
- unsigned char *input,
342
- int len) {
343
- (void)ext;
344
- if (indented > 3) {
345
- return NULL; /* Too indented */
346
- }
347
-
348
- /* Check for 4+ leading spaces - these are table captions, not definition lists */
349
- int leading_spaces = 0;
350
- while (leading_spaces < len && leading_spaces < 10 && input[leading_spaces] == ' ') {
351
- leading_spaces++;
352
- }
353
- if (leading_spaces >= 4) {
354
- return NULL; /* Table caption, not definition list */
355
- }
356
-
357
- int def_indent;
358
- if (!is_definition_line(input, len, &def_indent)) {
359
- return NULL;
360
- }
361
-
362
- /* Check if the line contains an IAL (Inline Attribute List) like {#id .class} */
363
- /* Lines with IALs are almost always table captions, not definition lists */
364
- const unsigned char *p = input;
365
- const unsigned char *end = input + len;
366
- while (p < end) {
367
- if (*p == '{') {
368
- p++;
369
- /* Check if it looks like an IAL: {# or {. or {: */
370
- if (p < end && (*p == '#' || *p == '.' || *p == ':')) {
371
- /* Look for closing } */
372
- while (p < end && *p != '}') {
373
- p++;
374
- }
375
- if (p < end && *p == '}') {
376
- return NULL; /* This is a table caption, not a definition list */
377
- }
378
- }
149
+ static bool scan_inline_code_for_sep(const char *line, size_t len, int sep_pos,
150
+ bool in_span_at_start, bool *out_in_span_at_end) {
151
+ bool in = in_span_at_start;
152
+ bool sep_inside = false;
153
+ for (size_t i = 0; i < len; i++) {
154
+ if ((int)i == sep_pos) sep_inside = in;
155
+ if (line[i] == '`') {
156
+ int count = 1;
157
+ while (i + (size_t)count < len && line[i + count] == '`') count++;
158
+ if (count == 1) in = !in;
159
+ i += (size_t)(count - 1);
379
160
  }
380
- p++;
381
- }
382
-
383
- /* Safety check: parent_container must be valid */
384
- if (!parent_container) {
385
- return NULL;
386
161
  }
387
-
388
- /* Additional safety: verify parent_container is a valid node type */
389
- cmark_node_type parent_type = cmark_node_get_type(parent_container);
390
-
391
- /* Check if previous block was a paragraph (term) */
392
- /* Only try to get last child for node types that support children */
393
- cmark_node *prev = NULL;
394
- if (parent_type == CMARK_NODE_DOCUMENT ||
395
- parent_type == CMARK_NODE_BLOCK_QUOTE ||
396
- parent_type == CMARK_NODE_LIST ||
397
- parent_type == CMARK_NODE_ITEM ||
398
- parent_type == APEX_NODE_DEFINITION_LIST ||
399
- parent_type == APEX_NODE_DEFINITION_DATA) {
400
- prev = cmark_node_last_child(parent_container);
401
- } else {
402
- /* For other node types, don't try to get last child */
403
- return NULL;
404
- }
405
-
406
- if (!prev) {
407
- return NULL;
408
- }
409
-
410
- cmark_node_type prev_type = cmark_node_get_type(prev);
411
- if (prev_type != CMARK_NODE_PARAGRAPH) {
412
- return NULL;
413
- }
414
-
415
- /* Additional safety: verify prev is still valid and attached to parent */
416
- cmark_node *prev_parent = cmark_node_parent(prev);
417
- if (prev_parent != parent_container) {
418
- return NULL;
419
- }
420
-
421
- /* Create definition list container */
422
- cmark_node *def_list = cmark_node_new_with_mem(APEX_NODE_DEFINITION_LIST, parser->mem);
423
- if (!def_list) return NULL;
424
-
425
- /* Convert previous paragraph to term */
426
- cmark_node *term = cmark_node_new_with_mem(APEX_NODE_DEFINITION_TERM, parser->mem);
427
- if (!term) {
428
- cmark_node_free(def_list);
429
- return NULL;
430
- }
431
-
432
- /* Move paragraph children to term - but DON'T unlink prev itself */
433
- /* Unlinking prev during parsing causes segfaults because the parser is still using it */
434
- cmark_node *child;
435
- while ((child = cmark_node_first_child(prev))) {
436
- cmark_node_unlink(child);
437
- cmark_node_append_child(term, child);
438
- }
439
-
440
- cmark_node_append_child(def_list, term);
441
- return def_list;
162
+ *out_in_span_at_end = in;
163
+ return (sep_pos >= 0 && (size_t)sep_pos < len) ? sep_inside : false;
442
164
  }
443
165
 
444
166
  /**
445
- * Match block - check if a line continues a definition list
167
+ * Check if we're inside a fenced code block (```) - don't process definition lists there
446
168
  */
447
- static int match_block(cmark_syntax_extension *ext,
448
- cmark_parser *parser,
449
- unsigned char *input,
450
- int len,
451
- cmark_node *container) {
452
- (void)ext;
453
- (void)parser;
454
- if (cmark_node_get_type(container) != APEX_NODE_DEFINITION_LIST &&
455
- cmark_node_get_type(container) != APEX_NODE_DEFINITION_DATA) {
456
- return 0;
169
+ static bool is_code_fence_line(const char *line, size_t len) {
170
+ const char *p = line;
171
+ while (p < line + len && (*p == ' ' || *p == '\t')) p++;
172
+ if (p + 3 <= line + len && p[0] == '`' && p[1] == '`' && p[2] == '`') {
173
+ return true;
457
174
  }
458
-
459
- int def_indent;
460
- if (is_definition_line(input, len, &def_indent)) {
461
- return 1; /* This line continues the definition list */
462
- }
463
-
464
- /* Also continue if line is blank or indented (block content in definition) */
465
- if (len == 0 || (len > 0 && (input[0] == ' ' || input[0] == '\t'))) {
466
- if (cmark_node_get_type(container) == APEX_NODE_DEFINITION_DATA) {
467
- return 1; /* Block content in definition */
468
- }
469
- }
470
-
471
- return 0;
175
+ return false;
472
176
  }
473
177
 
474
178
  /**
475
- * Can contain - definition data can contain block-level content
179
+ * Render inline content (term or definition) with full document context so cmark
180
+ * can resolve reference links. Parses full_doc + "\n\n" + content so ref defs are
181
+ * available. Returns HTML of the last block (our content), stripping <p></p>.
182
+ * Caller must free the returned string.
476
183
  */
477
- static int can_contain(cmark_syntax_extension *ext,
478
- cmark_node *node,
479
- cmark_node_type child_type) {
480
- (void)ext;
481
- if (cmark_node_get_type(node) == APEX_NODE_DEFINITION_DATA) {
482
- /* Definition data can contain any block-level content */
483
- return child_type == CMARK_NODE_PARAGRAPH ||
484
- child_type == CMARK_NODE_CODE_BLOCK ||
485
- child_type == CMARK_NODE_BLOCK_QUOTE ||
486
- child_type == CMARK_NODE_LIST ||
487
- child_type == CMARK_NODE_HEADING ||
488
- child_type == CMARK_NODE_THEMATIC_BREAK;
184
+ static char *render_inline_with_doc(const char *content, size_t content_len,
185
+ const char *full_doc, size_t full_doc_len, bool unsafe) {
186
+ size_t buf_len = full_doc_len + 2 + content_len + 1;
187
+ char *buf = malloc(buf_len);
188
+ if (!buf) return NULL;
189
+ memcpy(buf, full_doc, full_doc_len);
190
+ buf[full_doc_len] = '\n';
191
+ buf[full_doc_len + 1] = '\n';
192
+ memcpy(buf + full_doc_len + 2, content, content_len);
193
+ buf[buf_len - 1] = '\0';
194
+
195
+ int opts = CMARK_OPT_DEFAULT | CMARK_OPT_SMART;
196
+ if (unsafe) opts |= CMARK_OPT_UNSAFE | CMARK_OPT_LIBERAL_HTML_TAG;
197
+ cmark_parser *cp = cmark_parser_new(opts);
198
+ if (!cp) { free(buf); return NULL; }
199
+ cmark_parser_feed(cp, buf, (int)(buf_len - 1));
200
+ free(buf);
201
+ cmark_node *doc = cmark_parser_finish(cp);
202
+ cmark_parser_free(cp);
203
+ if (!doc) return NULL;
204
+
205
+ cmark_node *last = cmark_node_last_child(doc);
206
+ if (!last) {
207
+ cmark_node_free(doc);
208
+ return NULL;
489
209
  }
490
- return 0;
210
+ char *html = cmark_render_html(last, opts, NULL);
211
+ cmark_node_free(doc);
212
+ if (!html) return NULL;
213
+
214
+ /* Strip <p> and </p> wrapper, return inner content */
215
+ char *content_start = html;
216
+ if (strncmp(html, "<p>", 3) == 0) content_start = html + 3;
217
+ size_t html_len = strlen(content_start);
218
+ if (html_len > 5 && strcmp(content_start + html_len - 5, "</p>\n") == 0)
219
+ html_len -= 5;
220
+ else if (html_len > 4 && strcmp(content_start + html_len - 4, "</p>") == 0)
221
+ html_len -= 4;
222
+ char *result = malloc(html_len + 1);
223
+ if (result) {
224
+ memcpy(result, content_start, html_len);
225
+ result[html_len] = '\0';
226
+ }
227
+ free(html);
228
+ return result;
491
229
  }
492
230
 
493
231
  /**
494
- * Process definition lists - convert : syntax to HTML
495
- * This is a preprocessing approach
232
+ * Process one-line definition lists: Term :: Definition -> <dl><dt>Term</dt><dd>Definition</dd></dl>
233
+ * Returns newly allocated string with HTML, or NULL if no changes (caller uses original).
496
234
  */
497
235
  char *apex_process_definition_lists(const char *text, bool unsafe) {
498
236
  if (!text) return NULL;
499
237
 
500
238
  size_t text_len = strlen(text);
501
239
 
502
- /* Quick scan: check if any definition list patterns exist before processing */
503
- /* Definition lists start with : (after up to 3 spaces or blockquote) */
504
- bool has_def_list_pattern = false;
505
- const char *p = text;
506
- const char *end = text + text_len;
507
-
508
- while (p < end) {
509
- /* Look for start of line (beginning of text or after newline) */
510
- if (p == text || p[-1] == '\n') {
511
- const char *check = p;
512
- int spaces = 0;
513
-
514
- /* Skip up to 3 leading spaces */
515
- while (spaces < 3 && check < end && *check == ' ') {
516
- spaces++;
517
- check++;
518
- }
519
-
520
- /* Skip blockquote prefix (>) */
521
- while (check < end && *check == '>') {
522
- check++;
523
- /* Skip optional space after > */
524
- if (check < end && (*check == ' ' || *check == '\t')) {
525
- check++;
526
- }
527
- }
528
-
529
- /* Check if this line starts with : followed by space/tab */
530
- if (check < end && *check == ':' && (check + 1) < end && (check[1] == ' ' || check[1] == '\t')) {
531
- has_def_list_pattern = true;
240
+ /* Quick scan: check for :: or : at line start (skip reference defs [id]: url) */
241
+ bool has_pattern = false;
242
+ const char *scan = text;
243
+ while (*scan) {
244
+ if (scan[0] == ':' && scan[1] == ':') {
245
+ /* Skip ::: or more (div/custom element fence) - only match exactly :: */
246
+ if (scan > text && scan[-1] == ':') { scan++; continue; }
247
+ if (scan[2] == ':') { scan++; continue; }
248
+ const char *line_start = scan;
249
+ while (line_start > text && line_start[-1] != '\n') line_start--;
250
+ const char *p = line_start;
251
+ while (p < scan && (*p == ' ' || *p == '\t')) p++;
252
+ if (p >= scan || *p != '[') { has_pattern = true; break; }
253
+ }
254
+ if ((scan == text || scan[-1] == '\n') && *scan) {
255
+ const char *p = scan;
256
+ while (*p == ' ' || *p == '\t') p++;
257
+ if (*p == ':' && (p[1] == ' ' || p[1] == '\t' || (p[1] == ':' && (p[2] == ' ' || p[2] == '\t')))) {
258
+ has_pattern = true;
532
259
  break;
533
260
  }
534
-
535
- /* Move to next line */
536
- while (p < end && *p != '\n') {
537
- p++;
538
- }
539
- if (p < end) p++; /* Skip the newline */
540
- } else {
541
- p++;
542
261
  }
262
+ scan++;
543
263
  }
264
+ if (!has_pattern) return NULL;
544
265
 
545
- /* Early exit if no definition list patterns found */
546
- if (!has_def_list_pattern) {
547
- return NULL;
548
- }
549
-
550
- size_t output_capacity = text_len * 3; /* Generous for HTML tags */
551
- char *output = malloc(output_capacity + 1); /* +1 for null terminator */
266
+ size_t output_capacity = text_len * 3;
267
+ char *output = malloc(output_capacity + 1);
552
268
  if (!output) return NULL;
553
269
 
554
- /* Extract all reference link definitions from the document */
555
- char *ref_definitions = extract_reference_definitions(text);
556
-
557
270
  const char *read = text;
558
271
  char *write = output;
559
- /* Reserve 1 byte for null terminator, so we have output_capacity bytes to write */
560
272
  size_t remaining = output_capacity;
561
273
 
562
- /* Helper macro to expand buffer if needed */
563
- /* Always reserves 1 byte for null terminator */
564
- #define ENSURE_SPACE(needed) do { \
565
- /* We need 'needed' bytes for content + 1 for null terminator = needed+1 total */ \
566
- /* So we need remaining > needed (not >=) to have space for both */ \
567
- if (remaining <= (needed)) { \
568
- size_t used = write - output; \
569
- /* Allocate enough space: used + needed + 1 for null terminator, then double for safety */ \
570
- size_t min_capacity = used + (needed) + 1; \
571
- output_capacity = (min_capacity < 1024) ? 2048 : min_capacity * 2; \
572
- char *new_output = realloc(output, output_capacity + 1); \
573
- if (!new_output) { \
574
- free(output); \
575
- free(ref_definitions); \
576
- return NULL; \
577
- } \
578
- output = new_output; \
579
- write = output + used; \
580
- remaining = output_capacity - used; \
581
- } \
582
- } while(0)
274
+ #define ENSURE_SPACE(needed) do { \
275
+ if (remaining <= (needed)) { \
276
+ size_t used = write - output; \
277
+ size_t min_capacity = used + (needed) + 1; \
278
+ output_capacity = (min_capacity < 1024) ? 2048 : min_capacity * 2; \
279
+ char *new_output = realloc(output, output_capacity + 1); \
280
+ if (!new_output) { free(output); return NULL; } \
281
+ output = new_output; \
282
+ write = output + used; \
283
+ remaining = output_capacity - used; \
284
+ } \
285
+ } while(0)
583
286
 
584
287
  bool in_def_list = false;
585
- bool in_blockquote_context = false; /* Track if we're processing blockquote-prefixed definition lists */
586
- int blockquote_depth = 0; /* Track nesting depth of blockquotes (number of > characters) */
288
+ bool in_code_block = false;
289
+ bool in_indented_code_block = false;
290
+ bool in_inline_code_span = false;
587
291
  char term_buffer[4096];
588
292
  int term_len = 0;
589
- bool term_has_blockquote = false; /* Track if buffered term has blockquote prefix */
590
- int term_blockquote_depth = 0; /* Track blockquote depth of buffered term */
591
- bool found_any_def_list = false; /* Track if we actually created any definition lists */
592
- bool in_code_block = false; /* Track if we're inside a fenced code block */
593
- bool skipped_blank_after_term = false; /* Track if we skipped a blank line after a buffered term */
594
-
595
- const char *prev_read_pos = NULL;
596
- int iteration_count = 0;
597
- const int MAX_ITERATIONS = 1000000; /* Safety limit */
293
+ bool dd_open = false; /* True when we output <dd> but not yet </dd> (for Kramdown continuation) */
294
+ bool prev_line_was_table_row = false;
598
295
 
599
296
  while (*read) {
600
- /* Safety: prevent infinite loops */
601
- if (++iteration_count > MAX_ITERATIONS) {
602
- /* Something is wrong - return original text to avoid hanging */
603
- free(output);
604
- free(ref_definitions);
605
- return strdup(text);
606
- }
607
-
608
- /* Safety: if we haven't advanced, break to prevent infinite loop */
609
- if (prev_read_pos == read) {
610
- break;
611
- }
612
- prev_read_pos = read;
613
-
614
297
  const char *line_start = read;
615
298
  const char *line_end = strchr(read, '\n');
616
- if (!line_end) {
617
- /* No newline found - we're at the last line */
618
- /* Find the end by looking for null terminator */
619
- line_end = read;
620
- while (*line_end != '\0') line_end++;
621
- /* If line_end == read, we're at the end - break */
622
- if (line_end == read) {
623
- break;
624
- }
625
- }
299
+ if (!line_end) line_end = read + strlen(read);
626
300
 
627
- size_t line_length = line_end - line_start;
301
+ size_t line_length = (size_t)(line_end - line_start);
302
+ int sep = -1; /* One-line def separator pos; -1 = none (used for inline code state update) */
628
303
 
629
- /* Check for fenced code blocks (```) */
630
- const char *code_check = line_start;
631
- while (code_check < line_end && (*code_check == ' ' || *code_check == '\t')) code_check++;
632
- bool is_code_fence = false;
633
- bool is_closing_fence = false; /* True if this is a closing fence (no language identifier) */
634
- if (code_check + 3 <= line_end &&
635
- code_check[0] == '`' && code_check[1] == '`' && code_check[2] == '`') {
636
- is_code_fence = true;
637
- /* Check if this is a closing fence (just ``` with optional whitespace, no language identifier) */
638
- const char *after_fence = code_check + 3;
639
- while (after_fence < line_end && (*after_fence == ' ' || *after_fence == '\t')) after_fence++;
640
- is_closing_fence = (after_fence >= line_end || *after_fence == '\n' || *after_fence == '\r');
304
+ /* Track indented code blocks (4+ spaces or tab, not list continuation) */
305
+ if (read == text || read[-1] == '\n') {
306
+ bool this_line_indented = line_is_indented_code_block(line_start, line_length);
307
+ if (this_line_indented) {
308
+ in_indented_code_block = true;
309
+ } else {
310
+ /* Non-blank line without indent ends the block */
311
+ bool is_blank = (line_length == 0 || (line_length == 1 && (*line_start == '\r' || *line_start == '\n')));
312
+ if (!is_blank) in_indented_code_block = false;
313
+ }
641
314
  }
642
315
 
643
- /* If we're inside a code block OR this is a code fence line, just copy the line as-is */
644
- if (in_code_block || is_code_fence) {
645
- /* Handle code block state when we see a fence */
646
- if (is_code_fence) {
647
- bool was_in_code_block = in_code_block;
648
- /* If we're inside a code block, only a closing fence (no language identifier) closes it.
649
- Otherwise, any fence opens a new code block. */
650
- if (in_code_block) {
651
- if (is_closing_fence) {
652
- in_code_block = false;
653
- }
654
- /* If it's an opening fence with language identifier inside a code block, treat as content (don't change state) */
655
- } else {
656
- in_code_block = true;
657
- }
658
- /* If we're entering a code block, clear any pending definition list state */
659
- if (in_code_block && !was_in_code_block) {
660
- if (in_def_list) {
661
- /* Close any open definition list */
662
- const char *dl_end = "</dl>\n";
663
- size_t dl_end_len = strlen(dl_end);
664
- ENSURE_SPACE(dl_end_len + 1);
665
- memcpy(write, dl_end, dl_end_len);
666
- write += dl_end_len;
667
- remaining -= dl_end_len;
668
- }
669
- in_def_list = false;
670
- term_len = 0;
671
- term_has_blockquote = false;
672
- term_blockquote_depth = 0;
673
- skipped_blank_after_term = false;
674
- }
675
- /* If we're exiting a code block, clear any pending definition list state */
676
- if (!in_code_block && was_in_code_block) {
677
- in_def_list = false;
678
- term_len = 0;
679
- term_has_blockquote = false;
680
- term_blockquote_depth = 0;
681
- skipped_blank_after_term = false;
682
- }
316
+ /* Track code blocks */
317
+ if (is_code_fence_line(line_start, line_length)) {
318
+ in_code_block = !in_code_block;
319
+ if (in_def_list && in_code_block) {
320
+ /* Close def list before code block */
321
+ ENSURE_SPACE(10);
322
+ memcpy(write, "</dl>\n", 6);
323
+ write += 6;
324
+ remaining -= 6;
325
+ in_def_list = false;
683
326
  }
684
- ENSURE_SPACE(line_length + 1);
327
+ ENSURE_SPACE(line_length + 2);
685
328
  memcpy(write, line_start, line_length);
686
329
  write += line_length;
687
330
  remaining -= line_length;
688
331
  *write++ = '\n';
689
332
  remaining--;
690
- read = line_end;
691
- if (*read == '\n') {
692
- read++;
693
- }
333
+ read = line_end + (line_end < text + text_len && *line_end == '\n' ? 1 : 0);
694
334
  continue;
695
335
  }
696
336
 
697
- /* Skip table rows (lines that start with |) */
698
- const char *p = line_start;
699
- while (p < line_end && (*p == ' ' || *p == '\t')) p++;
700
- bool is_table_row = (p < line_end && *p == '|');
701
-
702
- /* Check if line is a list item (starts with -, *, +, or number.) */
703
- const char *list_check = line_start;
704
- int list_spaces = 0;
705
- while (list_check < line_end && (*list_check == ' ' || *list_check == '\t') && list_spaces < 4) {
706
- list_spaces++;
707
- list_check++;
708
- }
709
- bool is_list_item = false;
710
- if (list_check < line_end) {
711
- if (*list_check == '-' || *list_check == '*' || *list_check == '+') {
712
- /* Check if followed by space or tab */
713
- if (list_check + 1 < line_end && (list_check[1] == ' ' || list_check[1] == '\t')) {
714
- is_list_item = true;
715
- }
716
- } else if (*list_check >= '0' && *list_check <= '9') {
717
- /* Check for numbered list (digit followed by . and space) */
718
- const char *num_check = list_check;
719
- while (num_check < line_end && *num_check >= '0' && *num_check <= '9') {
720
- num_check++;
721
- }
722
- if (num_check < line_end && *num_check == '.' &&
723
- num_check + 1 < line_end && (num_check[1] == ' ' || num_check[1] == '\t')) {
724
- is_list_item = true;
725
- }
726
- }
337
+ if (in_code_block) {
338
+ ENSURE_SPACE(line_length + 2);
339
+ memcpy(write, line_start, line_length);
340
+ write += line_length;
341
+ remaining -= line_length;
342
+ *write++ = '\n';
343
+ remaining--;
344
+ read = line_end + (line_end < text + text_len && *line_end == '\n' ? 1 : 0);
345
+ continue;
727
346
  }
728
347
 
729
- /* Check if line starts with : (definition) */
730
- /* Also handle blockquote prefixes: > : or >: */
731
- p = line_start;
732
- int spaces = 0;
733
- while (*p == ' ' && spaces < 3 && p < line_end) {
734
- spaces++;
735
- p++;
348
+ /* Skip definition processing inside indented code blocks */
349
+ if (in_indented_code_block) {
350
+ ENSURE_SPACE(line_length + 2);
351
+ memcpy(write, line_start, line_length);
352
+ write += line_length;
353
+ remaining -= line_length;
354
+ *write++ = '\n';
355
+ remaining--;
356
+ read = line_end + (line_end < text + text_len && *line_end == '\n' ? 1 : 0);
357
+ continue;
736
358
  }
737
359
 
738
- /* Check for blockquote prefix (may be nested: > > >) */
739
- bool has_blockquote_prefix = false;
740
- int current_blockquote_depth = 0;
741
- while (p < line_end && *p == '>') {
742
- has_blockquote_prefix = true;
743
- current_blockquote_depth++;
744
- p++;
745
- /* Skip optional space after > */
746
- if (p < line_end && (*p == ' ' || *p == '\t')) {
747
- p++;
360
+ /* Check for Kramdown-style definition: : Definition (requires buffered term) */
361
+ bool is_kramdown_def = !in_code_block && !in_inline_code_span && is_kramdown_def_line(line_start, line_length);
362
+ if (is_kramdown_def) {
363
+ /* Skip reference link definitions [id]: url */
364
+ const char *p = line_start;
365
+ while (p < line_end && (*p == ' ' || *p == '\t')) p++;
366
+ if (p < line_end && *p == '[') {
367
+ is_kramdown_def = false; /* Reference def, not a definition line */
748
368
  }
749
369
  }
750
-
751
- bool is_def_line = false;
752
- /* Only check for definition line if it's not a table row or list item */
753
- /* Definition lines must start with : (after whitespace/blockquote), not contain : */
754
- /* Also skip if we're inside a code block (shouldn't happen due to early continue, but be safe) */
755
- if (!in_code_block && !is_table_row && !is_list_item && p < line_end && *p == ':' && (p + 1) < line_end &&
756
- (p[1] == ' ' || p[1] == '\t')) {
757
- /* Double-check: make sure : is at the start of the line content (after whitespace/blockquote) */
758
- /* p already points after whitespace and blockquote, so if *p == ':', it's a definition line */
759
- is_def_line = true;
760
-
761
- /* Check if this : Caption line is followed by a table */
762
- /* If so, skip processing it as a definition list - let table caption detection handle it */
763
- /* Calculate end of text buffer safely - use original text parameter */
764
- const char *text_end = text + text_len; /* End of entire text buffer */
765
- const char *next_line_start = line_end;
766
- if (next_line_start < text_end && *next_line_start == '\n') {
767
- next_line_start++; /* Skip the newline */
768
- }
769
-
770
- /* Skip blank lines to find the next non-blank line */
771
- /* Safety: limit look-ahead to prevent infinite loops or buffer overruns */
772
- int look_ahead_count = 0;
773
- const int MAX_LOOK_AHEAD = 100;
774
- bool found_table = false;
775
- while (next_line_start < text_end && *next_line_start != '\0' && look_ahead_count < MAX_LOOK_AHEAD) {
776
- look_ahead_count++;
777
- const char *check_line = next_line_start;
778
-
779
- /* Find end of line - only search within remaining buffer */
780
- const char *check_line_end = NULL;
781
- if (check_line < text_end) {
782
- /* Search for newline only within remaining buffer */
783
- const char *search_start = check_line;
784
- while (search_start < text_end && *search_start != '\n' && *search_start != '\0') {
785
- search_start++;
786
- }
787
- if (search_start < text_end && *search_start == '\n') {
788
- check_line_end = search_start;
789
- } else {
790
- /* No newline found - this is the last line */
791
- check_line_end = text_end;
792
- }
793
- } else {
794
- /* Already past end */
795
- check_line_end = text_end;
796
- }
797
-
798
- /* Skip whitespace on this line - ensure we don't go past check_line_end */
799
- while (check_line < check_line_end && check_line < text_end &&
800
- (*check_line == ' ' || *check_line == '\t')) {
801
- check_line++;
802
- }
803
-
804
- /* If line is empty (just whitespace), continue to next line */
805
- if (check_line >= check_line_end || check_line >= text_end ||
806
- *check_line == '\r' || *check_line == '\0') {
807
- next_line_start = check_line_end;
808
- if (next_line_start < text_end && *next_line_start == '\n') {
809
- next_line_start++;
810
- }
811
- continue;
812
- }
813
-
814
- /* Check if this line starts with | (table row) - ensure we're within bounds */
815
- if (check_line < text_end && *check_line == '|') {
816
- /* This : Caption is followed by a table - treat it as a table caption */
817
- /* Add 4 spaces to prevent definition list processing */
818
- is_def_line = false;
819
- found_table = true;
820
- /* Output line with 4 spaces added at the very beginning to prevent definition list matching */
821
- /* Calculate how many spaces we already have at the start */
822
- int existing_spaces = spaces;
823
- /* We need 4 total spaces at the start, so add (4 - existing_spaces) more */
824
- int spaces_to_add = 4 - existing_spaces;
825
- if (spaces_to_add < 0) spaces_to_add = 0;
826
-
827
- ENSURE_SPACE(spaces_to_add + line_length + 1);
828
- /* Add extra spaces at the very beginning */
829
- for (int i = 0; i < spaces_to_add; i++) {
830
- *write++ = ' ';
831
- remaining--;
832
- }
833
- /* Copy the entire original line */
834
- memcpy(write, line_start, line_length);
835
- write += line_length;
836
- remaining -= line_length;
837
- *write++ = '\n';
838
- remaining--;
839
- read = line_end;
840
- if (*read == '\n') {
841
- read++;
842
- }
843
- /* Break out of look-ahead loop since we found the table */
844
- break;
845
- }
846
-
847
- /* Found a non-blank line, stop looking */
848
- break;
849
- }
850
-
851
- /* If we found a table, skip the rest of the line processing */
852
- if (found_table) {
853
- continue; /* Skip to next iteration of main while loop */
854
- }
855
- } else if (in_code_block && p < line_end && *p == ':') {
856
- /* Found ':' in code block, skip definition list processing */
370
+ if (is_kramdown_def && prev_line_was_table_row) {
371
+ /* : Caption after table is a table caption, not a definition */
372
+ is_kramdown_def = false;
857
373
  }
858
-
859
- if (is_def_line) {
860
- /* Definition line */
861
- if (!in_def_list) {
862
- found_any_def_list = true; /* We're creating a definition list */
863
- /* Check if this definition list is in a blockquote context */
864
- in_blockquote_context = has_blockquote_prefix || term_has_blockquote;
865
- /* Use the maximum depth from current line or buffered term */
866
- blockquote_depth = term_has_blockquote ? term_blockquote_depth : current_blockquote_depth;
867
- if (has_blockquote_prefix && current_blockquote_depth > blockquote_depth) {
868
- blockquote_depth = current_blockquote_depth;
869
- }
870
-
871
- /* Clear the skipped blank flag - we're using the term now, blank line is ignored */
872
- skipped_blank_after_term = false;
873
-
874
- /* Start new definition list */
875
- const char *dl_start = "<dl>\n";
876
- size_t dl_len = strlen(dl_start);
877
- if (in_blockquote_context) {
878
- /* Add > prefix(es) at start of line for blockquote context */
879
- size_t prefix_needed = blockquote_depth * 2;
880
- /* Need prefix_needed + 1 for null terminator */
881
- ENSURE_SPACE(prefix_needed + 1);
882
- for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
883
- *write++ = '>';
884
- *write++ = ' ';
885
- remaining -= 2;
886
- }
887
- }
888
- /* Need dl_len + 1 for null terminator */
889
- ENSURE_SPACE(dl_len + 1);
890
- memcpy(write, dl_start, dl_len);
891
- write += dl_len;
892
- remaining -= dl_len;
893
-
894
- /* Write term from buffer */
895
- if (term_len > 0) {
896
- /* Strip blockquote prefix from term if present */
897
- const char *term_content = term_buffer;
898
- int term_content_len = term_len;
899
- if (term_has_blockquote) {
900
- /* Skip > and optional space */
901
- term_content = term_buffer;
902
- while (term_content < term_buffer + term_len &&
903
- (*term_content == '>' || *term_content == ' ' || *term_content == '\t')) {
904
- term_content++;
905
- term_content_len--;
906
- }
907
- }
908
-
909
- if (in_blockquote_context) {
910
- /* Add > prefix(es) at start of line for blockquote context */
911
- size_t prefix_needed = blockquote_depth * 2;
912
- /* Need prefix_needed + 1 for null terminator */
913
- ENSURE_SPACE(prefix_needed + 1);
914
- for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
915
- *write++ = '>';
916
- *write++ = ' ';
917
- remaining -= 2;
918
- }
919
- }
920
-
921
- const char *dt_start = "<dt>";
922
- size_t dt_start_len = strlen(dt_start);
923
- /* Need dt_start_len + 1 for null terminator */
924
- ENSURE_SPACE(dt_start_len + 1);
925
- memcpy(write, dt_start, dt_start_len);
926
- write += dt_start_len;
927
- remaining -= dt_start_len;
928
-
929
- /* Parse term text as inline Markdown */
930
- char *term_html = NULL;
931
- if (term_content_len > 0) {
932
- /* Quick check: does this text contain any markdown syntax? */
933
- bool has_markdown = false;
934
- const char *p = term_content;
935
- const char *end = term_content + term_content_len;
936
- while (p < end) {
937
- char c = *p++;
938
- /* Check for common markdown patterns */
939
- if (c == '*' || c == '_' || c == '[' || c == ']' || c == '!' ||
940
- c == '`' || c == '\\' || (c == '<' && p < end && (*p == '!' || isalnum((unsigned char)*p)))) {
941
- has_markdown = true;
942
- break;
943
- }
944
- }
945
-
946
- if (!has_markdown) {
947
- /* Plain text - just HTML escape */
948
- size_t escaped_len = 0;
949
- for (const char *p = term_content; p < term_content + term_content_len; p++) {
950
- if (*p == '&') escaped_len += 5; /* &amp; */
951
- else if (*p == '<' || *p == '>') escaped_len += 4; /* &lt; &gt; */
952
- else if (*p == '"') escaped_len += 6; /* &quot; */
953
- else escaped_len += 1;
954
- }
955
- term_html = malloc(escaped_len + 1);
956
- if (term_html) {
957
- char *out = term_html;
958
- for (const char *p = term_content; p < term_content + term_content_len; p++) {
959
- if (*p == '&') { memcpy(out, "&amp;", 5); out += 5; }
960
- else if (*p == '<') { memcpy(out, "&lt;", 4); out += 4; }
961
- else if (*p == '>') { memcpy(out, "&gt;", 4); out += 4; }
962
- else if (*p == '"') { memcpy(out, "&quot;", 6); out += 6; }
963
- else *out++ = *p;
964
- }
965
- *out = '\0';
966
- }
967
- } else {
968
- /* Note: We don't prepend reference definitions here because:
969
- * 1. It causes cmark to hang on large files with many references
970
- * 2. Reference definitions are already available in the main document context
971
- * 3. Inline parsing should work with references from the main document
972
- */
973
- char *term_text = malloc(term_content_len + 1);
974
- if (term_text) {
975
- memcpy(term_text, term_content, term_content_len);
976
- term_text[term_content_len] = '\0';
977
-
978
- /* Parse as Markdown and render to HTML */
979
- int parser_opts = CMARK_OPT_DEFAULT | CMARK_OPT_SMART; /* Enable smart typography */
980
- int render_opts = CMARK_OPT_DEFAULT;
981
- if (unsafe) {
982
- parser_opts |= CMARK_OPT_UNSAFE;
983
- parser_opts |= CMARK_OPT_LIBERAL_HTML_TAG; /* Be liberal in interpreting inline HTML tags */
984
- render_opts |= CMARK_OPT_UNSAFE;
985
- }
986
-
987
- /* Extract only the reference definitions actually used in this term */
988
- size_t final_term_len = term_content_len;
989
- char *final_term_text = term_text;
990
- if (ref_definitions) {
991
- size_t id_count = 0;
992
- char **needed_ids = extract_reference_link_ids(term_text, &id_count);
993
- if (needed_ids && id_count > 0) {
994
- char *selected_refs = extract_specific_reference_definitions(ref_definitions, needed_ids);
995
- if (selected_refs) {
996
- size_t ref_len = strlen(selected_refs);
997
- size_t new_size = ref_len + term_content_len + 2; /* +2 for newline and null */
998
- char *new_term_text = malloc(new_size);
999
- if (new_term_text) {
1000
- size_t offset = 0;
1001
- memcpy(new_term_text, selected_refs, ref_len);
1002
- offset = ref_len;
1003
- if (ref_len > 0 && selected_refs[ref_len - 1] != '\n') {
1004
- new_term_text[offset++] = '\n';
1005
- }
1006
- memcpy(new_term_text + offset, term_text, term_content_len);
1007
- new_term_text[offset + term_content_len] = '\0';
1008
- free(term_text);
1009
- final_term_text = new_term_text;
1010
- final_term_len = offset + term_content_len;
1011
- }
1012
- free(selected_refs);
1013
- }
1014
- /* Free the IDs array */
1015
- for (size_t i = 0; i < id_count; i++) {
1016
- free(needed_ids[i]);
1017
- }
1018
- free(needed_ids);
1019
- }
1020
- }
1021
- cmark_parser *temp_parser = cmark_parser_new(parser_opts);
1022
- if (temp_parser) {
1023
- cmark_parser_feed(temp_parser, final_term_text, final_term_len);
1024
- cmark_node *doc = cmark_parser_finish(temp_parser);
1025
- if (doc) {
1026
- /* Render and extract just the content (strip <p> tags) */
1027
- char *full_html = cmark_render_html(doc, render_opts, NULL);
1028
- if (full_html) {
1029
- /* Strip <p> and </p> tags if present */
1030
- char *content_start = full_html;
1031
- if (strncmp(content_start, "<p>", 3) == 0) {
1032
- content_start += 3;
1033
- }
1034
- char *content_end = content_start + strlen(content_start);
1035
- if (content_end > content_start + 4 &&
1036
- strcmp(content_end - 5, "</p>\n") == 0) {
1037
- content_end -= 5;
1038
- *content_end = '\0';
1039
- }
1040
- term_html = strdup(content_start);
1041
- free(full_html);
1042
- }
1043
- cmark_node_free(doc);
1044
- }
1045
- cmark_parser_free(temp_parser);
1046
- }
1047
- free(final_term_text);
1048
- }
1049
- }
1050
- }
1051
-
1052
- /* Write processed HTML or original text */
1053
- if (term_html) {
1054
- size_t html_len = strlen(term_html);
1055
- /* Need html_len + 1 for null terminator */
1056
- ENSURE_SPACE(html_len + 1);
1057
- memcpy(write, term_html, html_len);
1058
- write += html_len;
1059
- remaining -= html_len;
1060
- free(term_html);
1061
- } else {
1062
- /* Need term_content_len + 1 for null terminator */
1063
- ENSURE_SPACE((size_t)term_content_len + 1);
1064
- memcpy(write, term_content, term_content_len);
1065
- write += term_content_len;
1066
- remaining -= (size_t)term_content_len;
1067
- }
1068
-
1069
- const char *dt_end = "</dt>\n";
1070
- size_t dt_end_len = strlen(dt_end);
1071
- /* Need dt_end_len + 1 for null terminator */
1072
- ENSURE_SPACE(dt_end_len + 1);
1073
- memcpy(write, dt_end, dt_end_len);
1074
- write += dt_end_len;
1075
- remaining -= dt_end_len;
1076
-
1077
- term_len = 0;
1078
- term_has_blockquote = false;
1079
- skipped_blank_after_term = false; /* Clear flag - we used the term */
1080
- }
1081
-
1082
- in_def_list = true;
1083
- }
1084
-
1085
- /* Write definition */
1086
- if (in_blockquote_context) {
1087
- /* Add > prefix(es) at start of line for blockquote context */
1088
- size_t prefix_needed = blockquote_depth * 2;
1089
- /* Need prefix_needed + 1 for null terminator */
1090
- ENSURE_SPACE(prefix_needed + 1);
1091
- for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
1092
- *write++ = '>';
1093
- *write++ = ' ';
1094
- remaining -= 2;
1095
- }
1096
- }
1097
-
1098
- const char *dd_start = "<dd>";
1099
- size_t dd_start_len = strlen(dd_start);
1100
- /* Need dd_start_len + 1 for null terminator */
1101
- ENSURE_SPACE(dd_start_len + 1);
1102
- memcpy(write, dd_start, dd_start_len);
1103
- write += dd_start_len;
1104
- remaining -= dd_start_len;
1105
-
1106
- /* Extract definition text (after : and space) */
1107
- p++; /* Skip : */
1108
- while (p < line_end && (*p == ' ' || *p == '\t')) p++;
1109
-
1110
- size_t def_text_len = line_end - p;
1111
-
1112
- /* Parse definition text as inline Markdown */
1113
- char *def_html = NULL;
1114
- if (def_text_len > 0) {
1115
- /* Quick check: does this text contain any markdown syntax? */
1116
- bool has_markdown = false;
1117
- const char *check_p = p;
1118
- const char *check_end = p + def_text_len;
1119
- while (check_p < check_end) {
1120
- char c = *check_p++;
1121
- /* Check for common markdown patterns */
1122
- if (c == '*' || c == '_' || c == '[' || c == ']' || c == '!' ||
1123
- c == '`' || c == '\\' || (c == '<' && check_p < check_end && (*check_p == '!' || isalnum((unsigned char)*check_p)))) {
1124
- has_markdown = true;
1125
- break;
1126
- }
1127
- }
1128
-
1129
- if (!has_markdown) {
1130
- /* Plain text - just HTML escape */
1131
- size_t escaped_len = 0;
1132
- for (const char *esc_p = p; esc_p < p + def_text_len; esc_p++) {
1133
- if (*esc_p == '&') escaped_len += 5; /* &amp; */
1134
- else if (*esc_p == '<' || *esc_p == '>') escaped_len += 4; /* &lt; &gt; */
1135
- else if (*esc_p == '"') escaped_len += 6; /* &quot; */
1136
- else escaped_len += 1;
1137
- }
1138
- def_html = malloc(escaped_len + 1);
1139
- if (def_html) {
1140
- char *out = def_html;
1141
- for (const char *esc_p = p; esc_p < p + def_text_len; esc_p++) {
1142
- if (*esc_p == '&') { memcpy(out, "&amp;", 5); out += 5; }
1143
- else if (*esc_p == '<') { memcpy(out, "&lt;", 4); out += 4; }
1144
- else if (*esc_p == '>') { memcpy(out, "&gt;", 4); out += 4; }
1145
- else if (*esc_p == '"') { memcpy(out, "&quot;", 6); out += 6; }
1146
- else *out++ = *esc_p;
1147
- }
1148
- *out = '\0';
1149
- }
1150
- } else {
1151
- char *def_text = malloc(def_text_len + 1);
1152
- if (def_text) {
1153
- memcpy(def_text, p, def_text_len);
1154
- def_text[def_text_len] = '\0';
1155
-
1156
- /* Parse as Markdown and render to HTML */
1157
- int parser_opts = CMARK_OPT_DEFAULT | CMARK_OPT_SMART; /* Enable smart typography */
1158
- int render_opts = CMARK_OPT_DEFAULT;
1159
- if (unsafe) {
1160
- parser_opts |= CMARK_OPT_UNSAFE;
1161
- parser_opts |= CMARK_OPT_LIBERAL_HTML_TAG; /* Be liberal in interpreting inline HTML tags */
1162
- render_opts |= CMARK_OPT_UNSAFE;
1163
- }
1164
-
1165
- /* Extract only the reference definitions actually used in this definition */
1166
- size_t final_def_len = def_text_len;
1167
- char *final_def_text = def_text;
1168
- if (ref_definitions) {
1169
- size_t id_count = 0;
1170
- char **needed_ids = extract_reference_link_ids(def_text, &id_count);
1171
- if (needed_ids && id_count > 0) {
1172
- char *selected_refs = extract_specific_reference_definitions(ref_definitions, needed_ids);
1173
- if (selected_refs) {
1174
- size_t ref_len = strlen(selected_refs);
1175
- size_t new_size = ref_len + def_text_len + 2; /* +2 for newline and null */
1176
- char *new_def_text = malloc(new_size);
1177
- if (new_def_text) {
1178
- size_t offset = 0;
1179
- memcpy(new_def_text, selected_refs, ref_len);
1180
- offset = ref_len;
1181
- if (ref_len > 0 && selected_refs[ref_len - 1] != '\n') {
1182
- new_def_text[offset++] = '\n';
1183
- }
1184
- memcpy(new_def_text + offset, def_text, def_text_len);
1185
- new_def_text[offset + def_text_len] = '\0';
1186
- free(def_text);
1187
- final_def_text = new_def_text;
1188
- final_def_len = offset + def_text_len;
1189
- }
1190
- free(selected_refs);
1191
- }
1192
- /* Free the IDs array */
1193
- for (size_t i = 0; i < id_count; i++) {
1194
- free(needed_ids[i]);
1195
- }
1196
- free(needed_ids);
1197
- }
1198
- }
1199
-
1200
- cmark_parser *temp_parser = cmark_parser_new(parser_opts);
1201
- if (temp_parser) {
1202
- cmark_parser_feed(temp_parser, final_def_text, final_def_len);
1203
- cmark_node *doc = cmark_parser_finish(temp_parser);
1204
- if (doc) {
1205
- /* Render and extract just the content (strip <p> tags) */
1206
- char *full_html = cmark_render_html(doc, render_opts, NULL);
1207
- if (full_html) {
1208
- /* Strip <p> and </p> tags if present */
1209
- char *content_start = full_html;
1210
- if (strncmp(content_start, "<p>", 3) == 0) {
1211
- content_start += 3;
1212
- }
1213
- char *content_end = content_start + strlen(content_start);
1214
- if (content_end > content_start + 4 &&
1215
- strcmp(content_end - 5, "</p>\n") == 0) {
1216
- content_end -= 5;
1217
- *content_end = '\0';
1218
- }
1219
- def_html = strdup(content_start);
1220
- free(full_html);
1221
- }
1222
- cmark_node_free(doc);
1223
- }
1224
- cmark_parser_free(temp_parser);
1225
- }
1226
- free(final_def_text);
1227
- }
1228
- }
374
+ if (is_kramdown_def) {
375
+ const char *next_start = line_end;
376
+ if (next_start < text + text_len && *next_start == '\n') next_start++;
377
+ if (next_nonblank_line_is_table(next_start, text + text_len)) {
378
+ /* : Caption before table is a table caption, not a definition */
379
+ is_kramdown_def = false;
1229
380
  }
381
+ }
1230
382
 
1231
- /* Write processed HTML or original text */
1232
- if (def_html) {
1233
- size_t html_len = strlen(def_html);
1234
- /* Need html_len + 1 for null terminator */
1235
- ENSURE_SPACE(html_len + 1);
1236
- memcpy(write, def_html, html_len);
1237
- write += html_len;
1238
- remaining -= html_len;
1239
- free(def_html);
1240
- } else {
1241
- /* Need def_text_len + 1 for null terminator */
1242
- ENSURE_SPACE(def_text_len + 1);
1243
- memcpy(write, p, def_text_len);
1244
- write += def_text_len;
1245
- remaining -= def_text_len;
383
+ if (is_kramdown_def) {
384
+ /* Extract definition text (after : or :: and space) */
385
+ const char *def_start = line_start;
386
+ while (def_start < line_end && (*def_start == ' ' || *def_start == '\t')) def_start++;
387
+ if (def_start < line_end && *def_start == ':') {
388
+ def_start++;
389
+ if (def_start < line_end && *def_start == ':') def_start++;
390
+ while (def_start < line_end && (*def_start == ' ' || *def_start == '\t')) def_start++;
1246
391
  }
392
+ size_t def_len = (size_t)(line_end - def_start);
1247
393
 
1248
- const char *dd_end = "</dd>\n";
1249
- size_t dd_end_len = strlen(dd_end);
1250
- /* Need dd_end_len + 1 for null terminator */
1251
- ENSURE_SPACE(dd_end_len + 1);
1252
- memcpy(write, dd_end, dd_end_len);
1253
- write += dd_end_len;
1254
- remaining -= dd_end_len;
1255
- } else if (line_length == 0 || (line_length == 1 && *line_start == '\r')) {
1256
- /* Blank line */
1257
- if (in_def_list) {
1258
- /* Allow blank lines within definition lists - skip them to keep HTML block continuous */
1259
- /* Don't close the list - it will continue if next line is a definition */
1260
- /* Blank lines in HTML are mostly ignored anyway, so skipping is safe */
1261
- } else {
1262
- /* Not in a definition list - check if we have a buffered term */
1263
- if (term_len > 0) {
1264
- /* Keep the term buffered - don't flush it yet */
1265
- /* Don't output the blank line yet - wait to see if next line is a definition */
1266
- /* If next line is a definition, we'll start the list and the blank line is effectively ignored */
1267
- /* If next line is not a definition, we'll output the term and blank line then */
1268
- skipped_blank_after_term = true;
1269
- } else {
1270
- /* No buffered term - regular blank line */
1271
- ENSURE_SPACE(1);
1272
- *write++ = '\n';
1273
- remaining--;
1274
- }
1275
- }
1276
- } else {
1277
- /* Regular line */
1278
- /* Skip definition list processing if we're inside a code block (shouldn't happen, but be safe) */
1279
- if (in_code_block) {
1280
- /* Copy line as-is */
1281
- ENSURE_SPACE(line_length + 1);
1282
- memcpy(write, line_start, line_length);
1283
- write += line_length;
1284
- remaining -= line_length;
1285
- *write++ = '\n';
1286
- remaining--;
1287
- read = line_end;
1288
- if (*read == '\n') {
1289
- read++;
1290
- }
1291
- continue;
1292
- }
1293
- if (in_def_list) {
1294
- /* This could be a new term */
1295
- /* End current list first */
1296
- const char *dl_end = "</dl>\n\n";
1297
- size_t dl_end_len = strlen(dl_end);
1298
- if (in_blockquote_context) {
1299
- /* Add > prefix(es) at start of line for blockquote context */
1300
- size_t prefix_needed = blockquote_depth * 2;
1301
- /* Need prefix_needed + 1 for null terminator */
1302
- ENSURE_SPACE(prefix_needed + 1);
1303
- for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
1304
- *write++ = '>';
1305
- *write++ = ' ';
1306
- remaining -= 2;
1307
- }
1308
- }
1309
- /* Need dl_end_len + 1 for null terminator */
1310
- ENSURE_SPACE(dl_end_len + 1);
1311
- memcpy(write, dl_end, dl_end_len);
1312
- write += dl_end_len;
1313
- remaining -= dl_end_len;
394
+ if (term_len > 0) {
395
+ /* We have a buffered term - output <dl><dt>term</dt><dd>def</dd> */
396
+ if (!in_def_list) {
397
+ ENSURE_SPACE(20);
398
+ memcpy(write, "<dl>\n", 5);
399
+ write += 5;
400
+ remaining -= 5;
401
+ in_def_list = true;
402
+ }
403
+ if (dd_open) {
404
+ memcpy(write, "</dd>\n", 6);
405
+ write += 6;
406
+ remaining -= 6;
407
+ dd_open = false;
408
+ }
409
+ /* <dt>term</dt> */
410
+ ENSURE_SPACE(10);
411
+ memcpy(write, "<dt>", 4);
412
+ write += 4;
413
+ remaining -= 4;
414
+ char *term_html = render_inline_with_doc(term_buffer, (size_t)term_len, text, text_len, unsafe);
415
+ if (term_html) {
416
+ size_t html_len = strlen(term_html);
417
+ ENSURE_SPACE(html_len + 20);
418
+ memcpy(write, term_html, html_len);
419
+ write += html_len;
420
+ remaining -= html_len;
421
+ free(term_html);
422
+ }
423
+ memcpy(write, "</dt>\n", 6);
424
+ write += 6;
425
+ remaining -= 6;
426
+ term_len = 0;
427
+ } else if (in_def_list) {
428
+ /* Another : definition for same term */
429
+ if (dd_open) {
430
+ memcpy(write, "</dd>\n", 6);
431
+ write += 6;
432
+ remaining -= 6;
433
+ }
434
+ }
435
+
436
+ /* <dd>definition</dd> */
437
+ ENSURE_SPACE(20 + def_len * 2);
438
+ memcpy(write, "<dd>", 4);
439
+ write += 4;
440
+ remaining -= 4;
441
+ dd_open = true;
442
+
443
+ if (def_len > 0) {
444
+ char *def_html = render_inline_with_doc(def_start, def_len, text, text_len, unsafe);
445
+ if (def_html) {
446
+ size_t html_len = strlen(def_html);
447
+ ENSURE_SPACE(html_len + 20);
448
+ memcpy(write, def_html, html_len);
449
+ write += html_len;
450
+ remaining -= html_len;
451
+ free(def_html);
452
+ }
453
+ }
454
+ /* Don't close </dd> yet - allow indented continuation lines */
455
+ }
456
+ /* Check for one-line definition: Term :: Definition */
457
+ else if (!in_code_block) {
458
+ sep = find_def_separator((const unsigned char *)line_start, (int)line_length);
459
+ bool sep_inside_inline = false;
460
+ if (sep >= 0) {
461
+ sep_inside_inline = scan_inline_code_for_sep(line_start, line_length, sep, in_inline_code_span, &in_inline_code_span);
462
+ }
463
+ if (sep >= 0 && !sep_inside_inline) {
464
+ /* Close any open Kramdown dd and flush unused term buffer */
465
+ if (dd_open) {
466
+ memcpy(write, "</dd>\n", 6);
467
+ write += 6;
468
+ remaining -= 6;
469
+ dd_open = false;
470
+ }
471
+ if (in_def_list && term_len > 0) {
472
+ /* Buffered term wasn't used - output as regular line, close list */
473
+ memcpy(write, "</dl>\n\n", 7);
474
+ write += 7;
475
+ remaining -= 7;
1314
476
  in_def_list = false;
1315
- in_blockquote_context = false;
1316
- blockquote_depth = 0;
1317
477
  }
1318
-
1319
- /* If we have a buffered term that wasn't used, write it first */
1320
478
  if (term_len > 0) {
1321
- /* Need term_len bytes + 1 for newline + 1 for null terminator */
1322
479
  ENSURE_SPACE((size_t)term_len + 2);
1323
- memcpy(write, term_buffer, term_len);
480
+ memcpy(write, term_buffer, (size_t)term_len);
1324
481
  write += term_len;
1325
482
  remaining -= (size_t)term_len;
1326
483
  *write++ = '\n';
1327
484
  remaining--;
1328
- /* If we skipped a blank line after the term, output it now */
1329
- if (skipped_blank_after_term) {
1330
- ENSURE_SPACE(1);
1331
- *write++ = '\n';
1332
- remaining--;
1333
- skipped_blank_after_term = false;
1334
- }
1335
485
  term_len = 0;
1336
486
  }
487
+ /* Extract term (before ::) and definition (after ::) */
488
+ const char *term_start = line_start;
489
+ const char *term_end = line_start + sep;
490
+ const char *def_start = line_start + sep + 2;
491
+ const char *def_end = line_end;
492
+
493
+ /* Trim term */
494
+ while (term_start < term_end && (*term_start == ' ' || *term_start == '\t')) term_start++;
495
+ while (term_end > term_start && (term_end[-1] == ' ' || term_end[-1] == '\t')) term_end--;
496
+
497
+ /* Trim definition */
498
+ while (def_start < def_end && (*def_start == ' ' || *def_start == '\t')) def_start++;
1337
499
 
1338
- /* Check if line is a header (starts with #) - check BEFORE other checks */
1339
- const char *header_check = line_start;
1340
- while (header_check < line_end && (*header_check == ' ' || *header_check == '\t')) {
1341
- header_check++;
500
+ size_t term_len = (size_t)(term_end - term_start);
501
+ size_t def_len = (size_t)(def_end - def_start);
502
+
503
+ if (!in_def_list) {
504
+ ENSURE_SPACE(10);
505
+ memcpy(write, "<dl>\n", 5);
506
+ write += 5;
507
+ remaining -= 5;
508
+ in_def_list = true;
1342
509
  }
1343
- bool is_header = (header_check < line_end && *header_check == '#');
1344
510
 
1345
- /* If this is a table row, write it through immediately without buffering */
1346
- if (is_table_row) {
1347
- size_t needed = line_length + (*line_end == '\n' ? 1 : 0);
1348
- /* Need needed + 1 for null terminator */
1349
- ENSURE_SPACE(needed + 1);
1350
- memcpy(write, line_start, line_length);
1351
- write += line_length;
1352
- remaining -= line_length;
1353
- if (*line_end == '\n' && remaining > 0) {
511
+ /* <dt>term</dt> */
512
+ ENSURE_SPACE(20 + term_len * 2);
513
+ memcpy(write, "<dt>", 4);
514
+ write += 4;
515
+ remaining -= 4;
516
+
517
+ /* Parse term as inline markdown */
518
+ if (term_len > 0) {
519
+ char *term_html = render_inline_with_doc(term_start, term_len, text, text_len, unsafe);
520
+ if (term_html) {
521
+ size_t html_len = strlen(term_html);
522
+ ENSURE_SPACE(html_len + 20);
523
+ memcpy(write, term_html, html_len);
524
+ write += html_len;
525
+ remaining -= html_len;
526
+ free(term_html);
527
+ }
528
+ }
529
+
530
+ memcpy(write, "</dt>\n", 6);
531
+ write += 6;
532
+ remaining -= 6;
533
+
534
+ /* <dd>definition</dd> */
535
+ ENSURE_SPACE(20 + def_len * 2);
536
+ memcpy(write, "<dd>", 4);
537
+ write += 4;
538
+ remaining -= 4;
539
+
540
+ if (def_len > 0) {
541
+ char *def_html = render_inline_with_doc(def_start, def_len, text, text_len, unsafe);
542
+ if (def_html) {
543
+ size_t html_len = strlen(def_html);
544
+ ENSURE_SPACE(html_len + 20);
545
+ memcpy(write, def_html, html_len);
546
+ write += html_len;
547
+ remaining -= html_len;
548
+ free(def_html);
549
+ }
550
+ }
551
+
552
+ memcpy(write, "</dd>\n", 6);
553
+ write += 6;
554
+ remaining -= 6;
555
+ } else {
556
+ /* Not one-line def (sep < 0) - buffer as potential Kramdown term */
557
+ if (dd_open) {
558
+ memcpy(write, "</dd>\n", 6);
559
+ write += 6;
560
+ remaining -= 6;
561
+ dd_open = false;
562
+ }
563
+ bool is_blank = (line_length == 0 || (line_length == 1 && (*line_start == '\r' || *line_start == '\n')));
564
+ if (is_blank) {
565
+ /* Blank line: keep def list open (next line might be : definition for same term) */
566
+ if (term_len > 0) {
567
+ /* Skip blank, keep term buffered */
568
+ } else if (!in_def_list) {
569
+ ENSURE_SPACE(2);
1354
570
  *write++ = '\n';
1355
571
  remaining--;
1356
572
  }
1357
- /* Move to next line and continue */
1358
- const char *old_read_continue = read;
1359
- read = line_end;
1360
- if (*read == '\n') read++;
1361
- /* Safety: ensure we advanced */
1362
- if (read <= old_read_continue) {
1363
- /* We're stuck - break instead of continue */
1364
- break;
573
+ /* else: in_def_list, skip blank, list stays open */
574
+ } else {
575
+ if (in_def_list) {
576
+ memcpy(write, "</dl>\n\n", 7);
577
+ write += 7;
578
+ remaining -= 7;
579
+ in_def_list = false;
1365
580
  }
1366
- continue;
1367
- }
1368
- /* If this is a header, write it through immediately without buffering */
1369
- else if (is_header) {
1370
- /* But first, flush any buffered term that wasn't used */
1371
581
  if (term_len > 0) {
1372
- size_t term_needed = (size_t)term_len + 2;
1373
- ENSURE_SPACE(term_needed);
1374
- memcpy(write, term_buffer, term_len);
582
+ ENSURE_SPACE((size_t)term_len + 2);
583
+ memcpy(write, term_buffer, (size_t)term_len);
1375
584
  write += term_len;
1376
585
  remaining -= (size_t)term_len;
1377
586
  *write++ = '\n';
1378
587
  remaining--;
1379
- /* If we skipped a blank line after the term, output it now */
1380
- if (skipped_blank_after_term) {
1381
- ENSURE_SPACE(1);
1382
- *write++ = '\n';
1383
- remaining--;
1384
- skipped_blank_after_term = false;
1385
- }
1386
588
  term_len = 0;
1387
589
  }
1388
- size_t needed = line_length + (*line_end == '\n' ? 1 : 0);
1389
- /* Need needed + 1 for null terminator */
1390
- ENSURE_SPACE(needed + 1);
1391
- memcpy(write, line_start, line_length);
1392
- write += line_length;
1393
- remaining -= line_length;
1394
- if (*line_end == '\n' && remaining > 0) {
1395
- *write++ = '\n';
1396
- remaining--;
1397
- }
1398
- /* Move to next line and continue */
1399
- const char *old_read_continue = read;
1400
- read = line_end;
1401
- if (*read == '\n') read++;
1402
- /* Safety: ensure we advanced */
1403
- if (read <= old_read_continue) {
1404
- /* We're stuck - break instead of continue */
1405
- break;
1406
- }
1407
- continue;
1408
- }
1409
- /* If this is a list item, write it through immediately without buffering */
1410
- else if (is_list_item) {
1411
- size_t needed = line_length + (*line_end == '\n' ? 1 : 0);
1412
- /* Need needed + 1 for null terminator */
1413
- ENSURE_SPACE(needed + 1);
1414
- memcpy(write, line_start, line_length);
1415
- write += line_length;
1416
- remaining -= line_length;
1417
- if (*line_end == '\n' && remaining > 0) {
590
+ const char *p = line_start;
591
+ while (p < line_end && (*p == ' ' || *p == '\t')) p++;
592
+ bool is_ref_def = (p < line_end && *p == '[' && memchr(p, ':', (size_t)(line_end - p)) != NULL);
593
+ if (is_ref_def || line_length >= sizeof(term_buffer) - 1) {
594
+ ENSURE_SPACE(line_length + 2);
595
+ memcpy(write, line_start, line_length);
596
+ write += line_length;
597
+ remaining -= line_length;
1418
598
  *write++ = '\n';
1419
599
  remaining--;
600
+ } else {
601
+ memcpy(term_buffer, line_start, line_length);
602
+ term_len = (int)line_length;
603
+ term_buffer[term_len] = '\0';
1420
604
  }
1421
- /* Move to next line and continue */
1422
- const char *old_read_continue = read;
1423
- read = line_end;
1424
- if (*read == '\n') read++;
1425
- /* Safety: ensure we advanced */
1426
- if (read <= old_read_continue) {
1427
- /* We're stuck - break instead of continue */
1428
- break;
1429
- }
1430
- continue;
1431
605
  }
1432
- /* Check if line contains IAL syntax - if so, write immediately without buffering */
1433
- else if (strstr(line_start, "{:") != NULL) {
1434
- /* Contains IAL - don't buffer it */
1435
- size_t needed = line_length + (*line_end == '\n' ? 1 : 0);
1436
- /* Need needed + 1 for null terminator */
1437
- ENSURE_SPACE(needed + 1);
1438
- memcpy(write, line_start, line_length);
1439
- write += line_length;
1440
- remaining -= line_length;
1441
- if (*line_end == '\n' && remaining > 0) {
1442
- *write++ = '\n';
1443
- remaining--;
1444
- }
1445
- /* Move to next line and continue */
1446
- const char *old_read_continue = read;
1447
- read = line_end;
1448
- if (*read == '\n') read++;
1449
- /* Safety: ensure we advanced */
1450
- if (read <= old_read_continue) {
1451
- /* We're stuck - break instead of continue */
1452
- break;
1453
- }
1454
- continue;
1455
606
  }
1456
- /* Save current line as potential term */
1457
- if (line_length < sizeof(term_buffer) - 1) {
1458
- /* Check if line has blockquote prefix and count depth */
1459
- const char *term_check = line_start;
1460
- while (term_check < line_end && (*term_check == ' ' || *term_check == '\t')) term_check++;
1461
- term_has_blockquote = false;
1462
- term_blockquote_depth = 0;
1463
- const char *depth_check = term_check;
1464
- while (depth_check < line_end && *depth_check == '>') {
1465
- term_has_blockquote = true;
1466
- term_blockquote_depth++;
1467
- depth_check++;
1468
- /* Skip optional space after > */
1469
- if (depth_check < line_end && (*depth_check == ' ' || *depth_check == '\t')) {
1470
- depth_check++;
1471
- }
1472
- }
607
+ }
1473
608
 
1474
- memcpy(term_buffer, line_start, line_length);
1475
- term_len = line_length;
1476
- term_buffer[term_len] = '\0';
1477
- /* Don't write yet - wait to see if next line is definition */
1478
- } else {
1479
- /* Line too long for buffer, just copy through */
1480
- size_t needed = line_length + (*line_end == '\n' ? 1 : 0);
1481
- /* Need needed + 1 for null terminator */
1482
- ENSURE_SPACE(needed + 1);
1483
- memcpy(write, line_start, line_length);
1484
- write += line_length;
1485
- remaining -= line_length;
1486
- if (*line_end == '\n' && remaining > 0) {
1487
- *write++ = '\n';
1488
- remaining--;
1489
- }
1490
- }
609
+ /* Track if this line was a table row (for : Caption after table detection) */
610
+ bool is_blank = (line_length == 0 || (line_length == 1 && (*line_start == '\r' || *line_start == '\n')));
611
+ if (!is_blank) prev_line_was_table_row = is_table_row_line(line_start, line_length);
612
+
613
+ /* Update inline code span state for next line (if not already updated in one-line def path) */
614
+ if (sep < 0) {
615
+ scan_inline_code_for_sep(line_start, line_length, -1, in_inline_code_span, &in_inline_code_span);
1491
616
  }
1492
617
 
1493
- /* Move to next line - ensure we always advance */
1494
- const char *old_read = read;
1495
618
  read = line_end;
1496
- if (*read == '\n') {
1497
- read++;
1498
- }
1499
- /* If we're at the end of the string, break */
1500
- if (*read == '\0') {
1501
- break;
1502
- }
1503
- /* Critical safety check: if we haven't advanced, break immediately */
1504
- if (read <= old_read) {
1505
- /* We're stuck - this should never happen, but break to prevent infinite loop */
1506
- /* Force advance one character as last resort */
1507
- if (*read != '\0') {
1508
- read++;
1509
- } else {
1510
- break;
1511
- }
1512
- }
1513
- /* Additional safety: if we've processed more than the text length, something is wrong */
1514
- if (read > text + text_len) {
1515
- break;
1516
- }
619
+ if (read < text + text_len && *read == '\n') read++;
1517
620
  }
1518
621
 
1519
- /* Close any open definition list */
622
+
623
+ if (dd_open) {
624
+ memcpy(write, "</dd>\n", 6);
625
+ write += 6;
626
+ }
1520
627
  if (in_def_list) {
1521
- const char *dl_end = "</dl>\n";
1522
- size_t dl_end_len = strlen(dl_end);
1523
- if (in_blockquote_context) {
1524
- /* Add > prefix(es) at start of line for blockquote context */
1525
- size_t prefix_needed = blockquote_depth * 2;
1526
- /* Need prefix_needed + 1 for null terminator */
1527
- ENSURE_SPACE(prefix_needed + 1);
1528
- for (int i = 0; i < blockquote_depth && remaining > 2; i++) {
1529
- *write++ = '>';
1530
- *write++ = ' ';
1531
- remaining -= 2;
1532
- }
1533
- }
1534
- /* Need dl_end_len + 1 for null terminator */
1535
- ENSURE_SPACE(dl_end_len + 1);
1536
- memcpy(write, dl_end, dl_end_len);
1537
- write += dl_end_len;
1538
- remaining -= dl_end_len;
628
+ memcpy(write, "</dl>\n", 6);
629
+ write += 6;
1539
630
  }
1540
-
1541
- /* Write any remaining term */
1542
631
  if (term_len > 0) {
1543
- /* Need term_len bytes + 1 for newline + 1 for null terminator */
1544
632
  ENSURE_SPACE((size_t)term_len + 2);
1545
- memcpy(write, term_buffer, term_len);
633
+ memcpy(write, term_buffer, (size_t)term_len);
1546
634
  write += term_len;
1547
- remaining -= (size_t)term_len;
1548
635
  *write++ = '\n';
1549
- remaining--;
1550
- /* If we skipped a blank line after the term, output it now */
1551
- if (skipped_blank_after_term) {
1552
- ENSURE_SPACE(1);
1553
- *write++ = '\n';
1554
- remaining--;
1555
- skipped_blank_after_term = false;
1556
- }
1557
- }
1558
-
1559
- /* Free reference definitions if we extracted them */
1560
- if (ref_definitions) {
1561
- free(ref_definitions);
1562
- ref_definitions = NULL; /* Prevent double-free */
1563
- }
1564
-
1565
- /* Ensure space for null terminator */
1566
- if (remaining < 1) {
1567
- size_t used = write - output;
1568
- output_capacity = (used + 2) * 2;
1569
- char *new_output = realloc(output, output_capacity + 1);
1570
- if (!new_output) {
1571
- free(output);
1572
- return NULL;
1573
- }
1574
- output = new_output;
1575
- write = output + used;
1576
- remaining = output_capacity - used;
1577
- }
1578
- /* Safety check: if we processed but didn't actually create any definition lists,
1579
- * return NULL to use original text. This handles cases where the early exit
1580
- * incorrectly detected a pattern but no definition lists were actually created. */
1581
- if (!found_any_def_list) {
1582
- /* No definition lists were created - if we processed but didn't create any DLs,
1583
- * something went wrong. Return NULL to use original text. */
1584
- free(output);
1585
- if (ref_definitions) {
1586
- free(ref_definitions);
1587
- }
1588
- return NULL;
1589
636
  }
1590
637
 
1591
638
  *write = '\0';
1592
-
1593
- #undef ENSURE_SPACE
1594
-
1595
- /* If we didn't write anything, return original text to avoid empty output */
1596
- if (write == output) {
1597
- free(output);
1598
- if (ref_definitions) {
1599
- free(ref_definitions);
1600
- }
1601
- return NULL; /* Return NULL to indicate no processing was done */
1602
- }
639
+ #undef ENSURE_SPACE
1603
640
 
1604
641
  return output;
1605
642
  }
1606
643
 
1607
- /**
1608
- * Post-process - no longer needed with preprocessing approach
1609
- */
1610
- static cmark_node *postprocess(cmark_syntax_extension *ext,
1611
- cmark_parser *parser,
1612
- cmark_node *root) {
1613
- (void)ext;
1614
- (void)parser;
1615
- /* Definition lists are now handled via preprocessing */
1616
- return root;
1617
- }
1618
-
1619
- /**
1620
- * Render definition list to HTML
1621
- */
1622
- static void html_render(cmark_syntax_extension *ext,
1623
- struct cmark_html_renderer *renderer,
1624
- cmark_node *node,
1625
- cmark_event_type ev_type,
1626
- int options) {
1627
- (void)ext;
1628
- (void)options;
1629
- cmark_strbuf *html = renderer->html;
1630
-
1631
- if (ev_type == CMARK_EVENT_ENTER) {
1632
- if (node->type == APEX_NODE_DEFINITION_LIST) {
1633
- cmark_strbuf_puts(html, "<dl>\n");
1634
- } else if (node->type == APEX_NODE_DEFINITION_TERM) {
1635
- cmark_strbuf_puts(html, "<dt>");
1636
- } else if (node->type == APEX_NODE_DEFINITION_DATA) {
1637
- cmark_strbuf_puts(html, "<dd>");
1638
- }
1639
- } else if (ev_type == CMARK_EVENT_EXIT) {
1640
- if (node->type == APEX_NODE_DEFINITION_LIST) {
1641
- cmark_strbuf_puts(html, "</dl>\n");
1642
- } else if (node->type == APEX_NODE_DEFINITION_TERM) {
1643
- cmark_strbuf_puts(html, "</dt>\n");
1644
- } else if (node->type == APEX_NODE_DEFINITION_DATA) {
1645
- cmark_strbuf_puts(html, "</dd>\n");
1646
- }
1647
- }
1648
- }
1649
-
1650
- /**
1651
- * Create definition list extension
1652
- */
1653
- cmark_syntax_extension *create_definition_list_extension(void) {
1654
- cmark_syntax_extension *ext = cmark_syntax_extension_new("definition_list");
1655
- if (!ext) return NULL;
1656
-
1657
- /* Register node types */
1658
- APEX_NODE_DEFINITION_LIST = cmark_syntax_extension_add_node(0);
1659
- APEX_NODE_DEFINITION_TERM = cmark_syntax_extension_add_node(0);
1660
- APEX_NODE_DEFINITION_DATA = cmark_syntax_extension_add_node(0);
1661
-
1662
- /* Set callbacks */
1663
- cmark_syntax_extension_set_open_block_func(ext, open_block);
1664
- cmark_syntax_extension_set_match_block_func(ext, match_block);
1665
- cmark_syntax_extension_set_can_contain_func(ext, can_contain);
1666
- cmark_syntax_extension_set_html_render_func(ext, html_render);
1667
- cmark_syntax_extension_set_postprocess_func(ext, postprocess);
1668
-
1669
- return ext;
644
+ void apex_deflist_debug_touch(int enable_definition_lists) {
645
+ (void)enable_definition_lists;
646
+ /* No-op for one-line format - debug was for old Kramdown format */
1670
647
  }