apex-ruby 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/ext/apex_ext/apex_ext.c +6 -0
  3. data/ext/apex_ext/apex_src/AGENTS.md +41 -0
  4. data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
  5. data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
  6. data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
  7. data/ext/apex_ext/apex_src/Package.swift +9 -0
  8. data/ext/apex_ext/apex_src/README.md +31 -9
  9. data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
  10. data/ext/apex_ext/apex_src/VERSION +1 -1
  11. data/ext/apex_ext/apex_src/cli/main.c +1125 -13
  12. data/ext/apex_ext/apex_src/docs/index.md +459 -0
  13. data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
  14. data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
  15. data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
  16. data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
  17. data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
  18. data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
  19. data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
  20. data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
  21. data/ext/apex_ext/apex_src/man/apex.1 +663 -620
  22. data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
  23. data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
  24. data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
  25. data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
  26. data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
  27. data/ext/apex_ext/apex_src/pages/index.md +459 -0
  28. data/ext/apex_ext/apex_src/src/_README.md +4 -4
  29. data/ext/apex_ext/apex_src/src/apex.c +702 -44
  30. data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
  31. data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
  32. data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
  33. data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
  34. data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
  35. data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
  36. data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
  37. data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
  38. data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
  39. data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
  40. data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
  41. data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
  42. data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
  43. data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
  44. data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
  45. data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
  46. data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
  47. data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
  48. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
  49. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
  50. data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
  51. data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
  52. data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
  53. data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
  54. data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
  55. data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
  56. data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
  57. data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
  58. data/ext/apex_ext/apex_src/tests/README.md +11 -5
  59. data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
  60. data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
  61. data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
  62. data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
  63. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
  64. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
  65. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
  66. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
  67. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
  68. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
  69. data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
  70. data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
  71. data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
  72. data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
  73. data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
  74. data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
  75. data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
  76. data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
  77. data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
  78. data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
  79. data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
  80. data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
  81. data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
  82. data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
  83. data/lib/apex/version.rb +1 -1
  84. metadata +32 -2
  85. data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
@@ -25,6 +25,8 @@ static const char *get_tool_binary(const char *tool) {
25
25
  return "pygmentize";
26
26
  } else if (strcmp(tool, "skylighting") == 0) {
27
27
  return "skylighting";
28
+ } else if (strcmp(tool, "shiki") == 0) {
29
+ return "shiki";
28
30
  }
29
31
  return NULL;
30
32
  }
@@ -189,10 +191,11 @@ static char *run_command(const char *cmd, const char *input) {
189
191
 
190
192
  /**
191
193
  * Highlight a single code block using the specified tool.
192
- * Returns newly allocated HTML, or NULL on failure.
194
+ * Returns newly allocated HTML (or ANSI when ansi_output is true), or NULL on failure.
193
195
  */
194
196
  static char *highlight_code_block(const char *code, const char *language,
195
- const char *tool, bool line_numbers) {
197
+ const char *tool, bool line_numbers, bool ansi_output,
198
+ const char *theme) {
196
199
  char cmd[512];
197
200
  const char *binary = get_tool_binary(tool);
198
201
  if (!binary) return NULL;
@@ -200,15 +203,23 @@ static char *highlight_code_block(const char *code, const char *language,
200
203
  if (strcmp(tool, "pygments") == 0) {
201
204
  /* Pygments: pygmentize -l LANG -f html [-O linenos=1] */
202
205
  if (language && *language) {
203
- if (line_numbers) {
206
+ if (line_numbers && theme && *theme) {
207
+ snprintf(cmd, sizeof(cmd), "%s -l %s -f html -O linenos=1,style=%s", binary, language, theme);
208
+ } else if (line_numbers) {
204
209
  snprintf(cmd, sizeof(cmd), "%s -l %s -f html -O linenos=1", binary, language);
210
+ } else if (theme && *theme) {
211
+ snprintf(cmd, sizeof(cmd), "%s -l %s -f html -O style=%s", binary, language, theme);
205
212
  } else {
206
213
  snprintf(cmd, sizeof(cmd), "%s -l %s -f html", binary, language);
207
214
  }
208
215
  } else {
209
216
  /* Use -g for auto-detection when no language specified */
210
- if (line_numbers) {
217
+ if (line_numbers && theme && *theme) {
218
+ snprintf(cmd, sizeof(cmd), "%s -g -f html -O linenos=1,style=%s", binary, theme);
219
+ } else if (line_numbers) {
211
220
  snprintf(cmd, sizeof(cmd), "%s -g -f html -O linenos=1", binary);
221
+ } else if (theme && *theme) {
222
+ snprintf(cmd, sizeof(cmd), "%s -g -f html -O style=%s", binary, theme);
212
223
  } else {
213
224
  snprintf(cmd, sizeof(cmd), "%s -g -f html", binary);
214
225
  }
@@ -217,19 +228,52 @@ static char *highlight_code_block(const char *code, const char *language,
217
228
  /* Skylighting: skylighting --syntax LANG -f html -r [-n]
218
229
  * -r = fragment mode (no full HTML document wrapper) */
219
230
  if (language && *language) {
220
- if (line_numbers) {
221
- snprintf(cmd, sizeof(cmd), "%s --syntax %s -f html -r -n", binary, language);
231
+ if (line_numbers && theme && *theme) {
232
+ snprintf(cmd, sizeof(cmd), "%s --syntax %s --style %s -f html -r -n",
233
+ binary, language, theme);
234
+ } else if (line_numbers) {
235
+ snprintf(cmd, sizeof(cmd), "%s --syntax %s -f html -r -n",
236
+ binary, language);
237
+ } else if (theme && *theme) {
238
+ snprintf(cmd, sizeof(cmd), "%s --syntax %s --style %s -f html -r",
239
+ binary, language, theme);
222
240
  } else {
223
- snprintf(cmd, sizeof(cmd), "%s --syntax %s -f html -r", binary, language);
241
+ snprintf(cmd, sizeof(cmd), "%s --syntax %s -f html -r",
242
+ binary, language);
224
243
  }
225
244
  } else {
226
245
  /* Skylighting without syntax tries to auto-detect, but may fail */
227
- if (line_numbers) {
246
+ if (line_numbers && theme && *theme) {
247
+ snprintf(cmd, sizeof(cmd), "%s --style %s -f html -r -n",
248
+ binary, theme);
249
+ } else if (line_numbers) {
228
250
  snprintf(cmd, sizeof(cmd), "%s -f html -r -n", binary);
251
+ } else if (theme && *theme) {
252
+ snprintf(cmd, sizeof(cmd), "%s --style %s -f html -r",
253
+ binary, theme);
229
254
  } else {
230
255
  snprintf(cmd, sizeof(cmd), "%s -f html -r", binary);
231
256
  }
232
257
  }
258
+ } else if (strcmp(tool, "shiki") == 0) {
259
+ /* Shiki CLI: shiki [--lang LANG] --format html|ansi
260
+ * Reads code from stdin. Exits non-zero if lang is missing and cannot be auto-detected;
261
+ * we capture that and fall back to plain text. */
262
+ const char *fmt = ansi_output ? "ansi" : "html";
263
+ if (language && *language) {
264
+ if (theme && *theme) {
265
+ snprintf(cmd, sizeof(cmd), "%s --lang %s --theme %s --format %s", binary, language, theme, fmt);
266
+ } else {
267
+ snprintf(cmd, sizeof(cmd), "%s --lang %s --format %s", binary, language, fmt);
268
+ }
269
+ } else {
270
+ /* No language: Shiki may fail (non-zero exit); run_command returns NULL → we use original block */
271
+ if (theme && *theme) {
272
+ snprintf(cmd, sizeof(cmd), "%s --theme %s --format %s", binary, theme, fmt);
273
+ } else {
274
+ snprintf(cmd, sizeof(cmd), "%s --format %s", binary, fmt);
275
+ }
276
+ }
233
277
  } else {
234
278
  return NULL;
235
279
  }
@@ -240,7 +284,8 @@ static char *highlight_code_block(const char *code, const char *language,
240
284
  /**
241
285
  * Apply syntax highlighting to code blocks in HTML.
242
286
  */
243
- char *apex_apply_syntax_highlighting(const char *html, const char *tool, bool line_numbers, bool language_only) {
287
+ char *apex_apply_syntax_highlighting(const char *html, const char *tool, bool line_numbers,
288
+ bool language_only, bool ansi_output, const char *theme) {
244
289
  if (!html || !tool) return html ? strdup(html) : NULL;
245
290
 
246
291
  /* Check if tool is available */
@@ -399,7 +444,7 @@ char *apex_apply_syntax_highlighting(const char *html, const char *tool, bool li
399
444
  }
400
445
 
401
446
  /* Run syntax highlighter */
402
- char *highlighted = highlight_code_block(code, language, tool, line_numbers);
447
+ char *highlighted = highlight_code_block(code, language, tool, line_numbers, ansi_output, theme);
403
448
  free(code);
404
449
 
405
450
  if (highlighted && *highlighted) {
@@ -22,21 +22,24 @@
22
22
  * Supported tools:
23
23
  * - "pygments": Uses pygmentize command (Python)
24
24
  * - "skylighting": Uses skylighting command (Haskell)
25
+ * - "shiki": Uses shiki CLI (@shikijs/cli); uses --format html or ansi based on ansi_output
25
26
  *
26
27
  * @param html The HTML output containing code blocks to highlight
27
- * @param tool The highlighting tool name ("pygments" or "skylighting")
28
+ * @param tool The highlighting tool name ("pygments", "skylighting", or "shiki")
28
29
  * @param line_numbers Whether to include line numbers in output
29
30
  * @param language_only When true, only highlight blocks that have a language specified
31
+ * @param ansi_output When true, request ANSI output (e.g. for terminal); only affects Shiki (--format ansi vs html)
32
+ * @param theme Optional theme/style name to pass to the external tool (e.g. Pygments style, Shiki theme)
30
33
  * @return Newly allocated HTML with highlighted code blocks, or NULL on error.
31
34
  * If the tool is not found or fails, returns a copy of the original HTML
32
- * with a warning printed to stderr.
35
+ * with a warning printed to stderr. For Shiki, non-zero exit (e.g. missing --lang) yields plain text.
33
36
  */
34
- char *apex_apply_syntax_highlighting(const char *html, const char *tool, bool line_numbers, bool language_only);
37
+ char *apex_apply_syntax_highlighting(const char *html, const char *tool, bool line_numbers, bool language_only, bool ansi_output, const char *theme);
35
38
 
36
39
  /**
37
40
  * Check if a syntax highlighting tool is available in PATH.
38
41
  *
39
- * @param tool The tool name ("pygments" or "skylighting")
42
+ * @param tool The tool name ("pygments", "skylighting", or "shiki")
40
43
  * @return true if the tool's binary is found and executable, false otherwise
41
44
  */
42
45
  bool apex_syntax_highlighter_available(const char *tool);
@@ -305,6 +305,47 @@ static bool process_cell_alignment(const char **content_start, const char **cont
305
305
  return false;
306
306
  }
307
307
 
308
+ /**
309
+ * Extract [Caption] text from a paragraph node.
310
+ * Handles both: (1) TEXT node with literal "[Caption]", (2) LINK node where [text] was parsed as link.
311
+ * Returns allocated string or NULL. Caller must free.
312
+ */
313
+ static char *get_caption_from_paragraph(cmark_node *para) {
314
+ if (!para || cmark_node_get_type(para) != CMARK_NODE_PARAGRAPH) return NULL;
315
+ cmark_node *child = cmark_node_first_child(para);
316
+ if (!child) return NULL;
317
+
318
+ cmark_node_type t = cmark_node_get_type(child);
319
+ if (t == CMARK_NODE_TEXT) {
320
+ const char *text = cmark_node_get_literal(child);
321
+ if (!text || text[0] != '[') return NULL;
322
+ const char *end = strchr(text + 1, ']');
323
+ if (!end) return NULL;
324
+ const char *after = end + 1;
325
+ while (*after && isspace((unsigned char)*after)) after++;
326
+ if (*after != '\0') return NULL;
327
+ size_t len = (size_t)(end - text - 1);
328
+ if (len == 0 || len >= 512) return NULL;
329
+ char *caption = malloc(len + 1);
330
+ if (caption) {
331
+ memcpy(caption, text + 1, len);
332
+ caption[len] = '\0';
333
+ }
334
+ return caption;
335
+ }
336
+ if (t == CMARK_NODE_LINK) {
337
+ /* [Caption] parsed as link - caption is the link text */
338
+ cmark_node *link_text = cmark_node_first_child(child);
339
+ if (!link_text || cmark_node_get_type(link_text) != CMARK_NODE_TEXT) return NULL;
340
+ const char *text = cmark_node_get_literal(link_text);
341
+ if (!text) return NULL;
342
+ /* Only treat as caption if it's the only content (no siblings) */
343
+ if (cmark_node_next(link_text) != NULL) return NULL;
344
+ return strdup(text);
345
+ }
346
+ return NULL;
347
+ }
348
+
308
349
  /**
309
350
  * Get text fingerprint from paragraph node (first 50 chars for matching)
310
351
  */
@@ -386,35 +427,30 @@ static table_caption *collect_table_captions(cmark_node *document, para_to_remov
386
427
  /* Check previous node for caption */
387
428
  cmark_node *prev = cmark_node_previous(node);
388
429
  if (prev && cmark_node_get_type(prev) == CMARK_NODE_PARAGRAPH) {
389
- /* Check if previous paragraph is a caption */
390
- cmark_node *text_node = cmark_node_first_child(prev);
391
- if (text_node && cmark_node_get_type(text_node) == CMARK_NODE_TEXT) {
392
- const char *text = cmark_node_get_literal(text_node);
393
- if (text && text[0] == '[') {
394
- const char *end = strchr(text + 1, ']');
395
- if (end) {
396
- const char *after = end + 1;
397
- while (*after && isspace((unsigned char)*after)) after++;
398
- if (*after == '\0') {
399
- /* This is a caption - extract it */
400
- size_t caption_len = end - text - 1;
401
- char *caption = malloc(caption_len + 1);
402
- if (caption) {
403
- memcpy(caption, text + 1, caption_len);
404
- caption[caption_len] = '\0';
405
- table_caption *cap = malloc(sizeof(table_caption));
406
- if (cap) {
407
- cap->table_index = table_index;
408
- cap->caption = caption;
409
- cap->next = list;
410
- list = cap;
411
- caption_found = true;
412
- } else {
413
- free(caption);
414
- }
415
- }
430
+ char *caption = get_caption_from_paragraph(prev);
431
+ if (caption) {
432
+ table_caption *cap = malloc(sizeof(table_caption));
433
+ if (cap) {
434
+ cap->table_index = table_index;
435
+ cap->caption = caption;
436
+ cap->next = list;
437
+ list = cap;
438
+ caption_found = true;
439
+ /* Mark caption paragraph for removal (para_index is prev's index) */
440
+ char *fingerprint = get_para_text_fingerprint(prev);
441
+ if (fingerprint) {
442
+ para_to_remove *para = malloc(sizeof(para_to_remove));
443
+ if (para) {
444
+ para->para_index = para_index;
445
+ para->text_fingerprint = fingerprint;
446
+ para->next = *paras_to_remove;
447
+ *paras_to_remove = para;
448
+ } else {
449
+ free(fingerprint);
416
450
  }
417
451
  }
452
+ } else {
453
+ free(caption);
418
454
  }
419
455
  }
420
456
  }
@@ -422,33 +458,29 @@ static table_caption *collect_table_captions(cmark_node *document, para_to_remov
422
458
  if (!caption_found) {
423
459
  cmark_node *next = cmark_node_next(node);
424
460
  if (next && cmark_node_get_type(next) == CMARK_NODE_PARAGRAPH) {
425
- cmark_node *text_node = cmark_node_first_child(next);
426
- if (text_node && cmark_node_get_type(text_node) == CMARK_NODE_TEXT) {
427
- const char *text = cmark_node_get_literal(text_node);
428
- if (text && text[0] == '[') {
429
- const char *end = strchr(text + 1, ']');
430
- if (end) {
431
- const char *after = end + 1;
432
- while (*after && isspace((unsigned char)*after)) after++;
433
- if (*after == '\0') {
434
- /* This is a caption - extract it */
435
- size_t caption_len = end - text - 1;
436
- char *caption = malloc(caption_len + 1);
437
- if (caption) {
438
- memcpy(caption, text + 1, caption_len);
439
- caption[caption_len] = '\0';
440
- table_caption *cap = malloc(sizeof(table_caption));
441
- if (cap) {
442
- cap->table_index = table_index;
443
- cap->caption = caption;
444
- cap->next = list;
445
- list = cap;
446
- } else {
447
- free(caption);
448
- }
449
- }
461
+ char *caption = get_caption_from_paragraph(next);
462
+ if (caption) {
463
+ table_caption *cap = malloc(sizeof(table_caption));
464
+ if (cap) {
465
+ cap->table_index = table_index;
466
+ cap->caption = caption;
467
+ cap->next = list;
468
+ list = cap;
469
+ /* Mark caption paragraph for removal (next's index = para_index + 1) */
470
+ char *fingerprint = get_para_text_fingerprint(next);
471
+ if (fingerprint) {
472
+ para_to_remove *para = malloc(sizeof(para_to_remove));
473
+ if (para) {
474
+ para->para_index = para_index + 1;
475
+ para->text_fingerprint = fingerprint;
476
+ para->next = *paras_to_remove;
477
+ *paras_to_remove = para;
478
+ } else {
479
+ free(fingerprint);
450
480
  }
451
481
  }
482
+ } else {
483
+ free(caption);
452
484
  }
453
485
  }
454
486
  }
@@ -79,6 +79,7 @@ static header_item *collect_headers(cmark_node *node, header_item **tail) {
79
79
 
80
80
  /**
81
81
  * Generate TOC HTML from headers
82
+ * Produces valid ul > li > ul nesting (nested lists inside list items)
82
83
  */
83
84
  static char *generate_toc_html(header_item *headers, int min_level, int max_level) {
84
85
  if (!headers) return strdup("");
@@ -90,6 +91,7 @@ static char *generate_toc_html(header_item *headers, int min_level, int max_leve
90
91
  char *write = html;
91
92
  size_t remaining = capacity;
92
93
  int current_level = 0;
94
+ int last_level = 0; /* level of last item added (0 = none yet) */
93
95
 
94
96
  #define APPEND(str) do { \
95
97
  size_t len = strlen(str); \
@@ -106,28 +108,46 @@ static char *generate_toc_html(header_item *headers, int min_level, int max_leve
106
108
  /* Skip headers outside min/max range */
107
109
  if (h->level < min_level || h->level > max_level) continue;
108
110
 
109
- /* Close lists if going up levels */
111
+ /* Going up: close </ul></li> for each level (nested ul inside parent li) */
110
112
  while (current_level > h->level) {
111
- APPEND("</ul>\n");
113
+ APPEND("</ul>\n</li>\n");
112
114
  current_level--;
113
115
  }
114
116
 
115
- /* Open lists if going down levels */
117
+ /* Going down: open one <ul> inside the previous li before adding child.
118
+ * At root (current_level 0): only open one ul - never ul > ul.
119
+ * When going deeper: open exactly one ul per step - never ul > ul. */
116
120
  while (current_level < h->level) {
117
- APPEND("<ul>\n");
118
- current_level++;
121
+ if (current_level == 0) {
122
+ APPEND("<ul>\n");
123
+ current_level = 1;
124
+ break;
125
+ }
126
+ if (last_level > 0) {
127
+ APPEND("<ul>\n");
128
+ current_level++;
129
+ break;
130
+ } else {
131
+ break; /* No parent li - add as direct child of root ul */
132
+ }
133
+ }
134
+
135
+ /* Close previous li when adding sibling (same or shallower level) */
136
+ if (last_level > 0 && h->level <= last_level) {
137
+ APPEND("</li>\n");
119
138
  }
120
139
 
121
- /* Add list item */
140
+ /* Add list item (leave open - may contain nested ul) */
122
141
  char item[1024];
123
- snprintf(item, sizeof(item), "<li><a href=\"#%s\">%s</a></li>\n",
142
+ snprintf(item, sizeof(item), "<li><a href=\"#%s\">%s</a>",
124
143
  h->id, h->text);
125
144
  APPEND(item);
145
+ last_level = h->level;
126
146
  }
127
147
 
128
- /* Close remaining lists */
148
+ /* Close remaining: </li></ul> for each open level */
129
149
  while (current_level > 0) {
130
- APPEND("</ul>\n");
150
+ APPEND("</li>\n</ul>\n");
131
151
  current_level--;
132
152
  }
133
153
 
@@ -139,6 +159,62 @@ static char *generate_toc_html(header_item *headers, int min_level, int max_leve
139
159
  return html;
140
160
  }
141
161
 
162
/**
 * Compare `n` bytes at `s` against the lowercase ASCII word `word`,
 * accepting either the lowercase or the uppercase form of each letter.
 */
static int toc_ascii_word_eq(const char *s, const char *word, size_t n) {
    for (size_t k = 0; k < n; k++) {
        char lower = word[k];
        char upper = (char)(lower - ('a' - 'A'));
        if (s[k] != lower && s[k] != upper) return 0;
    }
    return 1;
}

/**
 * True when `c` may directly follow a tag name in an opening tag
 * ('>', space, tab or newline).
 */
static int toc_is_tag_name_end(char c) {
    return c == '>' || c == ' ' || c == '\t' || c == '\n';
}

/**
 * Report whether byte offset `pos` of `html` lies inside an open <code> or
 * <pre> element. Used to skip TOC markers that appear in code blocks or
 * inline code.
 *
 * Only the prefix html[0..pos) is scanned. Opening tags increment a per-tag
 * depth counter, the exact closing tags "</code>" and "</pre>" decrement it
 * (never below zero). Tag names are matched case-insensitively.
 *
 * @return 1 if either depth counter is positive at `pos`, otherwise 0.
 */
static int is_inside_code_or_pre(const char *html, size_t pos) {
    int code_depth = 0;
    int pre_depth = 0;
    size_t at = 0;

    while (at < pos) {
        if (html[at] != '<') {
            at++;
            continue;
        }

        /* Opening <code ...> */
        if (at + 5 <= pos && toc_ascii_word_eq(html + at + 1, "code", 4)) {
            char follow = (at + 5 < pos) ? html[at + 5] : '\0';
            if (toc_is_tag_name_end(follow)) {
                code_depth++;
                at += 5;
                continue;
            }
        }

        /* Opening <pre ...> */
        if (at + 4 <= pos && toc_ascii_word_eq(html + at + 1, "pre", 3)) {
            char follow = (at + 4 < pos) ? html[at + 4] : '\0';
            if (toc_is_tag_name_end(follow)) {
                pre_depth++;
                at += 4;
                continue;
            }
        }

        /* Closing </code> */
        if (at + 7 <= pos && html[at + 1] == '/' &&
            toc_ascii_word_eq(html + at + 2, "code", 4) && html[at + 6] == '>') {
            if (code_depth > 0) code_depth--;
            at += 7;
            continue;
        }

        /* Closing </pre> */
        if (at + 6 <= pos && html[at + 1] == '/' &&
            toc_ascii_word_eq(html + at + 2, "pre", 3) && html[at + 5] == '>') {
            if (pre_depth > 0) pre_depth--;
            at += 6;
            continue;
        }

        /* Some other tag or a stray '<' */
        at++;
    }

    return (code_depth > 0 || pre_depth > 0);
}
217
+
142
218
  /**
143
219
  * Parse TOC marker for min/max levels
144
220
  */
@@ -180,18 +256,57 @@ static void parse_toc_marker(const char *marker, int *min_level, int *max_level)
180
256
  }
181
257
  }
182
258
 
259
/**
 * Locate the first TOC marker ("<!--TOC" or "{{TOC") in `html` that is not
 * inside a <code> or <pre> element.
 *
 * Each pass picks whichever marker occurs earlier from the current search
 * position. A marker found inside code is skipped by resuming the search
 * after its terminator ("-->" or "}}"), or one byte further on when no
 * terminator exists.
 *
 * @param html            NUL-terminated HTML to search.
 * @param is_html_comment Out: set to 1 when the returned marker is "<!--TOC",
 *                        0 when it is "{{TOC" or when nothing is found.
 * @return Pointer into `html` at the start of the marker, or NULL if none
 *         qualifies.
 */
static const char *find_toc_marker_not_in_code(const char *html, int *is_html_comment) {
    const char *cursor = html;
    *is_html_comment = 0;

    for (;;) {
        const char *comment_hit = strstr(cursor, "<!--TOC");
        const char *mmd_hit = strstr(cursor, "{{TOC");

        if (comment_hit == NULL && mmd_hit == NULL) {
            return NULL; /* No more markers */
        }

        /* The comment form wins when it exists and comes first. */
        int comment_first =
            (comment_hit != NULL) && (mmd_hit == NULL || comment_hit < mmd_hit);
        const char *hit = comment_first ? comment_hit : mmd_hit;

        if (!is_inside_code_or_pre(html, (size_t)(hit - html))) {
            *is_html_comment = comment_first;
            return hit;
        }

        /* Marker sits inside code: step past it and keep looking. */
        const char *closer = comment_first ? "-->" : "}}";
        const char *close_at = strstr(hit, closer);
        cursor = close_at ? close_at + strlen(closer) : hit + 1;
    }
}
298
+
183
299
  /**
184
300
  * Process TOC markers in HTML
185
301
  */
186
302
  char *apex_process_toc(const char *html, cmark_node *document, int id_format) {
187
303
  if (!html || !document) return html ? strdup(html) : NULL;
188
304
 
189
- /* Check if there are any TOC markers */
190
- const char *toc_marker = strstr(html, "<!--TOC");
191
- const char *toc_mmd = strstr(html, "{{TOC");
305
+ int is_html_comment = 0;
306
+ const char *marker = find_toc_marker_not_in_code(html, &is_html_comment);
192
307
 
193
- if (!toc_marker && !toc_mmd) {
194
- return strdup(html); /* No TOC markers, return as-is */
308
+ if (!marker) {
309
+ return strdup(html); /* No valid TOC marker (or all are in code), return as-is */
195
310
  }
196
311
 
197
312
  /* Collect headers from document */
@@ -204,8 +319,7 @@ char *apex_process_toc(const char *html, cmark_node *document, int id_format) {
204
319
  h->id = apex_generate_header_id(h->text, (apex_id_format_t)id_format);
205
320
  }
206
321
 
207
- /* Find the marker and parse it */
208
- const char *marker = toc_marker ? toc_marker : toc_mmd;
322
+ /* Parse the marker for min/max levels */
209
323
  int min_level, max_level;
210
324
  parse_toc_marker(marker, &min_level, &max_level);
211
325
 
@@ -227,10 +341,10 @@ char *apex_process_toc(const char *html, cmark_node *document, int id_format) {
227
341
 
228
342
  /* Find end of marker */
229
343
  const char *marker_end = NULL;
230
- if (toc_marker) {
344
+ if (is_html_comment) {
231
345
  marker_end = strstr(marker, "-->");
232
346
  if (marker_end) marker_end += 3;
233
- } else if (toc_mmd) {
347
+ } else {
234
348
  marker_end = strstr(marker, "}}");
235
349
  if (marker_end) marker_end += 2;
236
350
  }
@@ -242,7 +356,7 @@ char *apex_process_toc(const char *html, cmark_node *document, int id_format) {
242
356
  }
243
357
 
244
358
  /* Build output: before + TOC + after */
245
- size_t before_len = marker - html;
359
+ size_t before_len = (size_t)(marker - html);
246
360
  size_t after_len = strlen(marker_end);
247
361
 
248
362
  memcpy(output, html, before_len);