apex-ruby 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. checksums.yaml +4 -4
  2. data/ext/apex_ext/apex_ext.c +6 -0
  3. data/ext/apex_ext/apex_src/AGENTS.md +41 -0
  4. data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
  5. data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
  6. data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
  7. data/ext/apex_ext/apex_src/Package.swift +9 -0
  8. data/ext/apex_ext/apex_src/README.md +31 -9
  9. data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
  10. data/ext/apex_ext/apex_src/VERSION +1 -1
  11. data/ext/apex_ext/apex_src/cli/main.c +1125 -13
  12. data/ext/apex_ext/apex_src/docs/index.md +459 -0
  13. data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
  14. data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
  15. data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
  16. data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
  17. data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
  18. data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
  19. data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
  20. data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
  21. data/ext/apex_ext/apex_src/man/apex.1 +663 -620
  22. data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
  23. data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
  24. data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
  25. data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
  26. data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
  27. data/ext/apex_ext/apex_src/pages/index.md +459 -0
  28. data/ext/apex_ext/apex_src/src/_README.md +4 -4
  29. data/ext/apex_ext/apex_src/src/apex.c +702 -44
  30. data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
  31. data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
  32. data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
  33. data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
  34. data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
  35. data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
  36. data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
  37. data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
  38. data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
  39. data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
  40. data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
  41. data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
  42. data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
  43. data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
  44. data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
  45. data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
  46. data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
  47. data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
  48. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
  49. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
  50. data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
  51. data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
  52. data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
  53. data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
  54. data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
  55. data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
  56. data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
  57. data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
  58. data/ext/apex_ext/apex_src/tests/README.md +11 -5
  59. data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
  60. data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
  61. data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
  62. data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
  63. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
  64. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
  65. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
  66. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
  67. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
  68. data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
  69. data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
  70. data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
  71. data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
  72. data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
  73. data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
  74. data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
  75. data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
  76. data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
  77. data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
  78. data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
  79. data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
  80. data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
  81. data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
  82. data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
  83. data/lib/apex/version.rb +1 -1
  84. metadata +32 -2
  85. data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
@@ -1,10 +1,8 @@
1
1
  /**
2
- * Definition List Extension for Apex
2
+ * One-Line Definition List Extension for Apex
3
3
  *
4
- * Supports Kramdown/PHP Markdown Extra style definition lists:
5
- * Term
6
- * : Definition 1
7
- * : Definition 2
4
+ * Supports: Term :: Definition text or Term::Definition text
5
+ * Multiple consecutive lines create one <dl> with multiple <dt>/<dd> pairs.
8
6
  */
9
7
 
10
8
  #ifndef APEX_DEFINITION_LIST_H
@@ -12,31 +10,26 @@
12
10
 
13
11
  #include <stdbool.h>
14
12
  #include "cmark-gfm.h"
15
- #include "cmark-gfm-extension_api.h"
16
13
 
17
14
  #ifdef __cplusplus
18
15
  extern "C" {
19
16
  #endif
20
17
 
21
- /* Custom node types for definition lists */
22
- /* Note: APEX_NODE_DEFINITION_* are defined as enum values in parser.h, not as variables */
23
-
24
18
  /**
25
- * Process definition lists via preprocessing
26
- * Converts : syntax to HTML before main parsing
19
+ * Process one-line definition lists via preprocessing.
20
+ * Converts "Term :: Definition" lines to <dl><dt>Term</dt><dd>Definition</dd></dl>
27
21
  * @param text The markdown text to process
28
- * @param unsafe If true, allow raw HTML in output (pass CMARK_OPT_UNSAFE)
22
+ * @param unsafe If true, allow raw HTML in output
29
23
  */
30
24
  char *apex_process_definition_lists(const char *text, bool unsafe);
31
25
 
32
26
  /**
33
- * Create and return the definition list extension
27
+ * Debug touch - no-op for one-line format
34
28
  */
35
- cmark_syntax_extension *create_definition_list_extension(void);
29
+ void apex_deflist_debug_touch(int enable_definition_lists);
36
30
 
37
31
  #ifdef __cplusplus
38
32
  }
39
33
  #endif
40
34
 
41
35
  #endif /* APEX_DEFINITION_LIST_H */
42
-
@@ -7,6 +7,7 @@
7
7
  #include <stdlib.h>
8
8
  #include <stdio.h>
9
9
  #include <ctype.h>
10
+ #include <stdbool.h>
10
11
  #include <stdint.h>
11
12
  #include "emoji_data.h"
12
13
 
@@ -15,6 +16,30 @@ static void normalize_emoji_name(char *name);
15
16
  static int is_table_alignment_pattern(const char *start, const char *end);
16
17
  static int is_inside_html_attribute(const char *pos, const char *start);
17
18
 
19
+ /**
+  * True if the text at p starts with a list marker.
+  *
+  * Two shapes are accepted: a single -, * or + character followed by a space
+  * or tab, or a run of one or more decimal digits followed by a period and
+  * then a space or tab. Everything else, including an empty string, gives 0.
+  * Ordered markers that use a closing parenthesis instead of a period are
+  * not matched.
+  *
+  * @param p NUL-terminated text positioned at the candidate marker
+  * @return Non-zero if a marker is found, 0 otherwise
+  */
20
+ static int looks_like_list_marker(const char *p) {
21
+ if (!*p) return 0; /* empty input cannot hold a marker */
22
+ if (*p == '-' || *p == '*' || *p == '+')
23
+ return (p[1] == ' ' || p[1] == '\t'); /* bullet must be followed by blank */
24
+ if (isdigit((unsigned char)*p)) { /* cast keeps isdigit well-defined for high bytes */
25
+ while (isdigit((unsigned char)*p)) p++; /* skip the whole number */
26
+ return (*p == '.' && (p[1] == ' ' || p[1] == '\t'));
27
+ }
28
+ return 0;
29
+ }
30
+
31
+ /**
+  * True if the line starting at read is treated as an indented code block.
+  *
+  * A line qualifies when it begins with one tab, or with at least four
+  * spaces, and the text after that indentation is not a list marker (as
+  * decided by looks_like_list_marker). For the four-space form any further
+  * spaces are skipped first and at least one more byte must follow; that
+  * byte may be a newline, so a line made only of spaces also qualifies.
+  * A tab followed directly by the end of the string qualifies as well.
+  *
+  * @param read NUL-terminated text positioned at the first byte of a line
+  * @return Non-zero if the line counts as indented code, 0 otherwise
+  */
32
+ static int line_is_indented_code_block(const char *read) {
33
+ if (!*read) return 0;
34
+ if (*read == '\t') /* only a tab in the very first column is considered */
35
+ return !looks_like_list_marker(read + 1);
36
+ if (read[0] != ' ' || read[1] != ' ' || read[2] != ' ' || read[3] != ' ') /* short-circuit stops at the first non-space, so the terminator is never passed */
37
+ return 0;
38
+ const char *content = read + 4;
39
+ while (*content == ' ') content++; /* deeper indentation is still indentation */
40
+ return *content && !looks_like_list_marker(content);
41
+ }
42
+
18
43
  /**
19
44
  * Find emoji entry by name
20
45
  * Returns pointer to emoji_entry or NULL if not found
@@ -234,7 +259,33 @@ char *apex_replace_emoji(const char *html) {
234
259
  char *write = output;
235
260
  size_t remaining = capacity;
236
261
 
262
+ bool in_code_tag = false; /* Skip emoji inside <code>...</code> and <pre>...</pre> */
263
+
237
264
  while (*read) {
265
+ /* Track <code> and <pre> tags - skip emoji replacement inside code */
266
+ if (*read == '<' && read[1]) {
267
+ if (read[6] && read[1] == '/' && read[2] == 'c' && read[3] == 'o' && read[4] == 'd' && read[5] == 'e' && read[6] == '>') {
268
+ in_code_tag = false;
269
+ } else if (read[5] && read[1] == '/' && read[2] == 'p' && read[3] == 'r' && read[4] == 'e' && read[5] == '>') {
270
+ in_code_tag = false;
271
+ } else if (read[5] && read[1] == 'c' && read[2] == 'o' && read[3] == 'd' && read[4] == 'e' &&
272
+ (read[5] == '>' || read[5] == ' ' || read[5] == '\t')) {
273
+ in_code_tag = true;
274
+ } else if (read[4] && read[1] == 'p' && read[2] == 'r' && read[3] == 'e' &&
275
+ (read[4] == '>' || read[4] == ' ' || read[4] == '\t')) {
276
+ in_code_tag = true;
277
+ }
278
+ }
279
+ if (in_code_tag) {
280
+ if (remaining > 0) {
281
+ *write++ = *read++;
282
+ remaining--;
283
+ } else {
284
+ read++;
285
+ }
286
+ continue;
287
+ }
288
+
238
289
  /* Check if we're inside an index placeholder <!--IDX:...--> - if so, skip emoji processing */
239
290
  if (read >= html + 7 && strncmp(read - 7, "<!--IDX:", 8) == 0) {
240
291
  /* Find the end of the placeholder */
@@ -437,6 +488,133 @@ char *apex_replace_emoji(const char *html) {
437
488
  return output;
438
489
  }
439
490
 
491
+ /**
492
+ * Replace :emoji: patterns in plain text with Unicode emoji only.
493
+ *
494
+ * This variant is intended for non-HTML outputs (e.g. terminal rendering)
495
+ * where we do not want to emit <img> tags. It reuses the same emoji table
496
+ * but only substitutes entries that have a Unicode representation; image-
497
+ * only emoji names are left as their original :name: patterns.
+ *
+ * Text inside fenced code, inline code spans and indented code lines is
+ * copied through unchanged. A candidate is a colon followed by a closing
+ * colon fewer than 50 bytes later, with a non-empty name that contains no
+ * space, tab, CR or LF. Candidates accepted by is_table_alignment_pattern
+ * are copied verbatim; the remaining ones are normalized with
+ * normalize_emoji_name and looked up with find_emoji_entry.
+ *
+ * The output buffer is sized once (twice the input length plus 16 bytes)
+ * and never grown. When it is full, further input bytes are discarded and
+ * the result is truncated rather than reallocated.
+ *
+ * @param text NUL-terminated input; NULL yields NULL
+ * @return Newly allocated string (from malloc, or from strdup of the input
+ *         if the working buffer cannot be allocated)
498
+ */
499
+ char *apex_replace_emoji_text(const char *text) {
500
+ if (!text) return NULL;
501
+
502
+ size_t capacity = strlen(text) * 2 + 16; /* Fixed budget; see truncation handling at the end */
503
+ char *output = malloc(capacity);
504
+ if (!output) return strdup(text); /* degrade to an unprocessed copy instead of failing */
505
+
506
+ const char *read = text;
507
+ char *write = output;
508
+ size_t remaining = capacity; /* bytes still free in output, terminator included */
509
+
510
+ bool in_code_block = false;
511
+ bool in_inline_code = false;
512
+ bool in_indented_code_block = false;
513
+
514
+ while (*read) {
515
+ /* At line start: re-evaluate the indented-code flag; it then holds for the whole line */
516
+ if (read == text || read[-1] == '\n') {
517
+ in_indented_code_block = line_is_indented_code_block(read);
518
+ }
519
+
520
+ /* Track fenced code blocks (```) and inline code (`).
+ * Every backtick is visited one at a time, so the flags are plain
+ * toggles; an unmatched single backtick leaves in_inline_code set
+ * until the next backtick outside a fence. */
521
+ if (*read == '`') {
522
+ if (read[1] == '`' && read[2] == '`') {
523
+ in_code_block = !in_code_block;
524
+ } else if (!in_code_block) {
525
+ in_inline_code = !in_inline_code;
526
+ }
527
+ }
528
+
529
+ /* Inside any code context: copy the byte through untouched */
530
+ if (in_code_block || in_inline_code || in_indented_code_block) {
531
+ if (remaining > 0) {
532
+ *write++ = *read++;
533
+ remaining--;
534
+ } else {
535
+ read++; /* buffer full: input byte is dropped */
536
+ }
537
+ continue;
538
+ }
539
+
540
+ if (*read == ':') {
541
+ /* Look for closing : */
542
+ const char *end = strchr(read + 1, ':');
543
+ if (end && (end - read) < 50) { /* Reasonable emoji name length */
544
+ /* Name is the text strictly between the two colons */
545
+ int name_len = (int)(end - (read + 1));
546
+ const char *name_start = read + 1;
547
+
548
+ if (name_len > 0) {
549
+ /* Reject names containing whitespace */
550
+ int has_space = 0;
551
+ for (int i = 0; i < name_len; i++) {
552
+ char ch = name_start[i];
553
+ if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
554
+ has_space = 1;
555
+ break;
556
+ }
557
+ }
558
+
559
+ /* Skip common table alignment patterns like :---: */
560
+ if (!has_space && is_table_alignment_pattern(name_start, end)) {
561
+ size_t pattern_len = (size_t)(end - read + 1); /* both colons included */
562
+ if (pattern_len <= remaining) {
563
+ memcpy(write, read, pattern_len);
564
+ write += pattern_len;
565
+ remaining -= pattern_len;
566
+ } /* a pattern that does not fit is dropped as a whole */
567
+ read = end + 1;
568
+ continue;
569
+ }
570
+
571
+ if (!has_space) {
572
+ /* Normalize name and look up in emoji table */
573
+ char normalized[64];
574
+ if ((size_t)name_len >= sizeof(normalized)) { /* defensive clamp; the 50-byte limit above already keeps the name shorter */
575
+ name_len = (int)sizeof(normalized) - 1;
576
+ }
577
+ memcpy(normalized, name_start, (size_t)name_len);
578
+ normalized[name_len] = '\0';
579
+ normalize_emoji_name(normalized);
580
+ size_t normalized_len = strlen(normalized); /* measured again after normalization */
581
+
582
+ const emoji_entry *entry = find_emoji_entry(normalized, (int)normalized_len);
583
+ if (entry && entry->unicode) { /* entries without a unicode string are left as text */
584
+ /* Substitute Unicode emoji */
585
+ size_t emoji_len = strlen(entry->unicode);
586
+ if (emoji_len <= remaining) {
587
+ memcpy(write, entry->unicode, emoji_len);
588
+ write += emoji_len;
589
+ remaining -= emoji_len;
590
+ read = end + 1;
591
+ continue;
592
+ }
593
+ /* Not enough space: fall through, the default path below copies the pattern one byte per iteration */
594
+ }
595
+ }
596
+ }
597
+ }
598
+ }
599
+
600
+ /* Default: copy single byte */
601
+ if (remaining > 0) {
602
+ *write++ = *read++;
603
+ remaining--;
604
+ } else {
605
+ read++; /* buffer full: input byte is dropped */
606
+ }
607
+ }
608
+
609
+ if (remaining > 0) {
610
+ *write = '\0';
611
+ } else {
612
+ output[capacity - 1] = '\0'; /* buffer full: the last copied byte is overwritten by the terminator */
613
+ }
614
+
615
+ return output;
616
+ }
617
+
440
618
  /**
441
619
  * Normalize emoji name: lowercase, hyphens to underscores, remove colons
442
620
  */
@@ -584,7 +762,36 @@ char *apex_autocorrect_emoji_names(const char *text) {
584
762
  char *write = output;
585
763
  size_t remaining = capacity;
586
764
 
765
+ bool in_code_block = false;
766
+ bool in_inline_code = false;
767
+ bool in_indented_code_block = false;
768
+
587
769
  while (*read) {
770
+ /* At line start: indented code block only if 4+ spaces/tab and not a list line */
771
+ if (read == text || read[-1] == '\n') {
772
+ in_indented_code_block = line_is_indented_code_block(read);
773
+ }
774
+
775
+ /* Track fenced code blocks (```) and inline code (`) */
776
+ if (*read == '`') {
777
+ if (read[1] == '`' && read[2] == '`') {
778
+ in_code_block = !in_code_block;
779
+ } else if (!in_code_block) {
780
+ in_inline_code = !in_inline_code;
781
+ }
782
+ }
783
+
784
+ /* Skip emoji processing inside any code context */
785
+ if (in_code_block || in_inline_code || in_indented_code_block) {
786
+ if (remaining > 0) {
787
+ *write++ = *read++;
788
+ remaining--;
789
+ } else {
790
+ read++;
791
+ }
792
+ continue;
793
+ }
794
+
588
795
  if (*read == ':') {
589
796
  /* Look for closing : */
590
797
  const char *end = strchr(read + 1, ':');
@@ -14,6 +14,20 @@ extern "C" {
14
14
  */
15
15
  char *apex_replace_emoji(const char *html);
16
16
 
17
+ /**
18
+ * Replace :emoji: patterns in plain text with Unicode emoji only.
19
+ *
20
+ * This is suitable for non-HTML outputs (e.g. terminal rendering) where
21
+ * image-based emoji tags are not desired. If an emoji entry has no
22
+ * Unicode representation (image-only), the original :emoji: pattern is
23
+ * left unchanged.
24
+ *
25
+ * @param text Plain text to process (UTF-8)
26
+ * @return Newly allocated string with emoji replacements applied, or NULL
27
+ * on error. Caller must free the returned string.
28
+ */
29
+ char *apex_replace_emoji_text(const char *text);
30
+
17
31
  /**
18
32
  * Find emoji name from unicode emoji (reverse lookup)
19
33
  * @param unicode The unicode emoji string (UTF-8)
@@ -98,6 +98,15 @@ char *apex_generate_header_id(const char *text, apex_id_format_t format) {
98
98
  continue;
99
99
  }
100
100
 
101
+ /* Check for apostrophes: curly (') U+2019: 0xE2 0x80 0x99, left quote (') U+2018: 0xE2 0x80 0x98 */
102
+ if (c == 0xE2 && read[1] != '\0' && read[2] != '\0' &&
103
+ (unsigned char)read[1] == 0x80 &&
104
+ ((unsigned char)read[2] == 0x99 || (unsigned char)read[2] == 0x98)) {
105
+ /* Remove apostrophes in all formats - they break anchor links */
106
+ read += 2;
107
+ continue;
108
+ }
109
+
101
110
  if (format == APEX_ID_FORMAT_MMD) {
102
111
  /* MMD format: preserve dashes, lowercase alphanumerics, preserve diacritics, skip spaces/punctuation */
103
112
  if (c == '-') {
@@ -381,6 +390,51 @@ char *apex_generate_header_id(const char *text, apex_id_format_t format) {
381
390
  return id;
382
391
  }
383
392
 
393
+ /**
394
+ * Heading text is gathered by recursively appending literal text from a node
395
+ * and its descendants to a buffer. TEXT and CODE literals are taken and inline
396
+ * containers (EMPH, STRONG, etc.) are descended into,
+ * so "### *Processing* modes" yields "Processing modes" matching rendered HTML.
+ *
+ * append_literal is the buffer primitive used by that walk: it copies one
+ * string to the write cursor of a growable heap buffer.
+ *
+ * The capacity is doubled until the string length is strictly smaller than
+ * the free space, so at least one byte stays free after the copy. No NUL
+ * terminator is written here. If realloc fails the function returns without
+ * copying anything; the buffer and all bookkeeping values keep their
+ * previous state.
+ *
+ * @param text      In/out: start of the buffer (may be moved by realloc)
+ * @param write     In/out: current write position inside the buffer
+ * @param capacity  In/out: allocated size of the buffer in bytes
+ * @param remaining In/out: free bytes from the write position to the end
+ * @param literal   String to append; NULL is ignored
397
+ */
398
+ static void append_literal(char **text, char **write, size_t *capacity, size_t *remaining,
399
+ const char *literal) {
400
+ if (!literal) return;
401
+ size_t len = strlen(literal);
402
+ while (len >= *remaining) { /* strict: keep room for a terminator added later */
403
+ size_t new_cap = *capacity * 2;
404
+ char *new_text = realloc(*text, new_cap);
405
+ if (!new_text) return; /* old buffer is still valid and untouched */
406
+ *write = new_text + (*write - *text); /* NOTE(review): offset is computed from the pre-realloc address after realloc has run; taking the offset before the call would avoid using a possibly released pointer */
407
+ *text = new_text;
408
+ *capacity = new_cap;
409
+ *remaining = new_cap - (size_t)(*write - *text);
410
+ }
411
+ memcpy(*write, literal, len);
412
+ *write += len;
413
+ *remaining -= len;
414
+ }
415
+ /**
+  * Append the literal text found under node, in document order.
+  *
+  * TEXT, CODE and HTML_INLINE nodes contribute their literal through
+  * append_literal and are not descended into. For every other node type
+  * the children are visited in order, so a node that has neither a handled
+  * type nor children contributes nothing.
+  *
+  * @param node      Node to read; its type is queried with cmark_node_get_type
+  * @param text      In/out buffer start, forwarded to append_literal
+  * @param write     In/out write cursor, forwarded to append_literal
+  * @param capacity  In/out buffer capacity, forwarded to append_literal
+  * @param remaining In/out free space, forwarded to append_literal
+  */
416
+ static void extract_heading_text_recursive(cmark_node *node, char **text, char **write,
417
+ size_t *capacity, size_t *remaining) {
418
+ cmark_node_type type = cmark_node_get_type(node);
419
+
420
+ if (type == CMARK_NODE_TEXT || type == CMARK_NODE_CODE) { /* literal-carrying node: take its text and stop */
421
+ append_literal(text, write, capacity, remaining, cmark_node_get_literal(node));
422
+ return;
423
+ }
424
+ /* HTML_INLINE has literal (e.g. "&") - needed for "Documentation & resources" */
425
+ if (type == CMARK_NODE_HTML_INLINE) {
426
+ append_literal(text, write, capacity, remaining, cmark_node_get_literal(node));
427
+ return;
428
+ }
429
+
430
+ /* Any other node: recurse into its children (EMPH, STRONG, LINK, etc.) */
431
+ cmark_node *child = cmark_node_first_child(node);
432
+ while (child) {
433
+ extract_heading_text_recursive(child, text, write, capacity, remaining);
434
+ child = cmark_node_next(child);
435
+ }
436
+ }
437
+
384
438
  /**
385
439
  * Extract text content from a heading node
386
440
  */
@@ -389,59 +443,15 @@ char *apex_extract_heading_text(cmark_node *heading_node) {
389
443
  return strdup("");
390
444
  }
391
445
 
392
- /* Walk children and collect text */
393
446
  size_t capacity = 256;
394
447
  char *text = malloc(capacity);
395
448
  if (!text) return strdup("");
396
-
397
449
  char *write = text;
398
450
  size_t remaining = capacity;
399
451
 
400
452
  cmark_node *child = cmark_node_first_child(heading_node);
401
453
  while (child) {
402
- cmark_node_type type = cmark_node_get_type(child);
403
-
404
- if (type == CMARK_NODE_TEXT) {
405
- const char *literal = cmark_node_get_literal(child);
406
- if (literal) {
407
- size_t len = strlen(literal);
408
- if (len >= remaining) {
409
- size_t new_capacity = capacity * 2;
410
- char *new_text = realloc(text, new_capacity);
411
- if (!new_text) {
412
- free(text);
413
- return strdup("");
414
- }
415
- write = new_text + (write - text);
416
- text = new_text;
417
- remaining = new_capacity - (write - text);
418
- }
419
- memcpy(write, literal, len);
420
- write += len;
421
- remaining -= len;
422
- }
423
- } else if (type == CMARK_NODE_CODE) {
424
- const char *literal = cmark_node_get_literal(child);
425
- if (literal) {
426
- size_t len = strlen(literal);
427
- if (len >= remaining) {
428
- size_t new_capacity = capacity * 2;
429
- char *new_text = realloc(text, new_capacity);
430
- if (!new_text) {
431
- free(text);
432
- return strdup("");
433
- }
434
- write = new_text + (write - text);
435
- text = new_text;
436
- remaining = new_capacity - (write - text);
437
- }
438
- memcpy(write, literal, len);
439
- write += len;
440
- remaining -= len;
441
- }
442
- }
443
- /* Skip other inline elements for ID generation */
444
-
454
+ extract_heading_text_recursive(child, &text, &write, &capacity, &remaining);
445
455
  child = cmark_node_next(child);
446
456
  }
447
457
 
@@ -562,43 +572,91 @@ bool apex_extract_manual_header_id(char **heading_text, char **manual_id_out) {
562
572
  return false;
563
573
  }
564
574
 
575
+ /**
576
+ * Extract plain text from a link node (for simple [ref] style).
+ *
+ * Only the first child of the link is inspected. The function succeeds
+ * when that child is a TEXT node with a literal; later siblings of that
+ * child are not examined.
+ *
+ * @param link_node Candidate node; NULL or a non-LINK node gives NULL
577
+ * @return strdup copy of the first child literal (caller frees), or NULL
+ *         when the conditions above are not met or the copy fails
578
+ */
579
+ static char *get_link_label_text(cmark_node *link_node) {
580
+ if (!link_node || cmark_node_get_type(link_node) != CMARK_NODE_LINK) return NULL;
581
+ cmark_node *child = cmark_node_first_child(link_node);
582
+ if (!child || cmark_node_get_type(child) != CMARK_NODE_TEXT) return NULL; /* label must start with plain text */
583
+ const char *literal = cmark_node_get_literal(child);
584
+ return literal ? strdup(literal) : NULL;
585
+ }
586
+
587
+ /**
588
+ * Check if a string is a valid MMD heading ID (no spaces, no metadata %).
+ *
+ * The string must be non-NULL and non-empty, and must not contain a
+ * space, tab, LF, CR or percent sign. No other characters are rejected.
+ *
+ * @param s Candidate ID, NUL-terminated (may be NULL)
+ * @return true if s passes the checks above, false otherwise
589
+ */
590
+ static bool is_valid_mmd_id(const char *s) {
591
+ if (!s || !*s) return false; /* NULL and empty are both invalid */
592
+ for (; *s; s++) {
593
+ if (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r' || *s == '%') return false;
594
+ }
595
+ return true;
596
+ }
597
+
565
598
  /**
566
599
  * Process manual header IDs in a heading node
567
600
  * Extracts MMD [id] or Kramdown {#id} syntax and stores ID in user_data
568
601
  * Updates the heading text node to remove the manual ID syntax
602
+ *
603
+ * Walks ALL text children (not just first) so headings split by "&" etc.
604
+ * (e.g. TEXT + HTML_INLINE + TEXT) are handled - the IAL may be in a later child.
605
+ *
606
+ * Edge case: When [id] matches a link reference and would render as a link,
607
+ * but [id] is the last element in the heading with other content before it,
608
+ * treat it as MMD heading ID (not a link). This avoids the conflict where
609
+ * "# Heading [mermaid]" with "[mermaid]: URL" would wrongly render mermaid as
610
+ * a link. If the heading is ONLY [id] (e.g. "# [mermaid]"), keep it as a link
611
+ * to avoid empty headings.
569
612
  */
570
613
  bool apex_process_manual_header_id(cmark_node *heading_node) {
571
614
  if (!heading_node || cmark_node_get_type(heading_node) != CMARK_NODE_HEADING) {
572
615
  return false;
573
616
  }
574
617
 
575
- /* Get the text node inside the heading */
576
- cmark_node *text_node = cmark_node_first_child(heading_node);
577
- if (!text_node || cmark_node_get_type(text_node) != CMARK_NODE_TEXT) {
578
- return false;
579
- }
618
+ /* Check each TEXT child for manual ID - "&" etc. can split content across nodes.
619
+ Prefer the rightmost (last) match to align with IAL behavior. */
620
+ cmark_node *match_node = NULL;
621
+ char *match_text = NULL;
622
+ char *match_id = NULL;
580
623
 
581
- const char *text = cmark_node_get_literal(text_node);
582
- if (!text) return false;
624
+ for (cmark_node *child = cmark_node_first_child(heading_node); child;
625
+ child = cmark_node_next(child)) {
626
+ if (cmark_node_get_type(child) != CMARK_NODE_TEXT) continue;
583
627
 
584
- /* Extract text and try to find manual ID */
585
- char *text_copy = strdup(text);
586
- if (!text_copy) return false;
628
+ const char *literal = cmark_node_get_literal(child);
629
+ if (!literal) continue;
587
630
 
588
- char *manual_id = NULL;
589
- bool found = apex_extract_manual_header_id(&text_copy, &manual_id);
631
+ char *text_copy = strdup(literal);
632
+ if (!text_copy) continue;
590
633
 
591
- if (found && manual_id) {
634
+ char *manual_id = NULL;
635
+ bool found = apex_extract_manual_header_id(&text_copy, &manual_id);
636
+
637
+ if (found && manual_id) {
638
+ /* Discard previous match - we want the rightmost */
639
+ free(match_text);
640
+ free(match_id);
641
+ match_node = child;
642
+ match_text = text_copy;
643
+ match_id = manual_id;
644
+ } else {
645
+ free(text_copy);
646
+ if (manual_id) free(manual_id);
647
+ }
648
+ }
649
+
650
+ if (match_node && match_id) {
592
651
  /* Store ID in user_data as id="..." */
593
- char *id_attr = malloc(strlen(manual_id) + 6); /* id="" + null */
652
+ char *id_attr = malloc(strlen(match_id) + 6); /* id="" + null */
594
653
  if (id_attr) {
595
- sprintf(id_attr, "id=\"%s\"", manual_id);
654
+ sprintf(id_attr, "id=\"%s\"", match_id);
596
655
 
597
- /* Merge with existing user_data if present */
656
+ /* Merge with existing user_data if present (e.g. from IAL) */
598
657
  char *existing = (char *)cmark_node_get_user_data(heading_node);
599
658
  if (existing) {
600
- /* Append to existing */
601
- char *combined = malloc(strlen(existing) + strlen(id_attr) + 2); /* + space + null */
659
+ char *combined = malloc(strlen(existing) + strlen(id_attr) + 2);
602
660
  if (combined) {
603
661
  sprintf(combined, "%s %s", existing, id_attr);
604
662
  cmark_node_set_user_data(heading_node, combined);
@@ -611,16 +669,65 @@ bool apex_process_manual_header_id(cmark_node *heading_node) {
611
669
  }
612
670
  }
613
671
 
614
- /* Update the text node to remove manual ID syntax */
615
- cmark_node_set_literal(text_node, text_copy);
616
-
617
- free(manual_id);
618
- free(text_copy);
672
+ cmark_node_set_literal(match_node, match_text);
673
+ free(match_id);
674
+ free(match_text);
619
675
  return true;
620
676
  }
621
677
 
622
- free(text_copy);
623
- if (manual_id) free(manual_id);
624
- return false;
678
+ /* Edge case: [id] was parsed as a link (ref existed). If it's the last
679
+ * element and there's other content, treat as MMD heading ID. */
680
+ cmark_node *last = NULL;
681
+ cmark_node *child = cmark_node_first_child(heading_node);
682
+ while (child) {
683
+ cmark_node_type t = cmark_node_get_type(child);
684
+ if (t != CMARK_NODE_SOFTBREAK && t != CMARK_NODE_LINEBREAK) {
685
+ last = child;
686
+ }
687
+ child = cmark_node_next(child);
688
+ }
689
+
690
+ if (!last || cmark_node_get_type(last) != CMARK_NODE_LINK) return false;
691
+
692
+ /* Must have at least one sibling before the link (avoid empty headings) */
693
+ cmark_node *prev = cmark_node_previous(last);
694
+ if (!prev) return false;
695
+
696
+ char *link_text = get_link_label_text(last);
697
+ if (!link_text || !is_valid_mmd_id(link_text)) {
698
+ free(link_text);
699
+ return false;
700
+ }
701
+
702
+ /* Replace link with text node, set heading id */
703
+ cmark_node *text_replacement = cmark_node_new(CMARK_NODE_TEXT);
704
+ if (!text_replacement) {
705
+ free(link_text);
706
+ return false;
707
+ }
708
+ cmark_node_set_literal(text_replacement, link_text);
709
+ cmark_node_insert_before(last, text_replacement);
710
+ cmark_node_unlink(last);
711
+ cmark_node_free(last);
712
+
713
+ char *id_attr = malloc(strlen(link_text) + 6);
714
+ if (id_attr) {
715
+ sprintf(id_attr, "id=\"%s\"", link_text);
716
+ char *existing = (char *)cmark_node_get_user_data(heading_node);
717
+ if (existing) {
718
+ char *combined = malloc(strlen(existing) + strlen(id_attr) + 2);
719
+ if (combined) {
720
+ sprintf(combined, "%s %s", existing, id_attr);
721
+ cmark_node_set_user_data(heading_node, combined);
722
+ free(id_attr);
723
+ } else {
724
+ cmark_node_set_user_data(heading_node, id_attr);
725
+ }
726
+ } else {
727
+ cmark_node_set_user_data(heading_node, id_attr);
728
+ }
729
+ }
730
+ free(link_text);
731
+ return true;
625
732
  }
626
733