apex-ruby 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/apex_ext/apex_ext.c +6 -0
- data/ext/apex_ext/apex_src/AGENTS.md +41 -0
- data/ext/apex_ext/apex_src/CHANGELOG.md +412 -2
- data/ext/apex_ext/apex_src/CMakeLists.txt +41 -29
- data/ext/apex_ext/apex_src/Formula/apex.rb +2 -2
- data/ext/apex_ext/apex_src/Package.swift +9 -0
- data/ext/apex_ext/apex_src/README.md +31 -9
- data/ext/apex_ext/apex_src/ROADMAP.md +5 -0
- data/ext/apex_ext/apex_src/VERSION +1 -1
- data/ext/apex_ext/apex_src/cli/main.c +1125 -13
- data/ext/apex_ext/apex_src/docs/index.md +459 -0
- data/ext/apex_ext/apex_src/include/apex/apex.h +67 -5
- data/ext/apex_ext/apex_src/include/apex/ast_man.h +20 -0
- data/ext/apex_ext/apex_src/include/apex/ast_markdown.h +39 -0
- data/ext/apex_ext/apex_src/include/apex/ast_terminal.h +40 -0
- data/ext/apex_ext/apex_src/include/apex/module.modulemap +1 -1
- data/ext/apex_ext/apex_src/man/apex-config.5 +333 -258
- data/ext/apex_ext/apex_src/man/apex-config.5.md +3 -1
- data/ext/apex_ext/apex_src/man/apex-plugins.7 +401 -316
- data/ext/apex_ext/apex_src/man/apex.1 +663 -620
- data/ext/apex_ext/apex_src/man/apex.1.html +703 -0
- data/ext/apex_ext/apex_src/man/apex.1.md +160 -90
- data/ext/apex_ext/apex_src/objc/Apex.swift +6 -0
- data/ext/apex_ext/apex_src/objc/NSString+Apex.h +12 -0
- data/ext/apex_ext/apex_src/objc/NSString+Apex.m +9 -0
- data/ext/apex_ext/apex_src/pages/index.md +459 -0
- data/ext/apex_ext/apex_src/src/_README.md +4 -4
- data/ext/apex_ext/apex_src/src/apex.c +702 -44
- data/ext/apex_ext/apex_src/src/ast_json.c +1130 -0
- data/ext/apex_ext/apex_src/src/ast_json.h +46 -0
- data/ext/apex_ext/apex_src/src/ast_man.c +948 -0
- data/ext/apex_ext/apex_src/src/ast_markdown.c +409 -0
- data/ext/apex_ext/apex_src/src/ast_terminal.c +2516 -0
- data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +8 -5
- data/ext/apex_ext/apex_src/src/extensions/definition_list.c +491 -1514
- data/ext/apex_ext/apex_src/src/extensions/definition_list.h +8 -15
- data/ext/apex_ext/apex_src/src/extensions/emoji.c +207 -0
- data/ext/apex_ext/apex_src/src/extensions/emoji.h +14 -0
- data/ext/apex_ext/apex_src/src/extensions/header_ids.c +178 -71
- data/ext/apex_ext/apex_src/src/extensions/highlight.c +37 -5
- data/ext/apex_ext/apex_src/src/extensions/ial.c +416 -47
- data/ext/apex_ext/apex_src/src/extensions/includes.c +241 -10
- data/ext/apex_ext/apex_src/src/extensions/includes.h +1 -0
- data/ext/apex_ext/apex_src/src/extensions/metadata.c +166 -3
- data/ext/apex_ext/apex_src/src/extensions/metadata.h +7 -0
- data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +34 -3
- data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +55 -10
- data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +7 -4
- data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +84 -52
- data/ext/apex_ext/apex_src/src/extensions/toc.c +133 -19
- data/ext/apex_ext/apex_src/src/filters_ast.c +194 -0
- data/ext/apex_ext/apex_src/src/filters_ast.h +36 -0
- data/ext/apex_ext/apex_src/src/html_renderer.c +1265 -35
- data/ext/apex_ext/apex_src/src/html_renderer.h +21 -0
- data/ext/apex_ext/apex_src/src/plugins_remote.c +40 -14
- data/ext/apex_ext/apex_src/tests/CMakeLists.txt +1 -0
- data/ext/apex_ext/apex_src/tests/README.md +11 -5
- data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +13 -2
- data/ext/apex_ext/apex_src/tests/fixtures/filters/filter_output_with_rawblock.json +1 -0
- data/ext/apex_ext/apex_src/tests/fixtures/filters/unwrap.md +7 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/auto-wildcard.md +8 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.avif +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.jpg +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu.webp +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.avif +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.jpg +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/img/app-pass-1-profile-menu@2x.webp +0 -0
- data/ext/apex_ext/apex_src/tests/fixtures/images/media_formats_test.md +63 -0
- data/ext/apex_ext/apex_src/tests/fixtures/includes/data-semi.csv +3 -0
- data/ext/apex_ext/apex_src/tests/fixtures/includes/with space.txt +1 -0
- data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +4 -1
- data/ext/apex_ext/apex_src/tests/paginate_cli_test.sh +64 -0
- data/ext/apex_ext/apex_src/tests/terminal_width_test.sh +29 -0
- data/ext/apex_ext/apex_src/tests/test-swift-package.sh +14 -0
- data/ext/apex_ext/apex_src/tests/test_cmark_callback.c +189 -0
- data/ext/apex_ext/apex_src/tests/test_extensions.c +374 -0
- data/ext/apex_ext/apex_src/tests/test_metadata.c +68 -0
- data/ext/apex_ext/apex_src/tests/test_output.c +291 -2
- data/ext/apex_ext/apex_src/tests/test_runner.c +10 -0
- data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +1 -1
- data/ext/apex_ext/apex_src/tests/test_tables.c +17 -1
- data/lib/apex/version.rb +1 -1
- metadata +32 -2
- data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +0 -456
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Definition List Extension for Apex
|
|
2
|
+
* One-Line Definition List Extension for Apex
|
|
3
3
|
*
|
|
4
|
-
* Supports
|
|
5
|
-
*
|
|
6
|
-
* : Definition 1
|
|
7
|
-
* : Definition 2
|
|
4
|
+
* Supports: Term :: Definition text or Term::Definition text
|
|
5
|
+
* Multiple consecutive lines create one <dl> with multiple <dt>/<dd> pairs.
|
|
8
6
|
*/
|
|
9
7
|
|
|
10
8
|
#ifndef APEX_DEFINITION_LIST_H
|
|
@@ -12,31 +10,26 @@
|
|
|
12
10
|
|
|
13
11
|
#include <stdbool.h>
|
|
14
12
|
#include "cmark-gfm.h"
|
|
15
|
-
#include "cmark-gfm-extension_api.h"
|
|
16
13
|
|
|
17
14
|
#ifdef __cplusplus
|
|
18
15
|
extern "C" {
|
|
19
16
|
#endif
|
|
20
17
|
|
|
21
|
-
/* Custom node types for definition lists */
|
|
22
|
-
/* Note: APEX_NODE_DEFINITION_* are defined as enum values in parser.h, not as variables */
|
|
23
|
-
|
|
24
18
|
/**
|
|
25
|
-
* Process definition lists via preprocessing
|
|
26
|
-
* Converts
|
|
19
|
+
* Process one-line definition lists via preprocessing.
|
|
20
|
+
* Converts "Term :: Definition" lines to <dl><dt>Term</dt><dd>Definition</dd></dl>
|
|
27
21
|
* @param text The markdown text to process
|
|
28
|
-
* @param unsafe If true, allow raw HTML in output
|
|
22
|
+
* @param unsafe If true, allow raw HTML in output
|
|
29
23
|
*/
|
|
30
24
|
char *apex_process_definition_lists(const char *text, bool unsafe);
|
|
31
25
|
|
|
32
26
|
/**
|
|
33
|
-
*
|
|
27
|
+
* Debug touch - no-op for one-line format
|
|
34
28
|
*/
|
|
35
|
-
|
|
29
|
+
void apex_deflist_debug_touch(int enable_definition_lists);
|
|
36
30
|
|
|
37
31
|
#ifdef __cplusplus
|
|
38
32
|
}
|
|
39
33
|
#endif
|
|
40
34
|
|
|
41
35
|
#endif /* APEX_DEFINITION_LIST_H */
|
|
42
|
-
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
#include <stdlib.h>
|
|
8
8
|
#include <stdio.h>
|
|
9
9
|
#include <ctype.h>
|
|
10
|
+
#include <stdbool.h>
|
|
10
11
|
#include <stdint.h>
|
|
11
12
|
#include "emoji_data.h"
|
|
12
13
|
|
|
@@ -15,6 +16,30 @@ static void normalize_emoji_name(char *name);
|
|
|
15
16
|
static int is_table_alignment_pattern(const char *start, const char *end);
|
|
16
17
|
static int is_inside_html_attribute(const char *pos, const char *start);
|
|
17
18
|
|
|
19
|
+
/**
 * Report whether the text at p begins with a list marker.
 *
 * Two shapes are recognised: a single bullet character ('-', '*' or '+'),
 * or a run of decimal digits terminated by '.'. In both cases the marker
 * must be followed immediately by a space or a tab.
 *
 * @param p NUL-terminated text positioned at the candidate marker
 * @return 1 if a list marker starts at p, 0 otherwise
 */
static int looks_like_list_marker(const char *p) {
    const char *after;

    switch (*p) {
    case '\0':
        return 0;
    case '-':
    case '*':
    case '+':
        after = p + 1;
        break;
    default:
        if (!isdigit((unsigned char)*p)) {
            return 0;
        }
        /* Skip the whole number; it must end in a period. */
        for (after = p; isdigit((unsigned char)*after); after++) {
        }
        if (*after != '.') {
            return 0;
        }
        after++;
        break;
    }

    /* Whatever the marker was, whitespace has to follow it. */
    return *after == ' ' || *after == '\t';
}
|
|
30
|
+
|
|
31
|
+
/**
 * Decide whether the line beginning at read is an indented code block.
 *
 * A line starting with a tab counts as code unless the text right after the
 * tab is a list marker. A line starting with four or more spaces counts as
 * code when something follows the spaces and that something is not a list
 * marker, so indented (nested) list items are not mistaken for code.
 *
 * @param read Pointer to the first character of a line in NUL-terminated text
 * @return 1 if the line is treated as indented code, 0 otherwise
 */
static int line_is_indented_code_block(const char *read) {
    if (*read == '\t') {
        return !looks_like_list_marker(read + 1);
    }

    /* Count the leading spaces; fewer than four is ordinary text. */
    size_t spaces = 0;
    while (read[spaces] == ' ') {
        spaces++;
    }
    if (spaces < 4) {
        return 0;
    }

    const char *body = read + spaces;
    return *body != '\0' && !looks_like_list_marker(body);
}
|
|
42
|
+
|
|
18
43
|
/**
|
|
19
44
|
* Find emoji entry by name
|
|
20
45
|
* Returns pointer to emoji_entry or NULL if not found
|
|
@@ -234,7 +259,33 @@ char *apex_replace_emoji(const char *html) {
|
|
|
234
259
|
char *write = output;
|
|
235
260
|
size_t remaining = capacity;
|
|
236
261
|
|
|
262
|
+
bool in_code_tag = false; /* Skip emoji inside <code>...</code> and <pre>...</pre> */
|
|
263
|
+
|
|
237
264
|
while (*read) {
|
|
265
|
+
/* Track <code> and <pre> tags - skip emoji replacement inside code */
|
|
266
|
+
if (*read == '<' && read[1]) {
|
|
267
|
+
if (read[6] && read[1] == '/' && read[2] == 'c' && read[3] == 'o' && read[4] == 'd' && read[5] == 'e' && read[6] == '>') {
|
|
268
|
+
in_code_tag = false;
|
|
269
|
+
} else if (read[5] && read[1] == '/' && read[2] == 'p' && read[3] == 'r' && read[4] == 'e' && read[5] == '>') {
|
|
270
|
+
in_code_tag = false;
|
|
271
|
+
} else if (read[5] && read[1] == 'c' && read[2] == 'o' && read[3] == 'd' && read[4] == 'e' &&
|
|
272
|
+
(read[5] == '>' || read[5] == ' ' || read[5] == '\t')) {
|
|
273
|
+
in_code_tag = true;
|
|
274
|
+
} else if (read[4] && read[1] == 'p' && read[2] == 'r' && read[3] == 'e' &&
|
|
275
|
+
(read[4] == '>' || read[4] == ' ' || read[4] == '\t')) {
|
|
276
|
+
in_code_tag = true;
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
if (in_code_tag) {
|
|
280
|
+
if (remaining > 0) {
|
|
281
|
+
*write++ = *read++;
|
|
282
|
+
remaining--;
|
|
283
|
+
} else {
|
|
284
|
+
read++;
|
|
285
|
+
}
|
|
286
|
+
continue;
|
|
287
|
+
}
|
|
288
|
+
|
|
238
289
|
/* Check if we're inside an index placeholder <!--IDX:...--> - if so, skip emoji processing */
|
|
239
290
|
if (read >= html + 7 && strncmp(read - 7, "<!--IDX:", 8) == 0) {
|
|
240
291
|
/* Find the end of the placeholder */
|
|
@@ -437,6 +488,133 @@ char *apex_replace_emoji(const char *html) {
|
|
|
437
488
|
return output;
|
|
438
489
|
}
|
|
439
490
|
|
|
491
|
+
/**
|
|
492
|
+
* Replace :emoji: patterns in plain text with Unicode emoji only.
|
|
493
|
+
*
|
|
494
|
+
* This variant is intended for non-HTML outputs (e.g. terminal rendering)
|
|
495
|
+
* where we do not want to emit <img> tags. It reuses the same emoji table
|
|
496
|
+
* but only substitutes entries that have a Unicode representation; image-
|
|
497
|
+
* only emoji names are left as their original :name: patterns.
|
|
498
|
+
*/
|
|
499
|
+
char *apex_replace_emoji_text(const char *text) {
|
|
500
|
+
if (!text) return NULL;
|
|
501
|
+
|
|
502
|
+
size_t capacity = strlen(text) * 2 + 16; /* Enough for most unicode expansions */
|
|
503
|
+
char *output = malloc(capacity);
|
|
504
|
+
if (!output) return strdup(text);
|
|
505
|
+
|
|
506
|
+
const char *read = text;
|
|
507
|
+
char *write = output;
|
|
508
|
+
size_t remaining = capacity;
|
|
509
|
+
|
|
510
|
+
bool in_code_block = false;
|
|
511
|
+
bool in_inline_code = false;
|
|
512
|
+
bool in_indented_code_block = false;
|
|
513
|
+
|
|
514
|
+
while (*read) {
|
|
515
|
+
/* At line start: indented code block only if 4+ spaces/tab and not a list line */
|
|
516
|
+
if (read == text || read[-1] == '\n') {
|
|
517
|
+
in_indented_code_block = line_is_indented_code_block(read);
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
/* Track fenced code blocks (```) and inline code (`) */
|
|
521
|
+
if (*read == '`') {
|
|
522
|
+
if (read[1] == '`' && read[2] == '`') {
|
|
523
|
+
in_code_block = !in_code_block;
|
|
524
|
+
} else if (!in_code_block) {
|
|
525
|
+
in_inline_code = !in_inline_code;
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
/* Skip emoji replacement inside any code context */
|
|
530
|
+
if (in_code_block || in_inline_code || in_indented_code_block) {
|
|
531
|
+
if (remaining > 0) {
|
|
532
|
+
*write++ = *read++;
|
|
533
|
+
remaining--;
|
|
534
|
+
} else {
|
|
535
|
+
read++;
|
|
536
|
+
}
|
|
537
|
+
continue;
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
if (*read == ':') {
|
|
541
|
+
/* Look for closing : */
|
|
542
|
+
const char *end = strchr(read + 1, ':');
|
|
543
|
+
if (end && (end - read) < 50) { /* Reasonable emoji name length */
|
|
544
|
+
/* Extract emoji name */
|
|
545
|
+
int name_len = (int)(end - (read + 1));
|
|
546
|
+
const char *name_start = read + 1;
|
|
547
|
+
|
|
548
|
+
if (name_len > 0) {
|
|
549
|
+
/* Reject names containing whitespace */
|
|
550
|
+
int has_space = 0;
|
|
551
|
+
for (int i = 0; i < name_len; i++) {
|
|
552
|
+
char ch = name_start[i];
|
|
553
|
+
if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
|
|
554
|
+
has_space = 1;
|
|
555
|
+
break;
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
/* Skip common table alignment patterns like :---: */
|
|
560
|
+
if (!has_space && is_table_alignment_pattern(name_start, end)) {
|
|
561
|
+
size_t pattern_len = (size_t)(end - read + 1);
|
|
562
|
+
if (pattern_len <= remaining) {
|
|
563
|
+
memcpy(write, read, pattern_len);
|
|
564
|
+
write += pattern_len;
|
|
565
|
+
remaining -= pattern_len;
|
|
566
|
+
}
|
|
567
|
+
read = end + 1;
|
|
568
|
+
continue;
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
if (!has_space) {
|
|
572
|
+
/* Normalize name and look up in emoji table */
|
|
573
|
+
char normalized[64];
|
|
574
|
+
if ((size_t)name_len >= sizeof(normalized)) {
|
|
575
|
+
name_len = (int)sizeof(normalized) - 1;
|
|
576
|
+
}
|
|
577
|
+
memcpy(normalized, name_start, (size_t)name_len);
|
|
578
|
+
normalized[name_len] = '\0';
|
|
579
|
+
normalize_emoji_name(normalized);
|
|
580
|
+
size_t normalized_len = strlen(normalized);
|
|
581
|
+
|
|
582
|
+
const emoji_entry *entry = find_emoji_entry(normalized, (int)normalized_len);
|
|
583
|
+
if (entry && entry->unicode) {
|
|
584
|
+
/* Substitute Unicode emoji */
|
|
585
|
+
size_t emoji_len = strlen(entry->unicode);
|
|
586
|
+
if (emoji_len <= remaining) {
|
|
587
|
+
memcpy(write, entry->unicode, emoji_len);
|
|
588
|
+
write += emoji_len;
|
|
589
|
+
remaining -= emoji_len;
|
|
590
|
+
read = end + 1;
|
|
591
|
+
continue;
|
|
592
|
+
}
|
|
593
|
+
/* If not enough space, fall through and copy pattern as-is */
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
/* Default: copy single byte */
|
|
601
|
+
if (remaining > 0) {
|
|
602
|
+
*write++ = *read++;
|
|
603
|
+
remaining--;
|
|
604
|
+
} else {
|
|
605
|
+
read++;
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
if (remaining > 0) {
|
|
610
|
+
*write = '\0';
|
|
611
|
+
} else {
|
|
612
|
+
output[capacity - 1] = '\0';
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
return output;
|
|
616
|
+
}
|
|
617
|
+
|
|
440
618
|
/**
|
|
441
619
|
* Normalize emoji name: lowercase, hyphens to underscores, remove colons
|
|
442
620
|
*/
|
|
@@ -584,7 +762,36 @@ char *apex_autocorrect_emoji_names(const char *text) {
|
|
|
584
762
|
char *write = output;
|
|
585
763
|
size_t remaining = capacity;
|
|
586
764
|
|
|
765
|
+
bool in_code_block = false;
|
|
766
|
+
bool in_inline_code = false;
|
|
767
|
+
bool in_indented_code_block = false;
|
|
768
|
+
|
|
587
769
|
while (*read) {
|
|
770
|
+
/* At line start: indented code block only if 4+ spaces/tab and not a list line */
|
|
771
|
+
if (read == text || read[-1] == '\n') {
|
|
772
|
+
in_indented_code_block = line_is_indented_code_block(read);
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
/* Track fenced code blocks (```) and inline code (`) */
|
|
776
|
+
if (*read == '`') {
|
|
777
|
+
if (read[1] == '`' && read[2] == '`') {
|
|
778
|
+
in_code_block = !in_code_block;
|
|
779
|
+
} else if (!in_code_block) {
|
|
780
|
+
in_inline_code = !in_inline_code;
|
|
781
|
+
}
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
/* Skip emoji processing inside any code context */
|
|
785
|
+
if (in_code_block || in_inline_code || in_indented_code_block) {
|
|
786
|
+
if (remaining > 0) {
|
|
787
|
+
*write++ = *read++;
|
|
788
|
+
remaining--;
|
|
789
|
+
} else {
|
|
790
|
+
read++;
|
|
791
|
+
}
|
|
792
|
+
continue;
|
|
793
|
+
}
|
|
794
|
+
|
|
588
795
|
if (*read == ':') {
|
|
589
796
|
/* Look for closing : */
|
|
590
797
|
const char *end = strchr(read + 1, ':');
|
|
@@ -14,6 +14,20 @@ extern "C" {
|
|
|
14
14
|
*/
|
|
15
15
|
char *apex_replace_emoji(const char *html);
|
|
16
16
|
|
|
17
|
+
/**
|
|
18
|
+
* Replace :emoji: patterns in plain text with Unicode emoji only.
|
|
19
|
+
*
|
|
20
|
+
* This is suitable for non-HTML outputs (e.g. terminal rendering) where
|
|
21
|
+
* image-based emoji tags are not desired. If an emoji entry has no
|
|
22
|
+
* Unicode representation (image-only), the original :emoji: pattern is
|
|
23
|
+
* left unchanged.
|
|
24
|
+
*
|
|
25
|
+
* @param text Plain text to process (UTF-8)
|
|
26
|
+
* @return Newly allocated string with emoji replacements applied, or NULL
|
|
27
|
+
* on error. Caller must free the returned string.
|
|
28
|
+
*/
|
|
29
|
+
char *apex_replace_emoji_text(const char *text);
|
|
30
|
+
|
|
17
31
|
/**
|
|
18
32
|
* Find emoji name from unicode emoji (reverse lookup)
|
|
19
33
|
* @param unicode The unicode emoji string (UTF-8)
|
|
@@ -98,6 +98,15 @@ char *apex_generate_header_id(const char *text, apex_id_format_t format) {
|
|
|
98
98
|
continue;
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
+
/* Check for apostrophes: curly (') U+2019: 0xE2 0x80 0x99, left quote (') U+2018: 0xE2 0x80 0x98 */
|
|
102
|
+
if (c == 0xE2 && read[1] != '\0' && read[2] != '\0' &&
|
|
103
|
+
(unsigned char)read[1] == 0x80 &&
|
|
104
|
+
((unsigned char)read[2] == 0x99 || (unsigned char)read[2] == 0x98)) {
|
|
105
|
+
/* Remove apostrophes in all formats - they break anchor links */
|
|
106
|
+
read += 2;
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
|
|
101
110
|
if (format == APEX_ID_FORMAT_MMD) {
|
|
102
111
|
/* MMD format: preserve dashes, lowercase alphanumerics, preserve diacritics, skip spaces/punctuation */
|
|
103
112
|
if (c == '-') {
|
|
@@ -381,6 +390,51 @@ char *apex_generate_header_id(const char *text, apex_id_format_t format) {
|
|
|
381
390
|
return id;
|
|
382
391
|
}
|
|
383
392
|
|
|
393
|
+
/**
 * Append a NUL-terminated literal to a growable text buffer.
 *
 * The buffer is described by four caller-owned values: *text (start of the
 * allocation), *write (next free byte), *capacity (allocated size in bytes)
 * and *remaining (free bytes from *write to the end of the allocation).
 * All four are updated in place.
 *
 * The buffer is grown, by doubling, whenever the literal would not fit with
 * at least one byte to spare; that spare byte is where the caller later puts
 * the terminator. This function itself writes no terminator.
 *
 * The required capacity is computed first and the buffer is reallocated once.
 * An empty buffer (*capacity == 0) is given a small starting size rather than
 * being doubled from zero. If the size computation would overflow or the
 * allocation fails, the literal is dropped and the buffer is left untouched.
 *
 * @param text      In/out: start of the buffer (may be moved by realloc)
 * @param write     In/out: current write position inside the buffer
 * @param capacity  In/out: allocated size of the buffer
 * @param remaining In/out: bytes available from *write to the end
 * @param literal   Text to append; NULL is ignored
 */
static void append_literal(char **text, char **write, size_t *capacity, size_t *remaining,
                           const char *literal) {
    if (!literal) return;

    size_t len = strlen(literal);

    if (len >= *remaining) {
        const size_t size_max = (size_t)-1;
        size_t used = *text ? (size_t)(*write - *text) : 0;

        /* needed = used + len + 1 (spare byte for the terminator) */
        if (len >= size_max - used) return;
        size_t needed = used + len + 1;

        size_t new_cap = *capacity ? *capacity : 16;
        while (new_cap < needed) {
            if (new_cap > size_max / 2) {
                /* Doubling would overflow; fall back to the exact size. */
                new_cap = needed;
                break;
            }
            new_cap *= 2;
        }

        char *new_text = realloc(*text, new_cap);
        if (!new_text) return; /* original buffer is still valid and unchanged */

        *text = new_text;
        *write = new_text + used;
        *capacity = new_cap;
        *remaining = new_cap - used;
    }

    memcpy(*write, literal, len);
    *write += len;
    *remaining -= len;
}
|
|
415
|
+
|
|
416
|
+
/**
 * Append the literal text found at or below node to the growable buffer.
 *
 * TEXT, CODE and HTML_INLINE nodes contribute their literal directly and are
 * not descended into. Every other node type is treated as a container
 * (EMPH, STRONG, LINK, etc.) and its children are visited in order, so that
 * "### *Processing* modes" yields "Processing modes".
 *
 * The buffer arguments are passed straight through to append_literal().
 */
static void extract_heading_text_recursive(cmark_node *node, char **text, char **write,
                                           size_t *capacity, size_t *remaining) {
    const cmark_node_type kind = cmark_node_get_type(node);

    /* HTML_INLINE is included because it carries a literal too (e.g. an
       ampersand entity in "Documentation & resources"). */
    const bool carries_literal = (kind == CMARK_NODE_TEXT ||
                                  kind == CMARK_NODE_CODE ||
                                  kind == CMARK_NODE_HTML_INLINE);
    if (carries_literal) {
        append_literal(text, write, capacity, remaining, cmark_node_get_literal(node));
        return;
    }

    /* Inline container: walk its children depth-first. */
    for (cmark_node *kid = cmark_node_first_child(node); kid != NULL;
         kid = cmark_node_next(kid)) {
        extract_heading_text_recursive(kid, text, write, capacity, remaining);
    }
}
|
|
437
|
+
|
|
384
438
|
/**
|
|
385
439
|
* Extract text content from a heading node
|
|
386
440
|
*/
|
|
@@ -389,59 +443,15 @@ char *apex_extract_heading_text(cmark_node *heading_node) {
|
|
|
389
443
|
return strdup("");
|
|
390
444
|
}
|
|
391
445
|
|
|
392
|
-
/* Walk children and collect text */
|
|
393
446
|
size_t capacity = 256;
|
|
394
447
|
char *text = malloc(capacity);
|
|
395
448
|
if (!text) return strdup("");
|
|
396
|
-
|
|
397
449
|
char *write = text;
|
|
398
450
|
size_t remaining = capacity;
|
|
399
451
|
|
|
400
452
|
cmark_node *child = cmark_node_first_child(heading_node);
|
|
401
453
|
while (child) {
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
if (type == CMARK_NODE_TEXT) {
|
|
405
|
-
const char *literal = cmark_node_get_literal(child);
|
|
406
|
-
if (literal) {
|
|
407
|
-
size_t len = strlen(literal);
|
|
408
|
-
if (len >= remaining) {
|
|
409
|
-
size_t new_capacity = capacity * 2;
|
|
410
|
-
char *new_text = realloc(text, new_capacity);
|
|
411
|
-
if (!new_text) {
|
|
412
|
-
free(text);
|
|
413
|
-
return strdup("");
|
|
414
|
-
}
|
|
415
|
-
write = new_text + (write - text);
|
|
416
|
-
text = new_text;
|
|
417
|
-
remaining = new_capacity - (write - text);
|
|
418
|
-
}
|
|
419
|
-
memcpy(write, literal, len);
|
|
420
|
-
write += len;
|
|
421
|
-
remaining -= len;
|
|
422
|
-
}
|
|
423
|
-
} else if (type == CMARK_NODE_CODE) {
|
|
424
|
-
const char *literal = cmark_node_get_literal(child);
|
|
425
|
-
if (literal) {
|
|
426
|
-
size_t len = strlen(literal);
|
|
427
|
-
if (len >= remaining) {
|
|
428
|
-
size_t new_capacity = capacity * 2;
|
|
429
|
-
char *new_text = realloc(text, new_capacity);
|
|
430
|
-
if (!new_text) {
|
|
431
|
-
free(text);
|
|
432
|
-
return strdup("");
|
|
433
|
-
}
|
|
434
|
-
write = new_text + (write - text);
|
|
435
|
-
text = new_text;
|
|
436
|
-
remaining = new_capacity - (write - text);
|
|
437
|
-
}
|
|
438
|
-
memcpy(write, literal, len);
|
|
439
|
-
write += len;
|
|
440
|
-
remaining -= len;
|
|
441
|
-
}
|
|
442
|
-
}
|
|
443
|
-
/* Skip other inline elements for ID generation */
|
|
444
|
-
|
|
454
|
+
extract_heading_text_recursive(child, &text, &write, &capacity, &remaining);
|
|
445
455
|
child = cmark_node_next(child);
|
|
446
456
|
}
|
|
447
457
|
|
|
@@ -562,43 +572,91 @@ bool apex_extract_manual_header_id(char **heading_text, char **manual_id_out) {
|
|
|
562
572
|
return false;
|
|
563
573
|
}
|
|
564
574
|
|
|
575
|
+
/**
|
|
576
|
+
* Extract plain text from a link node (for simple [ref] style).
|
|
577
|
+
* Returns allocated string or NULL.
|
|
578
|
+
*/
|
|
579
|
+
static char *get_link_label_text(cmark_node *link_node) {
|
|
580
|
+
if (!link_node || cmark_node_get_type(link_node) != CMARK_NODE_LINK) return NULL;
|
|
581
|
+
cmark_node *child = cmark_node_first_child(link_node);
|
|
582
|
+
if (!child || cmark_node_get_type(child) != CMARK_NODE_TEXT) return NULL;
|
|
583
|
+
const char *literal = cmark_node_get_literal(child);
|
|
584
|
+
return literal ? strdup(literal) : NULL;
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
/**
 * Check whether a string can serve as an MMD heading ID.
 *
 * A valid ID is non-empty and contains no space, tab, newline, carriage
 * return or '%' (the percent sign marks metadata).
 *
 * @param s Candidate ID; may be NULL
 * @return true if s is a usable ID, false otherwise (including NULL or "")
 */
static bool is_valid_mmd_id(const char *s) {
    if (s == NULL || *s == '\0') {
        return false;
    }
    /* strcspn stops at the first forbidden byte; reaching the terminator
       means none was present. */
    return s[strcspn(s, " \t\n\r%")] == '\0';
}
|
|
597
|
+
|
|
565
598
|
/**
|
|
566
599
|
* Process manual header IDs in a heading node
|
|
567
600
|
* Extracts MMD [id] or Kramdown {#id} syntax and stores ID in user_data
|
|
568
601
|
* Updates the heading text node to remove the manual ID syntax
|
|
602
|
+
*
|
|
603
|
+
* Walks ALL text children (not just first) so headings split by "&" etc.
|
|
604
|
+
* (e.g. TEXT + HTML_INLINE + TEXT) are handled - the IAL may be in a later child.
|
|
605
|
+
*
|
|
606
|
+
* Edge case: When [id] matches a link reference and would render as a link,
|
|
607
|
+
* but [id] is the last element in the heading with other content before it,
|
|
608
|
+
* treat it as MMD heading ID (not a link). This avoids the conflict where
|
|
609
|
+
* "# Heading [mermaid]" with "[mermaid]: URL" would wrongly render mermaid as
|
|
610
|
+
* a link. If the heading is ONLY [id] (e.g. "# [mermaid]"), keep it as a link
|
|
611
|
+
* to avoid empty headings.
|
|
569
612
|
*/
|
|
570
613
|
bool apex_process_manual_header_id(cmark_node *heading_node) {
|
|
571
614
|
if (!heading_node || cmark_node_get_type(heading_node) != CMARK_NODE_HEADING) {
|
|
572
615
|
return false;
|
|
573
616
|
}
|
|
574
617
|
|
|
575
|
-
/*
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
618
|
+
/* Check each TEXT child for manual ID - "&" etc. can split content across nodes.
|
|
619
|
+
Prefer the rightmost (last) match to align with IAL behavior. */
|
|
620
|
+
cmark_node *match_node = NULL;
|
|
621
|
+
char *match_text = NULL;
|
|
622
|
+
char *match_id = NULL;
|
|
580
623
|
|
|
581
|
-
|
|
582
|
-
|
|
624
|
+
for (cmark_node *child = cmark_node_first_child(heading_node); child;
|
|
625
|
+
child = cmark_node_next(child)) {
|
|
626
|
+
if (cmark_node_get_type(child) != CMARK_NODE_TEXT) continue;
|
|
583
627
|
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
if (!text_copy) return false;
|
|
628
|
+
const char *literal = cmark_node_get_literal(child);
|
|
629
|
+
if (!literal) continue;
|
|
587
630
|
|
|
588
|
-
|
|
589
|
-
|
|
631
|
+
char *text_copy = strdup(literal);
|
|
632
|
+
if (!text_copy) continue;
|
|
590
633
|
|
|
591
|
-
|
|
634
|
+
char *manual_id = NULL;
|
|
635
|
+
bool found = apex_extract_manual_header_id(&text_copy, &manual_id);
|
|
636
|
+
|
|
637
|
+
if (found && manual_id) {
|
|
638
|
+
/* Discard previous match - we want the rightmost */
|
|
639
|
+
free(match_text);
|
|
640
|
+
free(match_id);
|
|
641
|
+
match_node = child;
|
|
642
|
+
match_text = text_copy;
|
|
643
|
+
match_id = manual_id;
|
|
644
|
+
} else {
|
|
645
|
+
free(text_copy);
|
|
646
|
+
if (manual_id) free(manual_id);
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
if (match_node && match_id) {
|
|
592
651
|
/* Store ID in user_data as id="..." */
|
|
593
|
-
char *id_attr = malloc(strlen(
|
|
652
|
+
char *id_attr = malloc(strlen(match_id) + 6); /* id="" + null */
|
|
594
653
|
if (id_attr) {
|
|
595
|
-
sprintf(id_attr, "id=\"%s\"",
|
|
654
|
+
sprintf(id_attr, "id=\"%s\"", match_id);
|
|
596
655
|
|
|
597
|
-
/* Merge with existing user_data if present */
|
|
656
|
+
/* Merge with existing user_data if present (e.g. from IAL) */
|
|
598
657
|
char *existing = (char *)cmark_node_get_user_data(heading_node);
|
|
599
658
|
if (existing) {
|
|
600
|
-
|
|
601
|
-
char *combined = malloc(strlen(existing) + strlen(id_attr) + 2); /* + space + null */
|
|
659
|
+
char *combined = malloc(strlen(existing) + strlen(id_attr) + 2);
|
|
602
660
|
if (combined) {
|
|
603
661
|
sprintf(combined, "%s %s", existing, id_attr);
|
|
604
662
|
cmark_node_set_user_data(heading_node, combined);
|
|
@@ -611,16 +669,65 @@ bool apex_process_manual_header_id(cmark_node *heading_node) {
|
|
|
611
669
|
}
|
|
612
670
|
}
|
|
613
671
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
free(manual_id);
|
|
618
|
-
free(text_copy);
|
|
672
|
+
cmark_node_set_literal(match_node, match_text);
|
|
673
|
+
free(match_id);
|
|
674
|
+
free(match_text);
|
|
619
675
|
return true;
|
|
620
676
|
}
|
|
621
677
|
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
678
|
+
/* Edge case: [id] was parsed as a link (ref existed). If it's the last
|
|
679
|
+
* element and there's other content, treat as MMD heading ID. */
|
|
680
|
+
cmark_node *last = NULL;
|
|
681
|
+
cmark_node *child = cmark_node_first_child(heading_node);
|
|
682
|
+
while (child) {
|
|
683
|
+
cmark_node_type t = cmark_node_get_type(child);
|
|
684
|
+
if (t != CMARK_NODE_SOFTBREAK && t != CMARK_NODE_LINEBREAK) {
|
|
685
|
+
last = child;
|
|
686
|
+
}
|
|
687
|
+
child = cmark_node_next(child);
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
if (!last || cmark_node_get_type(last) != CMARK_NODE_LINK) return false;
|
|
691
|
+
|
|
692
|
+
/* Must have at least one sibling before the link (avoid empty headings) */
|
|
693
|
+
cmark_node *prev = cmark_node_previous(last);
|
|
694
|
+
if (!prev) return false;
|
|
695
|
+
|
|
696
|
+
char *link_text = get_link_label_text(last);
|
|
697
|
+
if (!link_text || !is_valid_mmd_id(link_text)) {
|
|
698
|
+
free(link_text);
|
|
699
|
+
return false;
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
/* Replace link with text node, set heading id */
|
|
703
|
+
cmark_node *text_replacement = cmark_node_new(CMARK_NODE_TEXT);
|
|
704
|
+
if (!text_replacement) {
|
|
705
|
+
free(link_text);
|
|
706
|
+
return false;
|
|
707
|
+
}
|
|
708
|
+
cmark_node_set_literal(text_replacement, link_text);
|
|
709
|
+
cmark_node_insert_before(last, text_replacement);
|
|
710
|
+
cmark_node_unlink(last);
|
|
711
|
+
cmark_node_free(last);
|
|
712
|
+
|
|
713
|
+
char *id_attr = malloc(strlen(link_text) + 6);
|
|
714
|
+
if (id_attr) {
|
|
715
|
+
sprintf(id_attr, "id=\"%s\"", link_text);
|
|
716
|
+
char *existing = (char *)cmark_node_get_user_data(heading_node);
|
|
717
|
+
if (existing) {
|
|
718
|
+
char *combined = malloc(strlen(existing) + strlen(id_attr) + 2);
|
|
719
|
+
if (combined) {
|
|
720
|
+
sprintf(combined, "%s %s", existing, id_attr);
|
|
721
|
+
cmark_node_set_user_data(heading_node, combined);
|
|
722
|
+
free(id_attr);
|
|
723
|
+
} else {
|
|
724
|
+
cmark_node_set_user_data(heading_node, id_attr);
|
|
725
|
+
}
|
|
726
|
+
} else {
|
|
727
|
+
cmark_node_set_user_data(heading_node, id_attr);
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
free(link_text);
|
|
731
|
+
return true;
|
|
625
732
|
}
|
|
626
733
|
|