commonmarker 0.23.10 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +1182 -0
  3. data/Cargo.toml +7 -0
  4. data/README.md +217 -170
  5. data/ext/commonmarker/Cargo.toml +20 -0
  6. data/ext/commonmarker/extconf.rb +3 -6
  7. data/ext/commonmarker/src/lib.rs +103 -0
  8. data/ext/commonmarker/src/node.rs +1151 -0
  9. data/ext/commonmarker/src/options.rs +175 -0
  10. data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
  11. data/ext/commonmarker/src/plugins.rs +6 -0
  12. data/ext/commonmarker/src/utils.rs +8 -0
  13. data/lib/commonmarker/config.rb +92 -40
  14. data/lib/commonmarker/constants.rb +7 -0
  15. data/lib/commonmarker/extension.rb +14 -0
  16. data/lib/commonmarker/node/ast.rb +8 -0
  17. data/lib/commonmarker/node/inspect.rb +14 -4
  18. data/lib/commonmarker/node.rb +29 -47
  19. data/lib/commonmarker/renderer.rb +1 -127
  20. data/lib/commonmarker/utils.rb +22 -0
  21. data/lib/commonmarker/version.rb +2 -2
  22. data/lib/commonmarker.rb +27 -25
  23. metadata +38 -186
  24. data/Rakefile +0 -109
  25. data/bin/commonmarker +0 -118
  26. data/commonmarker.gemspec +0 -38
  27. data/ext/commonmarker/arena.c +0 -104
  28. data/ext/commonmarker/autolink.c +0 -508
  29. data/ext/commonmarker/autolink.h +0 -8
  30. data/ext/commonmarker/blocks.c +0 -1622
  31. data/ext/commonmarker/buffer.c +0 -278
  32. data/ext/commonmarker/buffer.h +0 -116
  33. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  34. data/ext/commonmarker/chunk.h +0 -135
  35. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  36. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
  37. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  38. data/ext/commonmarker/cmark-gfm.h +0 -833
  39. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  40. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  41. data/ext/commonmarker/cmark.c +0 -55
  42. data/ext/commonmarker/cmark_ctype.c +0 -44
  43. data/ext/commonmarker/cmark_ctype.h +0 -33
  44. data/ext/commonmarker/commonmark.c +0 -514
  45. data/ext/commonmarker/commonmarker.c +0 -1308
  46. data/ext/commonmarker/commonmarker.h +0 -16
  47. data/ext/commonmarker/config.h +0 -76
  48. data/ext/commonmarker/core-extensions.c +0 -27
  49. data/ext/commonmarker/entities.inc +0 -2138
  50. data/ext/commonmarker/ext_scanners.c +0 -879
  51. data/ext/commonmarker/ext_scanners.h +0 -24
  52. data/ext/commonmarker/footnotes.c +0 -63
  53. data/ext/commonmarker/footnotes.h +0 -27
  54. data/ext/commonmarker/houdini.h +0 -57
  55. data/ext/commonmarker/houdini_href_e.c +0 -100
  56. data/ext/commonmarker/houdini_html_e.c +0 -66
  57. data/ext/commonmarker/houdini_html_u.c +0 -149
  58. data/ext/commonmarker/html.c +0 -502
  59. data/ext/commonmarker/html.h +0 -27
  60. data/ext/commonmarker/inlines.c +0 -1788
  61. data/ext/commonmarker/inlines.h +0 -29
  62. data/ext/commonmarker/iterator.c +0 -159
  63. data/ext/commonmarker/iterator.h +0 -26
  64. data/ext/commonmarker/latex.c +0 -468
  65. data/ext/commonmarker/linked_list.c +0 -37
  66. data/ext/commonmarker/man.c +0 -274
  67. data/ext/commonmarker/map.c +0 -129
  68. data/ext/commonmarker/map.h +0 -44
  69. data/ext/commonmarker/node.c +0 -1045
  70. data/ext/commonmarker/node.h +0 -167
  71. data/ext/commonmarker/parser.h +0 -59
  72. data/ext/commonmarker/plaintext.c +0 -218
  73. data/ext/commonmarker/plugin.c +0 -36
  74. data/ext/commonmarker/plugin.h +0 -34
  75. data/ext/commonmarker/references.c +0 -43
  76. data/ext/commonmarker/references.h +0 -26
  77. data/ext/commonmarker/registry.c +0 -63
  78. data/ext/commonmarker/registry.h +0 -24
  79. data/ext/commonmarker/render.c +0 -213
  80. data/ext/commonmarker/render.h +0 -62
  81. data/ext/commonmarker/scanners.c +0 -14056
  82. data/ext/commonmarker/scanners.h +0 -70
  83. data/ext/commonmarker/scanners.re +0 -341
  84. data/ext/commonmarker/strikethrough.c +0 -167
  85. data/ext/commonmarker/strikethrough.h +0 -9
  86. data/ext/commonmarker/syntax_extension.c +0 -149
  87. data/ext/commonmarker/syntax_extension.h +0 -34
  88. data/ext/commonmarker/table.c +0 -917
  89. data/ext/commonmarker/table.h +0 -12
  90. data/ext/commonmarker/tagfilter.c +0 -60
  91. data/ext/commonmarker/tagfilter.h +0 -8
  92. data/ext/commonmarker/tasklist.c +0 -156
  93. data/ext/commonmarker/tasklist.h +0 -8
  94. data/ext/commonmarker/utf8.c +0 -317
  95. data/ext/commonmarker/utf8.h +0 -35
  96. data/ext/commonmarker/xml.c +0 -182
  97. data/lib/commonmarker/renderer/html_renderer.rb +0 -256
@@ -1,24 +0,0 @@
1
- #include "chunk.h"
2
- #include "cmark-gfm.h"
3
-
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
7
-
8
- bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
9
- unsigned char *ptr, int len, bufsize_t offset);
10
- bufsize_t _scan_table_start(const unsigned char *p);
11
- bufsize_t _scan_table_cell(const unsigned char *p);
12
- bufsize_t _scan_table_cell_end(const unsigned char *p);
13
- bufsize_t _scan_table_row_end(const unsigned char *p);
14
- bufsize_t _scan_tasklist(const unsigned char *p);
15
-
16
- #define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
17
- #define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
18
- #define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
19
- #define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
20
- #define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n)
21
-
22
- #ifdef __cplusplus
23
- }
24
- #endif
@@ -1,63 +0,0 @@
1
- #include "cmark-gfm.h"
2
- #include "parser.h"
3
- #include "footnotes.h"
4
- #include "inlines.h"
5
- #include "chunk.h"
6
-
7
- static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
8
- cmark_footnote *ref = (cmark_footnote *)_ref;
9
- cmark_mem *mem = map->mem;
10
- if (ref != NULL) {
11
- mem->free(ref->entry.label);
12
- if (ref->node)
13
- cmark_node_free(ref->node);
14
- mem->free(ref);
15
- }
16
- }
17
-
18
- void cmark_footnote_create(cmark_map *map, cmark_node *node) {
19
- cmark_footnote *ref;
20
- unsigned char *reflabel = normalize_map_label(map->mem, &node->as.literal);
21
-
22
- /* empty footnote name, or composed from only whitespace */
23
- if (reflabel == NULL)
24
- return;
25
-
26
- assert(map->sorted == NULL);
27
-
28
- ref = (cmark_footnote *)map->mem->calloc(1, sizeof(*ref));
29
- ref->entry.label = reflabel;
30
- ref->node = node;
31
- ref->entry.age = map->size;
32
- ref->entry.next = map->refs;
33
-
34
- map->refs = (cmark_map_entry *)ref;
35
- map->size++;
36
- }
37
-
38
- cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
39
- return cmark_map_new(mem, footnote_free);
40
- }
41
-
42
- // Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
43
- // unlink all of the footnote nodes before freeing their memory.
44
- //
45
- // Sometimes, two (unused) footnote nodes can end up referencing each other,
46
- // which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
47
- // etc, can lead to a use-after-free error.
48
- //
49
- // Better to `unlink` every footnote node first, setting their next, prev, and
50
- // parent pointers to NULL, and only then walk thru & free them up.
51
- void cmark_unlink_footnotes_map(cmark_map *map) {
52
- cmark_map_entry *ref;
53
- cmark_map_entry *next;
54
-
55
- ref = map->refs;
56
- while(ref) {
57
- next = ref->next;
58
- if (((cmark_footnote *)ref)->node) {
59
- cmark_node_unlink(((cmark_footnote *)ref)->node);
60
- }
61
- ref = next;
62
- }
63
- }
@@ -1,27 +0,0 @@
1
- #ifndef CMARK_FOOTNOTES_H
2
- #define CMARK_FOOTNOTES_H
3
-
4
- #include "map.h"
5
-
6
- #ifdef __cplusplus
7
- extern "C" {
8
- #endif
9
-
10
- struct cmark_footnote {
11
- cmark_map_entry entry;
12
- cmark_node *node;
13
- unsigned int ix;
14
- };
15
-
16
- typedef struct cmark_footnote cmark_footnote;
17
-
18
- void cmark_footnote_create(cmark_map *map, cmark_node *node);
19
- cmark_map *cmark_footnote_map_new(cmark_mem *mem);
20
-
21
- void cmark_unlink_footnotes_map(cmark_map *map);
22
-
23
- #ifdef __cplusplus
24
- }
25
- #endif
26
-
27
- #endif
@@ -1,57 +0,0 @@
1
- #ifndef CMARK_HOUDINI_H
2
- #define CMARK_HOUDINI_H
3
-
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
7
-
8
- #include <stdint.h>
9
- #include "config.h"
10
- #include "buffer.h"
11
-
12
/*
 * Branch-prediction hints.  They expand to __builtin_expect when the build
 * configuration reports it as available and to the bare condition otherwise.
 */
#if defined(HAVE___BUILTIN_EXPECT)
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

#ifdef HOUDINI_USE_LOCALE
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
 * Locale-independent digit classifiers -- do not trust the current locale.
 *
 * _isxdigit is spelled out as explicit ranges rather than a strchr() lookup:
 * strchr() also matches the string's terminating NUL, so a lookup-based
 * version reports the byte 0 as a hexadecimal digit (and needs <string.h>).
 *
 * Both macros evaluate their argument more than once; pass an expression
 * without side effects.
 */
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#define _isxdigit(c)                                                           \
  (_isdigit(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))
#endif

/* Buffer size estimates: escaping is budgeted at 120% of the input size,
 * unescaping at the input size itself. */
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
33
-
34
- CMARK_GFM_EXPORT
35
- bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
36
- bufsize_t size);
37
- CMARK_GFM_EXPORT
38
- int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
39
- bufsize_t size);
40
- CMARK_GFM_EXPORT
41
- int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
42
- bufsize_t size, int secure);
43
- CMARK_GFM_EXPORT
44
- int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
45
- bufsize_t size);
46
- CMARK_GFM_EXPORT
47
- void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
48
- bufsize_t size);
49
- CMARK_GFM_EXPORT
50
- int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
51
- bufsize_t size);
52
-
53
- #ifdef __cplusplus
54
- }
55
- #endif
56
-
57
- #endif
@@ -1,100 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
-
7
/*
 * HREF_SAFE[b] is non-zero when byte b is copied into an href unchanged.
 *
 * The following characters will not be escaped:
 *
 *        -_.+!*'(),%#@?=;:/,+&$~ alphanum
 *
 * Note that this character set is the addition of:
 *
 *  - The characters which are safe to be in a URL
 *  - The characters which are *not* safe to be in
 *    a URL because they are RESERVED characters.
 *
 * We assume (lazily) that any RESERVED char that
 * appears inside a URL is actually meant to
 * have its native function (i.e. as a URL
 * component/separator) and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp)
 * and ' (single quote) do not appear in the table.
 * They are meant to appear in the URL as components,
 * yet they require special HTML-entity escaping
 * to generate valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 */
static const char HREF_SAFE[] = {
    /*   0 -  23 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  24 -  47 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    /*  48 -  71 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*  72 -  95 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /*  96 - 119 */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 120 - 143 */
    1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 144 - 167 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 168 - 191 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 192 - 215 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 216 - 239 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 240 - 255 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
45
-
46
- int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
47
- static const uint8_t hex_chars[] = "0123456789ABCDEF";
48
- bufsize_t i = 0, org;
49
- uint8_t hex_str[3];
50
-
51
- hex_str[0] = '%';
52
-
53
- while (i < size) {
54
- org = i;
55
- while (i < size && HREF_SAFE[src[i]] != 0)
56
- i++;
57
-
58
- if (likely(i > org))
59
- cmark_strbuf_put(ob, src + org, i - org);
60
-
61
- /* escaping */
62
- if (i >= size)
63
- break;
64
-
65
- switch (src[i]) {
66
- /* amp appears all the time in URLs, but needs
67
- * HTML-entity escaping to be inside an href */
68
- case '&':
69
- cmark_strbuf_puts(ob, "&amp;");
70
- break;
71
-
72
- /* the single quote is a valid URL character
73
- * according to the standard; it needs HTML
74
- * entity escaping too */
75
- case '\'':
76
- cmark_strbuf_puts(ob, "&#x27;");
77
- break;
78
-
79
- /* the space can be escaped to %20 or a plus
80
- * sign. we're going with the generic escape
81
- * for now. the plus thing is more commonly seen
82
- * when building GET strings */
83
- #if 0
84
- case ' ':
85
- cmark_strbuf_putc(ob, '+');
86
- break;
87
- #endif
88
-
89
- /* every other character goes with a %XX escaping */
90
- default:
91
- hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
92
- hex_str[2] = hex_chars[src[i] & 0xF];
93
- cmark_strbuf_put(ob, hex_str, 3);
94
- }
95
-
96
- i++;
97
- }
98
-
99
- return 1;
100
- }
@@ -1,66 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
-
7
/**
 * Escaping follows the OWASP rules; the numeric entities are emitted in
 * decimal form (&#39; and &#47; are the same characters as &#x27; and
 * &#x2F;):
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#39;     &apos; is not recommended
 * / --> &#47;     forward slash is included as it helps end an HTML entity
 *
 * HTML_ESCAPE_TABLE maps each byte to an index into HTML_ESCAPES; index 0
 * means the byte needs no escaping.
 */
static const char HTML_ESCAPE_TABLE[] = {
    /*   0 -  23 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  24 -  47: '"' = 1, '&' = 2, '\'' = 3, '/' = 4 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
    /*  48 -  71: '<' = 5, '>' = 6 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  72 -  95 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  96 - 119 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 120 - 143 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 144 - 167 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 168 - 191 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 192 - 215 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 216 - 239 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 240 - 255 */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

/* Replacement text for each non-zero HTML_ESCAPE_TABLE value. */
static const char *HTML_ESCAPES[] = {
    "",       /* 0: not escaped */
    "&quot;", /* 1: '"'  */
    "&amp;",  /* 2: '&'  */
    "&#39;",  /* 3: '\'' */
    "&#47;",  /* 4: '/'  */
    "&lt;",   /* 5: '<'  */
    "&gt;",   /* 6: '>'  */
};
34
-
35
- int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
36
- int secure) {
37
- bufsize_t i = 0, org, esc = 0;
38
-
39
- while (i < size) {
40
- org = i;
41
- while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
42
- i++;
43
-
44
- if (i > org)
45
- cmark_strbuf_put(ob, src + org, i - org);
46
-
47
- /* escaping */
48
- if (unlikely(i >= size))
49
- break;
50
-
51
- /* The forward slash and single quote are only escaped in secure mode */
52
- if ((src[i] == '/' || src[i] == '\'') && !secure) {
53
- cmark_strbuf_putc(ob, src[i]);
54
- } else {
55
- cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
56
- }
57
-
58
- i++;
59
- }
60
-
61
- return 1;
62
- }
63
-
64
- int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
65
- return houdini_escape_html0(ob, src, size, 1);
66
- }
@@ -1,149 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "buffer.h"
6
- #include "houdini.h"
7
- #include "utf8.h"
8
- #include "entities.inc"
9
-
10
- /* Binary tree lookup code for entities added by JGM */
11
-
12
- static const unsigned char *S_lookup(int i, int low, int hi,
13
- const unsigned char *s, int len) {
14
- int j;
15
- int cmp =
16
- strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
17
- if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
18
- return (const unsigned char *)cmark_entities[i].bytes;
19
- } else if (cmp <= 0 && i > low) {
20
- j = i - ((i - low) / 2);
21
- if (j == i)
22
- j -= 1;
23
- return S_lookup(j, low, i - 1, s, len);
24
- } else if (cmp > 0 && i < hi) {
25
- j = i + ((hi - i) / 2);
26
- if (j == i)
27
- j += 1;
28
- return S_lookup(j, i + 1, hi, s, len);
29
- } else {
30
- return NULL;
31
- }
32
- }
33
-
34
- static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
35
- return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
36
- }
37
-
38
- bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
39
- bufsize_t size) {
40
- bufsize_t i = 0;
41
-
42
- if (size >= 3 && src[0] == '#') {
43
- int codepoint = 0;
44
- int num_digits = 0;
45
-
46
- if (_isdigit(src[1])) {
47
- for (i = 1; i < size && _isdigit(src[i]); ++i) {
48
- codepoint = (codepoint * 10) + (src[i] - '0');
49
-
50
- if (codepoint >= 0x110000) {
51
- // Keep counting digits but
52
- // avoid integer overflow.
53
- codepoint = 0x110000;
54
- }
55
- }
56
-
57
- num_digits = i - 1;
58
- }
59
-
60
- else if (src[1] == 'x' || src[1] == 'X') {
61
- for (i = 2; i < size && _isxdigit(src[i]); ++i) {
62
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
63
-
64
- if (codepoint >= 0x110000) {
65
- // Keep counting digits but
66
- // avoid integer overflow.
67
- codepoint = 0x110000;
68
- }
69
- }
70
-
71
- num_digits = i - 2;
72
- }
73
-
74
- if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
75
- if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
76
- codepoint >= 0x110000) {
77
- codepoint = 0xFFFD;
78
- }
79
- cmark_utf8proc_encode_char(codepoint, ob);
80
- return i + 1;
81
- }
82
- }
83
-
84
- else {
85
- if (size > CMARK_ENTITY_MAX_LENGTH)
86
- size = CMARK_ENTITY_MAX_LENGTH;
87
-
88
- for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
89
- if (src[i] == ' ')
90
- break;
91
-
92
- if (src[i] == ';') {
93
- const unsigned char *entity = S_lookup_entity(src, i);
94
-
95
- if (entity != NULL) {
96
- cmark_strbuf_puts(ob, (const char *)entity);
97
- return i + 1;
98
- }
99
-
100
- break;
101
- }
102
- }
103
- }
104
-
105
- return 0;
106
- }
107
-
108
- int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
109
- bufsize_t size) {
110
- bufsize_t i = 0, org, ent;
111
-
112
- while (i < size) {
113
- org = i;
114
- while (i < size && src[i] != '&')
115
- i++;
116
-
117
- if (likely(i > org)) {
118
- if (unlikely(org == 0)) {
119
- if (i >= size)
120
- return 0;
121
-
122
- cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
123
- }
124
-
125
- cmark_strbuf_put(ob, src + org, i - org);
126
- }
127
-
128
- /* escaping */
129
- if (i >= size)
130
- break;
131
-
132
- i++;
133
-
134
- ent = houdini_unescape_ent(ob, src + i, size - i);
135
- i += ent;
136
-
137
- /* not really an entity */
138
- if (ent == 0)
139
- cmark_strbuf_putc(ob, '&');
140
- }
141
-
142
- return 1;
143
- }
144
-
145
- void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
146
- bufsize_t size) {
147
- if (!houdini_unescape_html(ob, src, size))
148
- cmark_strbuf_put(ob, src, size);
149
- }