commonmarker 0.23.10 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +1221 -0
  3. data/Cargo.toml +7 -0
  4. data/README.md +233 -172
  5. data/ext/commonmarker/Cargo.toml +20 -0
  6. data/ext/commonmarker/extconf.rb +3 -6
  7. data/ext/commonmarker/src/lib.rs +103 -0
  8. data/ext/commonmarker/src/node.rs +1160 -0
  9. data/ext/commonmarker/src/options.rs +216 -0
  10. data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
  11. data/ext/commonmarker/src/plugins.rs +6 -0
  12. data/ext/commonmarker/src/utils.rs +8 -0
  13. data/lib/commonmarker/config.rb +91 -40
  14. data/lib/commonmarker/constants.rb +7 -0
  15. data/lib/commonmarker/extension.rb +14 -0
  16. data/lib/commonmarker/node/ast.rb +8 -0
  17. data/lib/commonmarker/node/inspect.rb +14 -4
  18. data/lib/commonmarker/node.rb +29 -47
  19. data/lib/commonmarker/renderer.rb +1 -127
  20. data/lib/commonmarker/utils.rb +22 -0
  21. data/lib/commonmarker/version.rb +2 -2
  22. data/lib/commonmarker.rb +27 -25
  23. metadata +38 -186
  24. data/Rakefile +0 -109
  25. data/bin/commonmarker +0 -118
  26. data/commonmarker.gemspec +0 -38
  27. data/ext/commonmarker/arena.c +0 -104
  28. data/ext/commonmarker/autolink.c +0 -508
  29. data/ext/commonmarker/autolink.h +0 -8
  30. data/ext/commonmarker/blocks.c +0 -1622
  31. data/ext/commonmarker/buffer.c +0 -278
  32. data/ext/commonmarker/buffer.h +0 -116
  33. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  34. data/ext/commonmarker/chunk.h +0 -135
  35. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  36. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
  37. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  38. data/ext/commonmarker/cmark-gfm.h +0 -833
  39. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  40. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  41. data/ext/commonmarker/cmark.c +0 -55
  42. data/ext/commonmarker/cmark_ctype.c +0 -44
  43. data/ext/commonmarker/cmark_ctype.h +0 -33
  44. data/ext/commonmarker/commonmark.c +0 -514
  45. data/ext/commonmarker/commonmarker.c +0 -1308
  46. data/ext/commonmarker/commonmarker.h +0 -16
  47. data/ext/commonmarker/config.h +0 -76
  48. data/ext/commonmarker/core-extensions.c +0 -27
  49. data/ext/commonmarker/entities.inc +0 -2138
  50. data/ext/commonmarker/ext_scanners.c +0 -879
  51. data/ext/commonmarker/ext_scanners.h +0 -24
  52. data/ext/commonmarker/footnotes.c +0 -63
  53. data/ext/commonmarker/footnotes.h +0 -27
  54. data/ext/commonmarker/houdini.h +0 -57
  55. data/ext/commonmarker/houdini_href_e.c +0 -100
  56. data/ext/commonmarker/houdini_html_e.c +0 -66
  57. data/ext/commonmarker/houdini_html_u.c +0 -149
  58. data/ext/commonmarker/html.c +0 -502
  59. data/ext/commonmarker/html.h +0 -27
  60. data/ext/commonmarker/inlines.c +0 -1788
  61. data/ext/commonmarker/inlines.h +0 -29
  62. data/ext/commonmarker/iterator.c +0 -159
  63. data/ext/commonmarker/iterator.h +0 -26
  64. data/ext/commonmarker/latex.c +0 -468
  65. data/ext/commonmarker/linked_list.c +0 -37
  66. data/ext/commonmarker/man.c +0 -274
  67. data/ext/commonmarker/map.c +0 -129
  68. data/ext/commonmarker/map.h +0 -44
  69. data/ext/commonmarker/node.c +0 -1045
  70. data/ext/commonmarker/node.h +0 -167
  71. data/ext/commonmarker/parser.h +0 -59
  72. data/ext/commonmarker/plaintext.c +0 -218
  73. data/ext/commonmarker/plugin.c +0 -36
  74. data/ext/commonmarker/plugin.h +0 -34
  75. data/ext/commonmarker/references.c +0 -43
  76. data/ext/commonmarker/references.h +0 -26
  77. data/ext/commonmarker/registry.c +0 -63
  78. data/ext/commonmarker/registry.h +0 -24
  79. data/ext/commonmarker/render.c +0 -213
  80. data/ext/commonmarker/render.h +0 -62
  81. data/ext/commonmarker/scanners.c +0 -14056
  82. data/ext/commonmarker/scanners.h +0 -70
  83. data/ext/commonmarker/scanners.re +0 -341
  84. data/ext/commonmarker/strikethrough.c +0 -167
  85. data/ext/commonmarker/strikethrough.h +0 -9
  86. data/ext/commonmarker/syntax_extension.c +0 -149
  87. data/ext/commonmarker/syntax_extension.h +0 -34
  88. data/ext/commonmarker/table.c +0 -917
  89. data/ext/commonmarker/table.h +0 -12
  90. data/ext/commonmarker/tagfilter.c +0 -60
  91. data/ext/commonmarker/tagfilter.h +0 -8
  92. data/ext/commonmarker/tasklist.c +0 -156
  93. data/ext/commonmarker/tasklist.h +0 -8
  94. data/ext/commonmarker/utf8.c +0 -317
  95. data/ext/commonmarker/utf8.h +0 -35
  96. data/ext/commonmarker/xml.c +0 -182
  97. data/lib/commonmarker/renderer/html_renderer.rb +0 -256
@@ -1,24 +0,0 @@
1
- #include "chunk.h"
2
- #include "cmark-gfm.h"
3
-
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
7
-
8
- bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
9
- unsigned char *ptr, int len, bufsize_t offset);
10
- bufsize_t _scan_table_start(const unsigned char *p);
11
- bufsize_t _scan_table_cell(const unsigned char *p);
12
- bufsize_t _scan_table_cell_end(const unsigned char *p);
13
- bufsize_t _scan_table_row_end(const unsigned char *p);
14
- bufsize_t _scan_tasklist(const unsigned char *p);
15
-
16
/*
 * Convenience wrappers: each one hands the matching _scan_* routine to
 * _ext_scan_at together with the caller's buffer pointer, length and offset.
 */
#define scan_table_start(ptr, len, offset)                                     \
  _ext_scan_at(_scan_table_start, (ptr), (len), (offset))
#define scan_table_cell(ptr, len, offset)                                      \
  _ext_scan_at(_scan_table_cell, (ptr), (len), (offset))
#define scan_table_cell_end(ptr, len, offset)                                  \
  _ext_scan_at(_scan_table_cell_end, (ptr), (len), (offset))
#define scan_table_row_end(ptr, len, offset)                                   \
  _ext_scan_at(_scan_table_row_end, (ptr), (len), (offset))
#define scan_tasklist(ptr, len, offset)                                        \
  _ext_scan_at(_scan_tasklist, (ptr), (len), (offset))
21
-
22
- #ifdef __cplusplus
23
- }
24
- #endif
@@ -1,63 +0,0 @@
1
- #include "cmark-gfm.h"
2
- #include "parser.h"
3
- #include "footnotes.h"
4
- #include "inlines.h"
5
- #include "chunk.h"
6
-
7
- static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
8
- cmark_footnote *ref = (cmark_footnote *)_ref;
9
- cmark_mem *mem = map->mem;
10
- if (ref != NULL) {
11
- mem->free(ref->entry.label);
12
- if (ref->node)
13
- cmark_node_free(ref->node);
14
- mem->free(ref);
15
- }
16
- }
17
-
18
- void cmark_footnote_create(cmark_map *map, cmark_node *node) {
19
- cmark_footnote *ref;
20
- unsigned char *reflabel = normalize_map_label(map->mem, &node->as.literal);
21
-
22
- /* empty footnote name, or composed from only whitespace */
23
- if (reflabel == NULL)
24
- return;
25
-
26
- assert(map->sorted == NULL);
27
-
28
- ref = (cmark_footnote *)map->mem->calloc(1, sizeof(*ref));
29
- ref->entry.label = reflabel;
30
- ref->node = node;
31
- ref->entry.age = map->size;
32
- ref->entry.next = map->refs;
33
-
34
- map->refs = (cmark_map_entry *)ref;
35
- map->size++;
36
- }
37
-
38
- cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
39
- return cmark_map_new(mem, footnote_free);
40
- }
41
-
42
- // Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
43
- // unlink all of the footnote nodes before freeing their memory.
44
- //
45
- // Sometimes, two (unused) footnote nodes can end up referencing each other,
46
- // which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
47
- // etc, can lead to a use-after-free error.
48
- //
49
- // Better to `unlink` every footnote node first, setting their next, prev, and
50
- // parent pointers to NULL, and only then walk thru & free them up.
51
- void cmark_unlink_footnotes_map(cmark_map *map) {
52
- cmark_map_entry *ref;
53
- cmark_map_entry *next;
54
-
55
- ref = map->refs;
56
- while(ref) {
57
- next = ref->next;
58
- if (((cmark_footnote *)ref)->node) {
59
- cmark_node_unlink(((cmark_footnote *)ref)->node);
60
- }
61
- ref = next;
62
- }
63
- }
@@ -1,27 +0,0 @@
1
- #ifndef CMARK_FOOTNOTES_H
2
- #define CMARK_FOOTNOTES_H
3
-
4
- #include "map.h"
5
-
6
- #ifdef __cplusplus
7
- extern "C" {
8
- #endif
9
-
10
- struct cmark_footnote {
11
- cmark_map_entry entry;
12
- cmark_node *node;
13
- unsigned int ix;
14
- };
15
-
16
- typedef struct cmark_footnote cmark_footnote;
17
-
18
- void cmark_footnote_create(cmark_map *map, cmark_node *node);
19
- cmark_map *cmark_footnote_map_new(cmark_mem *mem);
20
-
21
- void cmark_unlink_footnotes_map(cmark_map *map);
22
-
23
- #ifdef __cplusplus
24
- }
25
- #endif
26
-
27
- #endif
@@ -1,57 +0,0 @@
1
- #ifndef CMARK_HOUDINI_H
2
- #define CMARK_HOUDINI_H
3
-
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
7
-
8
- #include <stdint.h>
9
- #include "config.h"
10
- #include "buffer.h"
11
-
12
/*
 * Branch-prediction hints. They expand to __builtin_expect when config.h
 * reports that the builtin exists, and to the bare expression otherwise.
 */
#ifdef HAVE___BUILTIN_EXPECT
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

#ifdef HOUDINI_USE_LOCALE
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * _isxdigit relies on strchr(), so <string.h> must be included by the user.
 * strchr() also matches the terminating NUL of the digit string, so a zero
 * byte has to be rejected explicitly or it would count as a hex digit.
 * Both macros evaluate their argument more than once.
 */
#define _isxdigit(c)                                                           \
  ((c) != 0 && strchr("0123456789ABCDEFabcdef", (c)) != NULL)
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif

/* Output-size estimates: escaping allows for 20% growth, unescaping for none. */
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
33
-
34
- CMARK_GFM_EXPORT
35
- bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
36
- bufsize_t size);
37
- CMARK_GFM_EXPORT
38
- int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
39
- bufsize_t size);
40
- CMARK_GFM_EXPORT
41
- int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
42
- bufsize_t size, int secure);
43
- CMARK_GFM_EXPORT
44
- int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
45
- bufsize_t size);
46
- CMARK_GFM_EXPORT
47
- void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
48
- bufsize_t size);
49
- CMARK_GFM_EXPORT
50
- int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
51
- bufsize_t size);
52
-
53
- #ifdef __cplusplus
54
- }
55
- #endif
56
-
57
- #endif
@@ -1,100 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
-
7
- /*
8
- * The following characters will not be escaped:
9
- *
10
- * -_.+!*'(),%#@?=;:/,+&$~ alphanum
11
- *
12
- * Note that this character set is the addition of:
13
- *
14
- * - The characters which are safe to be in an URL
15
- * - The characters which are *not* safe to be in
16
- * an URL because they are RESERVED characters.
17
- *
18
- * We assume (lazily) that any RESERVED char that
19
- * appears inside an URL is actually meant to
20
- * have its native function (i.e. as an URL
21
- * component/separator) and hence needs no escaping.
22
- *
23
- * There are two exceptions: the characters & (amp)
24
- * and ' (single quote) do not appear in the table.
25
- * They are meant to appear in the URL as components,
26
- * yet they require special HTML-entity escaping
27
- * to generate valid HTML markup.
28
- *
29
- * All other characters will be escaped to %XX.
30
- *
31
- */
32
- static const char HREF_SAFE[] = {
33
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
35
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
37
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
- 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44
- };
45
-
46
- int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
47
- static const uint8_t hex_chars[] = "0123456789ABCDEF";
48
- bufsize_t i = 0, org;
49
- uint8_t hex_str[3];
50
-
51
- hex_str[0] = '%';
52
-
53
- while (i < size) {
54
- org = i;
55
- while (i < size && HREF_SAFE[src[i]] != 0)
56
- i++;
57
-
58
- if (likely(i > org))
59
- cmark_strbuf_put(ob, src + org, i - org);
60
-
61
- /* escaping */
62
- if (i >= size)
63
- break;
64
-
65
- switch (src[i]) {
66
- /* amp appears all the time in URLs, but needs
67
- * HTML-entity escaping to be inside an href */
68
- case '&':
69
- cmark_strbuf_puts(ob, "&amp;");
70
- break;
71
-
72
- /* the single quote is a valid URL character
73
- * according to the standard; it needs HTML
74
- * entity escaping too */
75
- case '\'':
76
- cmark_strbuf_puts(ob, "&#x27;");
77
- break;
78
-
79
- /* the space can be escaped to %20 or a plus
80
- * sign. we're going with the generic escape
81
- * for now. the plus thing is more commonly seen
82
- * when building GET strings */
83
- #if 0
84
- case ' ':
85
- cmark_strbuf_putc(ob, '+');
86
- break;
87
- #endif
88
-
89
- /* every other character goes with a %XX escaping */
90
- default:
91
- hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
92
- hex_str[2] = hex_chars[src[i] & 0xF];
93
- cmark_strbuf_put(ob, hex_str, 3);
94
- }
95
-
96
- i++;
97
- }
98
-
99
- return 1;
100
- }
@@ -1,66 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "houdini.h"
6
-
7
- /**
8
- * According to the OWASP rules:
9
- *
10
- * & --> &amp;
11
- * < --> &lt;
12
- * > --> &gt;
13
- * " --> &quot;
14
- * ' --> &#x27; &apos; is not recommended
15
- * / --> &#x2F; forward slash is included as it helps end an HTML entity
16
- *
17
- */
18
- static const char HTML_ESCAPE_TABLE[] = {
19
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
21
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30
- };
31
-
32
- static const char *HTML_ESCAPES[] = {"", "&quot;", "&amp;", "&#39;",
33
- "&#47;", "&lt;", "&gt;"};
34
-
35
- int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
36
- int secure) {
37
- bufsize_t i = 0, org, esc = 0;
38
-
39
- while (i < size) {
40
- org = i;
41
- while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
42
- i++;
43
-
44
- if (i > org)
45
- cmark_strbuf_put(ob, src + org, i - org);
46
-
47
- /* escaping */
48
- if (unlikely(i >= size))
49
- break;
50
-
51
- /* The forward slash and single quote are only escaped in secure mode */
52
- if ((src[i] == '/' || src[i] == '\'') && !secure) {
53
- cmark_strbuf_putc(ob, src[i]);
54
- } else {
55
- cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
56
- }
57
-
58
- i++;
59
- }
60
-
61
- return 1;
62
- }
63
-
64
- int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
65
- return houdini_escape_html0(ob, src, size, 1);
66
- }
@@ -1,149 +0,0 @@
1
- #include <assert.h>
2
- #include <stdio.h>
3
- #include <string.h>
4
-
5
- #include "buffer.h"
6
- #include "houdini.h"
7
- #include "utf8.h"
8
- #include "entities.inc"
9
-
10
- /* Binary tree lookup code for entities added by JGM */
11
-
12
- static const unsigned char *S_lookup(int i, int low, int hi,
13
- const unsigned char *s, int len) {
14
- int j;
15
- int cmp =
16
- strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
17
- if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
18
- return (const unsigned char *)cmark_entities[i].bytes;
19
- } else if (cmp <= 0 && i > low) {
20
- j = i - ((i - low) / 2);
21
- if (j == i)
22
- j -= 1;
23
- return S_lookup(j, low, i - 1, s, len);
24
- } else if (cmp > 0 && i < hi) {
25
- j = i + ((hi - i) / 2);
26
- if (j == i)
27
- j += 1;
28
- return S_lookup(j, i + 1, hi, s, len);
29
- } else {
30
- return NULL;
31
- }
32
- }
33
-
34
- static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
35
- return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
36
- }
37
-
38
- bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
39
- bufsize_t size) {
40
- bufsize_t i = 0;
41
-
42
- if (size >= 3 && src[0] == '#') {
43
- int codepoint = 0;
44
- int num_digits = 0;
45
-
46
- if (_isdigit(src[1])) {
47
- for (i = 1; i < size && _isdigit(src[i]); ++i) {
48
- codepoint = (codepoint * 10) + (src[i] - '0');
49
-
50
- if (codepoint >= 0x110000) {
51
- // Keep counting digits but
52
- // avoid integer overflow.
53
- codepoint = 0x110000;
54
- }
55
- }
56
-
57
- num_digits = i - 1;
58
- }
59
-
60
- else if (src[1] == 'x' || src[1] == 'X') {
61
- for (i = 2; i < size && _isxdigit(src[i]); ++i) {
62
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
63
-
64
- if (codepoint >= 0x110000) {
65
- // Keep counting digits but
66
- // avoid integer overflow.
67
- codepoint = 0x110000;
68
- }
69
- }
70
-
71
- num_digits = i - 2;
72
- }
73
-
74
- if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
75
- if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
76
- codepoint >= 0x110000) {
77
- codepoint = 0xFFFD;
78
- }
79
- cmark_utf8proc_encode_char(codepoint, ob);
80
- return i + 1;
81
- }
82
- }
83
-
84
- else {
85
- if (size > CMARK_ENTITY_MAX_LENGTH)
86
- size = CMARK_ENTITY_MAX_LENGTH;
87
-
88
- for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
89
- if (src[i] == ' ')
90
- break;
91
-
92
- if (src[i] == ';') {
93
- const unsigned char *entity = S_lookup_entity(src, i);
94
-
95
- if (entity != NULL) {
96
- cmark_strbuf_puts(ob, (const char *)entity);
97
- return i + 1;
98
- }
99
-
100
- break;
101
- }
102
- }
103
- }
104
-
105
- return 0;
106
- }
107
-
108
- int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
109
- bufsize_t size) {
110
- bufsize_t i = 0, org, ent;
111
-
112
- while (i < size) {
113
- org = i;
114
- while (i < size && src[i] != '&')
115
- i++;
116
-
117
- if (likely(i > org)) {
118
- if (unlikely(org == 0)) {
119
- if (i >= size)
120
- return 0;
121
-
122
- cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
123
- }
124
-
125
- cmark_strbuf_put(ob, src + org, i - org);
126
- }
127
-
128
- /* escaping */
129
- if (i >= size)
130
- break;
131
-
132
- i++;
133
-
134
- ent = houdini_unescape_ent(ob, src + i, size - i);
135
- i += ent;
136
-
137
- /* not really an entity */
138
- if (ent == 0)
139
- cmark_strbuf_putc(ob, '&');
140
- }
141
-
142
- return 1;
143
- }
144
-
145
- void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
146
- bufsize_t size) {
147
- if (!houdini_unescape_html(ob, src, size))
148
- cmark_strbuf_put(ob, src, size);
149
- }