qiita_marker 0.23.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +50 -0
  4. data/Rakefile +113 -0
  5. data/bin/qiita_marker +123 -0
  6. data/ext/qiita_marker/arena.c +103 -0
  7. data/ext/qiita_marker/autolink.c +425 -0
  8. data/ext/qiita_marker/autolink.h +8 -0
  9. data/ext/qiita_marker/blocks.c +1596 -0
  10. data/ext/qiita_marker/buffer.c +278 -0
  11. data/ext/qiita_marker/buffer.h +116 -0
  12. data/ext/qiita_marker/case_fold_switch.inc +4327 -0
  13. data/ext/qiita_marker/chunk.h +135 -0
  14. data/ext/qiita_marker/cmark-gfm-core-extensions.h +54 -0
  15. data/ext/qiita_marker/cmark-gfm-extension_api.h +736 -0
  16. data/ext/qiita_marker/cmark-gfm-extensions_export.h +42 -0
  17. data/ext/qiita_marker/cmark-gfm.h +817 -0
  18. data/ext/qiita_marker/cmark-gfm_export.h +42 -0
  19. data/ext/qiita_marker/cmark-gfm_version.h +7 -0
  20. data/ext/qiita_marker/cmark.c +55 -0
  21. data/ext/qiita_marker/cmark_ctype.c +44 -0
  22. data/ext/qiita_marker/cmark_ctype.h +33 -0
  23. data/ext/qiita_marker/commonmark.c +529 -0
  24. data/ext/qiita_marker/config.h +76 -0
  25. data/ext/qiita_marker/core-extensions.c +27 -0
  26. data/ext/qiita_marker/entities.inc +2138 -0
  27. data/ext/qiita_marker/ext_scanners.c +879 -0
  28. data/ext/qiita_marker/ext_scanners.h +24 -0
  29. data/ext/qiita_marker/extconf.rb +7 -0
  30. data/ext/qiita_marker/footnotes.c +63 -0
  31. data/ext/qiita_marker/footnotes.h +27 -0
  32. data/ext/qiita_marker/houdini.h +57 -0
  33. data/ext/qiita_marker/houdini_href_e.c +100 -0
  34. data/ext/qiita_marker/houdini_html_e.c +66 -0
  35. data/ext/qiita_marker/houdini_html_u.c +149 -0
  36. data/ext/qiita_marker/html.c +486 -0
  37. data/ext/qiita_marker/html.h +27 -0
  38. data/ext/qiita_marker/inlines.c +1691 -0
  39. data/ext/qiita_marker/inlines.h +29 -0
  40. data/ext/qiita_marker/iterator.c +159 -0
  41. data/ext/qiita_marker/iterator.h +26 -0
  42. data/ext/qiita_marker/latex.c +466 -0
  43. data/ext/qiita_marker/linked_list.c +37 -0
  44. data/ext/qiita_marker/man.c +278 -0
  45. data/ext/qiita_marker/map.c +122 -0
  46. data/ext/qiita_marker/map.h +41 -0
  47. data/ext/qiita_marker/node.c +979 -0
  48. data/ext/qiita_marker/node.h +125 -0
  49. data/ext/qiita_marker/parser.h +58 -0
  50. data/ext/qiita_marker/plaintext.c +235 -0
  51. data/ext/qiita_marker/plugin.c +36 -0
  52. data/ext/qiita_marker/plugin.h +34 -0
  53. data/ext/qiita_marker/qiita_marker.c +1321 -0
  54. data/ext/qiita_marker/qiita_marker.h +16 -0
  55. data/ext/qiita_marker/references.c +42 -0
  56. data/ext/qiita_marker/references.h +26 -0
  57. data/ext/qiita_marker/registry.c +63 -0
  58. data/ext/qiita_marker/registry.h +24 -0
  59. data/ext/qiita_marker/render.c +205 -0
  60. data/ext/qiita_marker/render.h +62 -0
  61. data/ext/qiita_marker/scanners.c +10520 -0
  62. data/ext/qiita_marker/scanners.h +62 -0
  63. data/ext/qiita_marker/scanners.re +341 -0
  64. data/ext/qiita_marker/strikethrough.c +167 -0
  65. data/ext/qiita_marker/strikethrough.h +9 -0
  66. data/ext/qiita_marker/syntax_extension.c +149 -0
  67. data/ext/qiita_marker/syntax_extension.h +34 -0
  68. data/ext/qiita_marker/table.c +822 -0
  69. data/ext/qiita_marker/table.h +12 -0
  70. data/ext/qiita_marker/tagfilter.c +60 -0
  71. data/ext/qiita_marker/tagfilter.h +8 -0
  72. data/ext/qiita_marker/tasklist.c +156 -0
  73. data/ext/qiita_marker/tasklist.h +8 -0
  74. data/ext/qiita_marker/utf8.c +317 -0
  75. data/ext/qiita_marker/utf8.h +35 -0
  76. data/ext/qiita_marker/xml.c +181 -0
  77. data/lib/qiita_marker/config.rb +52 -0
  78. data/lib/qiita_marker/node/inspect.rb +57 -0
  79. data/lib/qiita_marker/node.rb +83 -0
  80. data/lib/qiita_marker/renderer/html_renderer.rb +252 -0
  81. data/lib/qiita_marker/renderer.rb +135 -0
  82. data/lib/qiita_marker/version.rb +5 -0
  83. data/lib/qiita_marker.rb +45 -0
  84. data/qiita_marker.gemspec +40 -0
  85. data/test/benchmark.rb +32 -0
  86. data/test/fixtures/curly.md +1 -0
  87. data/test/fixtures/dingus.md +10 -0
  88. data/test/fixtures/strong.md +1 -0
  89. data/test/fixtures/table.md +10 -0
  90. data/test/test_attributes.rb +24 -0
  91. data/test/test_basics.rb +35 -0
  92. data/test/test_commands.rb +72 -0
  93. data/test/test_commonmark.rb +36 -0
  94. data/test/test_doc.rb +130 -0
  95. data/test/test_encoding.rb +23 -0
  96. data/test/test_extensions.rb +116 -0
  97. data/test/test_footnotes.rb +60 -0
  98. data/test/test_gc.rb +47 -0
  99. data/test/test_helper.rb +71 -0
  100. data/test/test_linebreaks.rb +15 -0
  101. data/test/test_maliciousness.rb +262 -0
  102. data/test/test_node.rb +89 -0
  103. data/test/test_options.rb +37 -0
  104. data/test/test_pathological_inputs.rb +94 -0
  105. data/test/test_plaintext.rb +46 -0
  106. data/test/test_renderer.rb +47 -0
  107. data/test/test_smartpunct.rb +27 -0
  108. data/test/test_spec.rb +30 -0
  109. data/test/test_tasklists.rb +43 -0
  110. data/test/test_xml.rb +107 -0
  111. metadata +313 -0
@@ -0,0 +1,24 @@
1
+ #include "chunk.h"
2
+ #include "cmark-gfm.h"
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
9
+ unsigned char *ptr, int len, bufsize_t offset);
10
+ bufsize_t _scan_table_start(const unsigned char *p);
11
+ bufsize_t _scan_table_cell(const unsigned char *p);
12
+ bufsize_t _scan_table_cell_end(const unsigned char *p);
13
+ bufsize_t _scan_table_row_end(const unsigned char *p);
14
+ bufsize_t _scan_tasklist(const unsigned char *p);
15
+
16
+ #define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
17
+ #define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
18
+ #define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
19
+ #define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
20
+ #define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n)
21
+
22
+ #ifdef __cplusplus
23
+ }
24
+ #endif
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+
5
# Compile the extension's C sources in C99 mode.
$CFLAGS.concat(' -std=c99')

# Generate the Makefile for the qiita_marker/qiita_marker extension target.
create_makefile('qiita_marker/qiita_marker')
@@ -0,0 +1,63 @@
1
+ #include "cmark-gfm.h"
2
+ #include "parser.h"
3
+ #include "footnotes.h"
4
+ #include "inlines.h"
5
+ #include "chunk.h"
6
+
7
+ static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
8
+ cmark_footnote *ref = (cmark_footnote *)_ref;
9
+ cmark_mem *mem = map->mem;
10
+ if (ref != NULL) {
11
+ mem->free(ref->entry.label);
12
+ if (ref->node)
13
+ cmark_node_free(ref->node);
14
+ mem->free(ref);
15
+ }
16
+ }
17
+
18
+ void cmark_footnote_create(cmark_map *map, cmark_node *node) {
19
+ cmark_footnote *ref;
20
+ unsigned char *reflabel = normalize_map_label(map->mem, &node->as.literal);
21
+
22
+ /* empty footnote name, or composed from only whitespace */
23
+ if (reflabel == NULL)
24
+ return;
25
+
26
+ assert(map->sorted == NULL);
27
+
28
+ ref = (cmark_footnote *)map->mem->calloc(1, sizeof(*ref));
29
+ ref->entry.label = reflabel;
30
+ ref->node = node;
31
+ ref->entry.age = map->size;
32
+ ref->entry.next = map->refs;
33
+
34
+ map->refs = (cmark_map_entry *)ref;
35
+ map->size++;
36
+ }
37
+
38
+ cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
39
+ return cmark_map_new(mem, footnote_free);
40
+ }
41
+
42
+ // Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
43
+ // unlink all of the footnote nodes before freeing their memory.
44
+ //
45
+ // Sometimes, two (unused) footnote nodes can end up referencing each other,
46
+ // which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
47
+ // etc, can lead to a use-after-free error.
48
+ //
49
+ // Better to `unlink` every footnote node first, setting their next, prev, and
50
+ // parent pointers to NULL, and only then walk thru & free them up.
51
+ void cmark_unlink_footnotes_map(cmark_map *map) {
52
+ cmark_map_entry *ref;
53
+ cmark_map_entry *next;
54
+
55
+ ref = map->refs;
56
+ while(ref) {
57
+ next = ref->next;
58
+ if (((cmark_footnote *)ref)->node) {
59
+ cmark_node_unlink(((cmark_footnote *)ref)->node);
60
+ }
61
+ ref = next;
62
+ }
63
+ }
@@ -0,0 +1,27 @@
1
+ #ifndef CMARK_FOOTNOTES_H
2
+ #define CMARK_FOOTNOTES_H
3
+
4
+ #include "map.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ struct cmark_footnote {
11
+ cmark_map_entry entry;
12
+ cmark_node *node;
13
+ unsigned int ix;
14
+ };
15
+
16
+ typedef struct cmark_footnote cmark_footnote;
17
+
18
+ void cmark_footnote_create(cmark_map *map, cmark_node *node);
19
+ cmark_map *cmark_footnote_map_new(cmark_mem *mem);
20
+
21
+ void cmark_unlink_footnotes_map(cmark_map *map);
22
+
23
+ #ifdef __cplusplus
24
+ }
25
+ #endif
26
+
27
+ #endif
@@ -0,0 +1,57 @@
1
+ #ifndef CMARK_HOUDINI_H
2
+ #define CMARK_HOUDINI_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include <stdint.h>
9
+ #include "config.h"
10
+ #include "buffer.h"
11
+
12
+ #ifdef HAVE___BUILTIN_EXPECT
13
+ #define likely(x) __builtin_expect((x), 1)
14
+ #define unlikely(x) __builtin_expect((x), 0)
15
+ #else
16
+ #define likely(x) (x)
17
+ #define unlikely(x) (x)
18
+ #endif
19
+
20
+ #ifdef HOUDINI_USE_LOCALE
21
+ #define _isxdigit(c) isxdigit(c)
22
+ #define _isdigit(c) isdigit(c)
23
+ #else
24
+ /*
25
+ * Helper _isdigit methods -- do not trust the current locale
26
+ * */
27
+ #define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
28
+ #define _isdigit(c) ((c) >= '0' && (c) <= '9')
29
+ #endif
30
+
31
+ #define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
32
+ #define HOUDINI_UNESCAPED_SIZE(x) (x)
33
+
34
+ CMARK_GFM_EXPORT
35
+ bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
36
+ bufsize_t size);
37
+ CMARK_GFM_EXPORT
38
+ int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
39
+ bufsize_t size);
40
+ CMARK_GFM_EXPORT
41
+ int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
42
+ bufsize_t size, int secure);
43
+ CMARK_GFM_EXPORT
44
+ int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
45
+ bufsize_t size);
46
+ CMARK_GFM_EXPORT
47
+ void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
48
+ bufsize_t size);
49
+ CMARK_GFM_EXPORT
50
+ int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
51
+ bufsize_t size);
52
+
53
+ #ifdef __cplusplus
54
+ }
55
+ #endif
56
+
57
+ #endif
@@ -0,0 +1,100 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /*
8
+ * The following characters will not be escaped:
9
+ *
10
+ * -_.+!*'(),%#@?=;:/,+&$~ alphanum
11
+ *
12
+ * Note that this character set is the addition of:
13
+ *
14
+ * - The characters which are safe to be in an URL
15
+ * - The characters which are *not* safe to be in
16
+ * an URL because they are RESERVED characters.
17
+ *
18
+ * We assume (lazily) that any RESERVED char that
19
+ * appears inside an URL is actually meant to
20
+ * have its native function (i.e. as an URL
21
+ * component/separator) and hence needs no escaping.
22
+ *
23
+ * There are two exceptions: the characters & (amp)
24
+ * and ' (single quote) do not appear in the table.
25
+ * They are meant to appear in the URL as components,
26
+ * yet they require special HTML-entity escaping
27
+ * to generate valid HTML markup.
28
+ *
29
+ * All other characters will be escaped to %XX.
30
+ *
31
+ */
32
/*
 * One flag per byte value (256 entries, 16 per row below): 1 means the byte
 * is copied into an href unchanged, 0 means it needs some form of escaping.
 */
static const char HREF_SAFE[] = {
    /* 0x00 - 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F: space ! " # $ % & ' ( ) * + , - . / */
    0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 - 0x3F: 0-9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    /* 0x40 - 0x4F: @ A-O */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F: P-Z, four punctuation bytes (unsafe), then _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 - 0x6F: backtick (unsafe), a-o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F: p-z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
    /* 0x80 - 0x8F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 - 0x9F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 - 0xAF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 - 0xBF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 - 0xCF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 - 0xDF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 - 0xEF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 - 0xFF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
45
+
46
+ int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
47
+ static const uint8_t hex_chars[] = "0123456789ABCDEF";
48
+ bufsize_t i = 0, org;
49
+ uint8_t hex_str[3];
50
+
51
+ hex_str[0] = '%';
52
+
53
+ while (i < size) {
54
+ org = i;
55
+ while (i < size && HREF_SAFE[src[i]] != 0)
56
+ i++;
57
+
58
+ if (likely(i > org))
59
+ cmark_strbuf_put(ob, src + org, i - org);
60
+
61
+ /* escaping */
62
+ if (i >= size)
63
+ break;
64
+
65
+ switch (src[i]) {
66
+ /* amp appears all the time in URLs, but needs
67
+ * HTML-entity escaping to be inside an href */
68
+ case '&':
69
+ cmark_strbuf_puts(ob, "&amp;");
70
+ break;
71
+
72
+ /* the single quote is a valid URL character
73
+ * according to the standard; it needs HTML
74
+ * entity escaping too */
75
+ case '\'':
76
+ cmark_strbuf_puts(ob, "&#x27;");
77
+ break;
78
+
79
+ /* the space can be escaped to %20 or a plus
80
+ * sign. we're going with the generic escape
81
+ * for now. the plus thing is more commonly seen
82
+ * when building GET strings */
83
+ #if 0
84
+ case ' ':
85
+ cmark_strbuf_putc(ob, '+');
86
+ break;
87
+ #endif
88
+
89
+ /* every other character goes with a %XX escaping */
90
+ default:
91
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
92
+ hex_str[2] = hex_chars[src[i] & 0xF];
93
+ cmark_strbuf_put(ob, hex_str, 3);
94
+ }
95
+
96
+ i++;
97
+ }
98
+
99
+ return 1;
100
+ }
@@ -0,0 +1,66 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ /**
8
+ * According to the OWASP rules:
9
+ *
10
+ * & --> &amp;
11
+ * < --> &lt;
12
+ * > --> &gt;
13
+ * " --> &quot;
14
+ * ' --> &#x27; &apos; is not recommended
15
+ * / --> &#x2F; forward slash is included as it helps end an HTML entity
16
+ *
17
+ */
18
/*
 * Maps every byte value to an index into HTML_ESCAPES.  Entries that are not
 * listed are 0, meaning the byte is copied through unchanged.
 */
static const char HTML_ESCAPE_TABLE[256] = {
    ['"'] = 1,
    ['&'] = 2,
    ['\''] = 3,
    ['/'] = 4,
    ['<'] = 5,
    ['>'] = 6,
};
31
+
32
/*
 * Replacement text for each code stored in HTML_ESCAPE_TABLE.  Slot 0 is the
 * empty string and belongs to bytes that are not escaped.
 */
static const char *HTML_ESCAPES[] = {
    "",       /* 0: no escape */
    "&quot;", /* 1: double quote */
    "&amp;",  /* 2: ampersand */
    "&#39;",  /* 3: single quote */
    "&#47;",  /* 4: forward slash */
    "&lt;",   /* 5: less-than */
    "&gt;",   /* 6: greater-than */
};
34
+
35
+ int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
36
+ int secure) {
37
+ bufsize_t i = 0, org, esc = 0;
38
+
39
+ while (i < size) {
40
+ org = i;
41
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
42
+ i++;
43
+
44
+ if (i > org)
45
+ cmark_strbuf_put(ob, src + org, i - org);
46
+
47
+ /* escaping */
48
+ if (unlikely(i >= size))
49
+ break;
50
+
51
+ /* The forward slash and single quote are only escaped in secure mode */
52
+ if ((src[i] == '/' || src[i] == '\'') && !secure) {
53
+ cmark_strbuf_putc(ob, src[i]);
54
+ } else {
55
+ cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
56
+ }
57
+
58
+ i++;
59
+ }
60
+
61
+ return 1;
62
+ }
63
+
64
+ int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
65
+ return houdini_escape_html0(ob, src, size, 1);
66
+ }
@@ -0,0 +1,149 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "buffer.h"
6
+ #include "houdini.h"
7
+ #include "utf8.h"
8
+ #include "entities.inc"
9
+
10
+ /* Binary tree lookup code for entities added by JGM */
11
+
12
+ static const unsigned char *S_lookup(int i, int low, int hi,
13
+ const unsigned char *s, int len) {
14
+ int j;
15
+ int cmp =
16
+ strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
17
+ if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
18
+ return (const unsigned char *)cmark_entities[i].bytes;
19
+ } else if (cmp <= 0 && i > low) {
20
+ j = i - ((i - low) / 2);
21
+ if (j == i)
22
+ j -= 1;
23
+ return S_lookup(j, low, i - 1, s, len);
24
+ } else if (cmp > 0 && i < hi) {
25
+ j = i + ((hi - i) / 2);
26
+ if (j == i)
27
+ j += 1;
28
+ return S_lookup(j, i + 1, hi, s, len);
29
+ } else {
30
+ return NULL;
31
+ }
32
+ }
33
+
34
+ static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
35
+ return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
36
+ }
37
+
38
+ bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
39
+ bufsize_t size) {
40
+ bufsize_t i = 0;
41
+
42
+ if (size >= 3 && src[0] == '#') {
43
+ int codepoint = 0;
44
+ int num_digits = 0;
45
+
46
+ if (_isdigit(src[1])) {
47
+ for (i = 1; i < size && _isdigit(src[i]); ++i) {
48
+ codepoint = (codepoint * 10) + (src[i] - '0');
49
+
50
+ if (codepoint >= 0x110000) {
51
+ // Keep counting digits but
52
+ // avoid integer overflow.
53
+ codepoint = 0x110000;
54
+ }
55
+ }
56
+
57
+ num_digits = i - 1;
58
+ }
59
+
60
+ else if (src[1] == 'x' || src[1] == 'X') {
61
+ for (i = 2; i < size && _isxdigit(src[i]); ++i) {
62
+ codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
63
+
64
+ if (codepoint >= 0x110000) {
65
+ // Keep counting digits but
66
+ // avoid integer overflow.
67
+ codepoint = 0x110000;
68
+ }
69
+ }
70
+
71
+ num_digits = i - 2;
72
+ }
73
+
74
+ if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
75
+ if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
76
+ codepoint >= 0x110000) {
77
+ codepoint = 0xFFFD;
78
+ }
79
+ cmark_utf8proc_encode_char(codepoint, ob);
80
+ return i + 1;
81
+ }
82
+ }
83
+
84
+ else {
85
+ if (size > CMARK_ENTITY_MAX_LENGTH)
86
+ size = CMARK_ENTITY_MAX_LENGTH;
87
+
88
+ for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
89
+ if (src[i] == ' ')
90
+ break;
91
+
92
+ if (src[i] == ';') {
93
+ const unsigned char *entity = S_lookup_entity(src, i);
94
+
95
+ if (entity != NULL) {
96
+ cmark_strbuf_puts(ob, (const char *)entity);
97
+ return i + 1;
98
+ }
99
+
100
+ break;
101
+ }
102
+ }
103
+ }
104
+
105
+ return 0;
106
+ }
107
+
108
+ int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
109
+ bufsize_t size) {
110
+ bufsize_t i = 0, org, ent;
111
+
112
+ while (i < size) {
113
+ org = i;
114
+ while (i < size && src[i] != '&')
115
+ i++;
116
+
117
+ if (likely(i > org)) {
118
+ if (unlikely(org == 0)) {
119
+ if (i >= size)
120
+ return 0;
121
+
122
+ cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
123
+ }
124
+
125
+ cmark_strbuf_put(ob, src + org, i - org);
126
+ }
127
+
128
+ /* escaping */
129
+ if (i >= size)
130
+ break;
131
+
132
+ i++;
133
+
134
+ ent = houdini_unescape_ent(ob, src + i, size - i);
135
+ i += ent;
136
+
137
+ /* not really an entity */
138
+ if (ent == 0)
139
+ cmark_strbuf_putc(ob, '&');
140
+ }
141
+
142
+ return 1;
143
+ }
144
+
145
+ void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
146
+ bufsize_t size) {
147
+ if (!houdini_unescape_html(ob, src, size))
148
+ cmark_strbuf_put(ob, src, size);
149
+ }