redcarpet_yt 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/COPYING +20 -0
  3. data/Gemfile +9 -0
  4. data/README.markdown +394 -0
  5. data/Rakefile +60 -0
  6. data/bin/redcarpet +7 -0
  7. data/ext/redcarpet/autolink.c +302 -0
  8. data/ext/redcarpet/autolink.h +55 -0
  9. data/ext/redcarpet/buffer.c +203 -0
  10. data/ext/redcarpet/buffer.h +89 -0
  11. data/ext/redcarpet/extconf.rb +6 -0
  12. data/ext/redcarpet/houdini.h +51 -0
  13. data/ext/redcarpet/houdini_href_e.c +124 -0
  14. data/ext/redcarpet/houdini_html_e.c +105 -0
  15. data/ext/redcarpet/html.c +825 -0
  16. data/ext/redcarpet/html.h +84 -0
  17. data/ext/redcarpet/html_blocks.h +229 -0
  18. data/ext/redcarpet/html_smartypants.c +457 -0
  19. data/ext/redcarpet/markdown.c +2917 -0
  20. data/ext/redcarpet/markdown.h +143 -0
  21. data/ext/redcarpet/rc_markdown.c +168 -0
  22. data/ext/redcarpet/rc_render.c +545 -0
  23. data/ext/redcarpet/redcarpet.h +52 -0
  24. data/ext/redcarpet/stack.c +84 -0
  25. data/ext/redcarpet/stack.h +48 -0
  26. data/lib/redcarpet/cli.rb +86 -0
  27. data/lib/redcarpet/compat.rb +73 -0
  28. data/lib/redcarpet/render_man.rb +65 -0
  29. data/lib/redcarpet/render_strip.rb +60 -0
  30. data/lib/redcarpet_yt.rb +103 -0
  31. data/redcarpet_yt.gemspec +71 -0
  32. data/test/benchmark.rb +24 -0
  33. data/test/custom_render_test.rb +28 -0
  34. data/test/fixtures/benchmark.md +232 -0
  35. data/test/html5_test.rb +69 -0
  36. data/test/html_render_test.rb +254 -0
  37. data/test/html_toc_render_test.rb +75 -0
  38. data/test/markdown_test.rb +371 -0
  39. data/test/pathological_inputs_test.rb +34 -0
  40. data/test/redcarpet_bin_test.rb +80 -0
  41. data/test/redcarpet_compat_test.rb +38 -0
  42. data/test/safe_render_test.rb +35 -0
  43. data/test/smarty_html_test.rb +45 -0
  44. data/test/smarty_pants_test.rb +53 -0
  45. data/test/stripdown_render_test.rb +61 -0
  46. data/test/test_helper.rb +39 -0
  47. metadata +151 -0
@@ -0,0 +1,84 @@
1
+ /*
2
+ * Copyright (c) 2015, Vicent Marti
3
+ *
4
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ * of this software and associated documentation files (the "Software"), to deal
6
+ * in the Software without restriction, including without limitation the rights
7
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ * copies of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be included in
12
+ * all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ * THE SOFTWARE.
21
+ */
22
+
23
+ #ifndef HTML_H__
24
+ #define HTML_H__
25
+
26
+ #include "markdown.h"
27
+ #include "buffer.h"
28
+ #include <stdlib.h>
29
+
30
+ #ifdef __cplusplus
31
+ extern "C" {
32
+ #endif
33
+
34
+ struct html_renderopt { /* Options/state block passed as options_ptr to sdhtml_renderer() and sdhtml_toc_renderer(). */
35
+ struct { /* Table-of-contents bookkeeping; how it is updated is not visible in this header. */
36
+ int current_level; /* presumably the heading level currently being emitted -- TODO confirm in html.c */
37
+ int level_offset; /* presumably an offset applied to heading levels -- TODO confirm in html.c */
38
+ int nesting_level; /* presumably the TOC nesting depth limit/cursor -- TODO confirm in html.c */
39
+ } toc_data;
40
+
41
+ unsigned int flags; /* NOTE(review): looks like a bitmask of html_render_mode values -- confirm against the renderer */
42
+
43
+ /* extra callbacks */
44
+ void (*link_attributes)(struct buf *ob, const struct buf *url, void *self); /* optional hook receiving an output buffer, a URL and an opaque pointer; call site not shown here */
45
+ };
46
+
47
+ typedef enum { /* Render-mode bits: every enumerator is a distinct power of two, so values can be OR-ed together. */
48
+ HTML_SKIP_HTML = (1 << 0),
49
+ HTML_SKIP_STYLE = (1 << 1),
50
+ HTML_SKIP_IMAGES = (1 << 2),
51
+ HTML_SKIP_LINKS = (1 << 3),
52
+ HTML_EXPAND_TABS = (1 << 4),
53
+ HTML_SAFELINK = (1 << 5),
54
+ HTML_TOC = (1 << 6),
55
+ HTML_HARD_WRAP = (1 << 7),
56
+ HTML_USE_XHTML = (1 << 8),
57
+ HTML_ESCAPE = (1 << 9),
58
+ HTML_PRETTIFY = (1 << 10),
59
+ } html_render_mode; /* NOTE(review): presumably the values accepted by render_flags / html_renderopt.flags -- confirm in html.c */
60
+
61
+ typedef enum { /* Result codes that callers compare against the return value of sdhtml_is_tag(). */
62
+ HTML_TAG_NONE = 0, /* zero, so it tests false; presumably "not the requested tag" -- confirm in html.c */
63
+ HTML_TAG_OPEN, /* compared by the smartypants '<' handler to detect the opening form of a tag */
64
+ HTML_TAG_CLOSE, /* compared by the smartypants '<' handler to detect the closing form of a tag */
65
+ } html_tag;
66
+
67
+ int
68
+ sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
69
+
70
+ extern void
71
+ sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
72
+
73
+ extern void
74
+ sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
75
+
76
+ extern void
77
+ sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
78
+
79
+ #ifdef __cplusplus
80
+ }
81
+ #endif
82
+
83
+ #endif
84
+
@@ -0,0 +1,229 @@
1
+ /* C code produced by gperf version 3.0.4 */
2
+ /* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
3
+ /* See http://git.io/RN0ncw for the list of recognized elements */
4
+ /* Computed positions: -k'1-2' */
5
+
6
+ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
7
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
8
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
9
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
10
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
11
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
12
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
13
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
14
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
15
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
16
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
17
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
18
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
19
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
20
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
21
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
22
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
23
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
24
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
25
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
26
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
27
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
28
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
29
+ /* The character set is not based on ISO-646. */
30
+ error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
31
+ #endif
32
+
33
+ /* maximum key range = 67, duplicates = 0 */
34
+
35
+ #ifndef GPERF_DOWNCASE
36
+ #define GPERF_DOWNCASE 1
37
+ static unsigned char gperf_downcase[256] = /* Byte-to-byte map: identity everywhere except 65..90 ('A'..'Z'), which map to 97..122 ('a'..'z'). */
37
+ {
38
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
39
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
40
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
41
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
42
+ 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, /* entries 65.. ('A'..) start folding to lower case here */
43
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
44
+ 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, /* entry 90 ('Z') -> 122; identity resumes at 91 */
45
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
46
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
47
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
48
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
49
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
50
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
51
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
52
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
53
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
54
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
55
+ 255
56
+ };
58
+ #endif
59
+
60
+ #ifndef GPERF_CASE_STRNCMP
61
+ #define GPERF_CASE_STRNCMP 1
62
+ static int
63
+ gperf_case_strncmp (s1, s2, n)
64
+ register const char *s1;
65
+ register const char *s2;
66
+ register unsigned int n;
67
+ {
68
+ for (; n > 0;)
69
+ {
70
+ unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
71
+ unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
72
+ if (c1 != 0 && c1 == c2)
73
+ {
74
+ n--;
75
+ continue;
76
+ }
77
+ return (int)c1 - (int)c2;
78
+ }
79
+ return 0;
80
+ }
81
+ #endif
82
+
83
/*
 * Perfect-hash function generated by gperf for the block-tag word list.
 *
 * The hash is len + asso_values[str[0]] and, for any len other than 1,
 * additionally + asso_values[str[1] + 1].  Upper- and lower-case ASCII
 * letters share the same association value, so the hash is
 * case-insensitive.
 *
 * str must provide at least one readable byte, and at least two when
 * len != 1.  Values above 67 mean "not a keyword".
 *
 * Written with a prototype instead of the old-style (K&R) parameter list:
 * old-style definitions are obsolescent and no longer valid in C23.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash_block_tag (const char *str, unsigned int len)
{
  /* 257 entries: the second key byte is looked up at index byte + 1. */
  static const unsigned char asso_values[] =
    {
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      55, 50, 45, 40, 35, 30, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 15, 10, 15, 15, 15,
       0, 20, 10, 10,  5, 68, 68,  0, 20, 25,
       0, 68, 68,  0, 25,  0, 15, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 15, 10, 15,
      15, 15,  0, 20, 10, 10,  5, 68, 68,  0,
      20, 25,  0, 68, 68,  0, 25,  0, 15, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[1]+1];
        /* fallthrough */
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
137
+
138
/*
 * Look up an HTML block-level tag name, ignoring ASCII case.
 *
 * str/len give the candidate name (len bytes; no terminator required).
 * Returns a pointer to the matching lower-case entry of the internal word
 * list, or 0 when the name is not one of the recognised tags.
 *
 * The candidate is hashed with hash_block_tag(), then checked against the
 * single word stored in that slot: a cheap first-byte test (case bit
 * masked out with ~32), a case-insensitive compare of len bytes, and a
 * check that the stored word is exactly len bytes long.
 *
 * Written with a prototype instead of the old-style (K&R) parameter list:
 * old-style definitions are obsolescent and no longer valid in C23.
 */
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 41,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 67
    };

  /* Indexed by hash value; empty strings mark unused slots. */
  static const char * const wordlist[] =
    {
      "",
      "p",
      "ul",
      "pre",
      "form",
      "style",
      "footer",
      "section",
      "", "", "",
      "figure",
      "hr",
      "fieldset",
      "math",
      "figcaption",
      "header",
      "dl",
      "del",
      "",
      "blockquote",
      "script",
      "article",
      "div",
      "",
      "video",
      "hgroup",
      "ol",
      "noscript",
      "", "",
      "canvas",
      "dd",
      "nav",
      "abbr",
      "audio",
      "iframe",
      "address",
      "ins",
      "",
      "table",
      "",
      "h6",
      "", "",
      "aside",
      "output",
      "h5",
      "", "",
      "tfoot",
      "",
      "h4",
      "", "", "", "",
      "h3",
      "", "", "", "",
      "h2",
      "", "", "", "",
      "h1"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key];

          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
@@ -0,0 +1,457 @@
1
+ /*
2
+ * Copyright (c) 2015, Vicent Marti
3
+ *
4
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ * of this software and associated documentation files (the "Software"), to deal
6
+ * in the Software without restriction, including without limitation the rights
7
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ * copies of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be included in
12
+ * all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ * THE SOFTWARE.
21
+ */
22
+
23
+ #include "buffer.h"
24
+ #include "html.h"
25
+
26
+ #include <string.h>
27
+ #include <stdlib.h>
28
+ #include <stdio.h>
29
+ #include <ctype.h>
30
+
31
+ #if defined(_WIN32)
32
+ #define snprintf _snprintf
33
+ #endif
34
+
35
+ struct smartypants_data { /* Quote state carried across callbacks during one sdhtml_smartypants() run (starts as {0, 0}). */
36
+ int in_squote; /* non-zero while a left single quote has been emitted and not yet closed; toggled by smartypants_quotes() */
37
+ int in_dquote; /* same, for double quotes */
38
+ };
39
+
40
+ static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
41
+ static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
42
+ static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
43
+ static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
44
+ static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
45
+ static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
46
+ static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
47
+ static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
48
+ static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
49
+ static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
50
+
51
+ static size_t (*smartypants_cb_ptrs[]) /* Handler table indexed by the action code stored in smartypants_cb_chars[byte]. */
52
+ (struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
53
+ {
54
+ NULL, /* 0: no handler; sdhtml_smartypants() copies such bytes through and never dispatches on 0 */
55
+ smartypants_cb__dash, /* 1: '-' */
56
+ smartypants_cb__parens, /* 2: '(' */
57
+ smartypants_cb__squote, /* 3: single quote */
58
+ smartypants_cb__dquote, /* 4: double quote */
59
+ smartypants_cb__amp, /* 5: '&' */
60
+ smartypants_cb__period, /* 6: '.' */
61
+ smartypants_cb__number, /* 7: '1' and '3' */
62
+ smartypants_cb__ltag, /* 8: '<' */
63
+ smartypants_cb__backtick, /* 9: backtick */
64
+ smartypants_cb__escape, /* 10: backslash */
65
+ };
66
+
67
+ static const uint8_t smartypants_cb_chars[] = { /* Maps each input byte (16 per row) to an index into smartypants_cb_ptrs; 0 = plain byte. */
68
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70
+ 0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0, /* 0x20-0x2F: double quote=4, '&'=5, single quote=3, '('=2, '-'=1, '.'=6 */
71
+ 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, /* 0x30-0x3F: '1' and '3'=7 (fractions), '<'=8 */
72
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, /* 0x50-0x5F: backslash=10 */
74
+ 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6F: backtick=9 */
75
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
76
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
79
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
80
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
82
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84
+ };
85
+
86
// Returns 1 when 'c' can delimit a word: the NUL sentinel (used for
// "no neighbouring character"), whitespace, or punctuation. Returns 0 otherwise.
static inline int
word_boundary(uint8_t c)
{
    if (c == 0)
        return 1;

    if (isspace(c))
        return 1;

    return ispunct(c) ? 1 : 0;
}
91
+
92
// Like a word boundary, except that '/' does not count: returns 1 for the
// NUL sentinel, whitespace, or any punctuation other than '/', else 0.
static inline int
fraction_boundary(uint8_t c)
{
    if (c == 0)
        return 1;

    if (isspace(c))
        return 1;

    if (c == '/')
        return 0;

    return ispunct(c) ? 1 : 0;
}
97
+
98
// Recognises a single quote at the start of 'text' in any of its spellings:
// the literal apostrophe or one of the entities &#39; &#x27; &apos;.
// Returns the byte length of the spelling found, or zero when 'text'
// (limited to 'size' bytes) does not start with one.
static size_t
squote_len(const uint8_t *text, size_t size)
{
    static char *const spellings[] = { "'", "&#39;", "&#x27;", "&apos;" };
    const size_t spelling_count = sizeof(spellings) / sizeof(spellings[0]);
    size_t idx;

    for (idx = 0; idx < spelling_count; idx++) {
        const size_t n = strlen(spellings[idx]);

        // not enough input left for this spelling
        if (n > size)
            continue;

        if (memcmp(text, spellings[idx], n) == 0)
            return n;
    }

    return 0;
}
116
+
117
// Emits a curly-quote entity for a quote sitting at the edge of a word.
// 'quote' is the entity letter ('s' or 'd'); '*is_open' tells whether a left
// quote of that kind is currently open. A closing quote is only accepted
// when the following character is a word boundary, an opening quote only
// when the preceding character is one. On success the entity is appended to
// 'ob', '*is_open' is flipped and 1 is returned; otherwise nothing is
// written and 0 is returned.
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
    char ent[8];
    const int closing = *is_open;
    const uint8_t neighbour = closing ? next_char : previous_char;

    if (!word_boundary(neighbour))
        return 0;

    snprintf(ent, sizeof(ent), "&%c%cquo;", closing ? 'r' : 'l', quote);
    *is_open = !closing;
    bufputs(ob, ent);
    return 1;
}
134
+
135
+ // Converts ' to left or right single quote; but the initial ' might be in
136
+ // different forms, e.g. &apos; or &#39; or &#x27;.
137
+ // 'squote_text' points to the original single quote, and 'squote_size' is its length.
138
+ // 'text' points at the last character of the single-quote, e.g. ' or ;
139
+ static size_t
140
+ smartypants_squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
141
+ const uint8_t *squote_text, size_t squote_size)
142
+ {
143
+ if (size >= 2) {
144
+ uint8_t t1 = tolower(text[1]);
145
+ int next_squote_len = squote_len(text+1, size-1);
146
+
147
+ // convert '' to &ldquo; or &rdquo;
148
+ if (next_squote_len > 0) {
149
+ uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
150
+ if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
151
+ return next_squote_len;
152
+ }
153
+
154
+ // trailing single quotes: students', tryin'
155
+ if (word_boundary(t1)) {
156
+ BUFPUTSL(ob, "&rsquo;");
157
+ return 0;
158
+ }
159
+
160
+ // Tom's, isn't, I'm, I'd
161
+ if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
162
+ (size == 3 || word_boundary(text[2]))) {
163
+ BUFPUTSL(ob, "&rsquo;");
164
+ return 0;
165
+ }
166
+
167
+ // you're, you'll, you've
168
+ if (size >= 3) {
169
+ uint8_t t2 = tolower(text[2]);
170
+
171
+ if (((t1 == 'r' && t2 == 'e') ||
172
+ (t1 == 'l' && t2 == 'l') ||
173
+ (t1 == 'v' && t2 == 'e')) &&
174
+ (size == 4 || word_boundary(text[3]))) {
175
+ BUFPUTSL(ob, "&rsquo;");
176
+ return 0;
177
+ }
178
+ }
179
+ }
180
+
181
+ if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
182
+ return 0;
183
+
184
+ bufput(ob, squote_text, squote_size);
185
+ return 0;
186
+ }
187
+
188
// Callback for a literal ' character: the quote as written is the single
// byte at 'text', so it doubles as the one-byte fallback text for
// smartypants_squote().
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    const uint8_t *quote_as_written = text;
    const size_t quote_as_written_len = 1;

    return smartypants_squote(ob, smrt, previous_char, text, size,
                              quote_as_written, quote_as_written_len);
}
194
+
195
// Callback for '(': rewrites (c), (r) and (tm), in either letter case, to
// &copy;, &reg; and &trade;. Returns the number of extra bytes consumed
// after the '(' (2 or 3), or copies the '(' through and returns 0.
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3) {
        const uint8_t first = tolower(text[1]);
        const uint8_t second = tolower(text[2]);

        if (second == ')') {
            // one-letter symbols
            if (first == 'c') {
                BUFPUTSL(ob, "&copy;");
                return 2;
            }
            if (first == 'r') {
                BUFPUTSL(ob, "&reg;");
                return 2;
            }
        } else if (first == 't' && second == 'm' && size >= 4 && text[3] == ')') {
            BUFPUTSL(ob, "&trade;");
            return 3;
        }
    }

    bufputc(ob, text[0]);
    return 0;
}
222
+
223
// Callback for '-': three dashes in a row become &mdash;, two become
// &ndash;, a single one is copied through. Returns the number of extra
// dashes consumed after the first.
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    // length of the dash run starting at text[0], capped at three
    size_t run = 1;

    while (run < 3 && run < size && text[run] == '-')
        run++;

    if (run == 3) {
        BUFPUTSL(ob, "&mdash;");
        return 2;
    }

    if (run == 2) {
        BUFPUTSL(ob, "&ndash;");
        return 1;
    }

    bufputc(ob, text[0]);
    return 0;
}
240
+
241
+ // Converts &quot; etc.
242
+ static size_t
243
+ smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
244
+ {
245
+ if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
246
+ if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
247
+ return 5;
248
+ }
249
+
250
+ int len = squote_len(text, size);
251
+ if (len > 0) {
252
+ return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
253
+ }
254
+
255
+ if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
256
+ return 3;
257
+
258
+ bufputc(ob, '&');
259
+ return 0;
260
+ }
261
+
262
// Callback for '.': both "..." and ". . ." become &hellip;. Returns the
// number of extra bytes consumed after the first period (2 or 4), or copies
// the period through and returns 0.
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    const int tight = size >= 3 && text[1] == '.' && text[2] == '.';
    const int spaced = size >= 5 &&
                       text[1] == ' ' && text[2] == '.' &&
                       text[3] == ' ' && text[4] == '.';

    if (tight || spaced) {
        BUFPUTSL(ob, "&hellip;");
        return tight ? 2 : 4;
    }

    bufputc(ob, text[0]);
    return 0;
}
279
+
280
+ // Converts `` to opening double quote
281
+ static size_t
282
+ smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
283
+ {
284
+ if (size >= 2 && text[1] == '`') {
285
+ if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
286
+ return 1;
287
+ }
288
+
289
+ bufputc(ob, text[0]);
290
+ return 0;
291
+ }
292
+
293
// Callback for the digits '1' and '3': rewrites the fractions 1/2, 1/4 and
// 3/4 to &frac12;, &frac14; and &frac34;. The fraction must start at a
// fraction boundary and either end the input, be followed by a fraction
// boundary, or (for 1/4 and 3/4) carry the ordinal suffix "th" / "ths".
// Returns 2 when a fraction was replaced (the suffix is not consumed);
// otherwise copies the digit through and returns 0.
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3 && fraction_boundary(previous_char) && text[1] == '/') {
        const uint8_t numerator = text[0];
        const uint8_t denominator = text[2];
        // text[3] is only inspected when it exists
        const int ends_here = (size == 3) || fraction_boundary(text[3]);

        if (numerator == '1' && denominator == '2' && ends_here) {
            BUFPUTSL(ob, "&frac12;");
            return 2;
        }

        if (numerator == '1' && denominator == '4') {
            const int has_th = size >= 5 &&
                               tolower(text[3]) == 't' && tolower(text[4]) == 'h';

            if (ends_here || has_th) {
                BUFPUTSL(ob, "&frac14;");
                return 2;
            }
        }

        if (numerator == '3' && denominator == '4') {
            const int has_ths = size >= 6 &&
                                tolower(text[3]) == 't' && tolower(text[4]) == 'h' &&
                                tolower(text[5]) == 's';

            if (ends_here || has_ths) {
                BUFPUTSL(ob, "&frac34;");
                return 2;
            }
        }
    }

    bufputc(ob, text[0]);
    return 0;
}
325
+
326
+ // Converts " to left or right double quote
327
+ static size_t
328
+ smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
329
+ {
330
+ if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
331
+ BUFPUTSL(ob, "&quot;");
332
+
333
+ return 0;
334
+ }
335
+
336
+ static size_t
337
+ smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
338
+ {
339
+ static const char *skip_tags[] = {
340
+ "pre", "code", "var", "samp", "kbd", "math", "script", "style"
341
+ };
342
+ static const size_t skip_tags_count = 8;
343
+
344
+ size_t tag, i = 0;
345
+
346
+ while (i < size && text[i] != '>')
347
+ i++;
348
+
349
+ for (tag = 0; tag < skip_tags_count; ++tag) {
350
+ if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
351
+ break;
352
+ }
353
+
354
+ if (tag < skip_tags_count) {
355
+ for (;;) {
356
+ while (i < size && text[i] != '<')
357
+ i++;
358
+
359
+ if (i == size)
360
+ break;
361
+
362
+ if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
363
+ break;
364
+
365
+ i++;
366
+ }
367
+
368
+ while (i < size && text[i] != '>')
369
+ i++;
370
+ }
371
+
372
+ bufput(ob, text, i + 1);
373
+ return i;
374
+ }
375
+
376
// Callback for '\': a backslash in front of one of the characters this
// filter would otherwise rewrite (\ " ' . - `) is dropped and that character
// is emitted literally; returns 1 for the consumed character. In every other
// case, including a backslash that is the last byte of the input, the
// backslash itself is copied through and 0 is returned.
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size < 2) {
        // nothing follows: keep the backslash rather than silently losing it
        bufputc(ob, '\\');
        return 0;
    }

    switch (text[1]) {
    case '\\':
    case '"':
    case '\'':
    case '.':
    case '-':
    case '`':
        bufputc(ob, text[1]);
        return 1;

    default:
        bufputc(ob, '\\');
        return 0;
    }
}
397
+
398
+ #if 0 /* Compiled out. Appears to be a table-driven description of the same substitutions the smartypants_cb__* callbacks perform; nothing references it. */
399
+ static struct {
400
+ uint8_t c0; /* first byte of the pattern */
401
+ const uint8_t *pattern; /* NOTE(review): '<' and '>' look like word-boundary markers -- no matcher exists to confirm */
402
+ const uint8_t *entity; /* replacement text; 0 in the last row */
403
+ int skip; /* presumably extra bytes to consume, matching the callbacks' return values -- unverified */
404
+ } smartypants_subs[] = {
405
+ { '\'', "'s>", "&rsquo;", 0 },
406
+ { '\'', "'t>", "&rsquo;", 0 },
407
+ { '\'', "'re>", "&rsquo;", 0 },
408
+ { '\'', "'ll>", "&rsquo;", 0 },
409
+ { '\'', "'ve>", "&rsquo;", 0 },
410
+ { '\'', "'m>", "&rsquo;", 0 },
411
+ { '\'', "'d>", "&rsquo;", 0 },
412
+ { '-', "--", "&mdash;", 1 },
413
+ { '-', "<->", "&ndash;", 0 },
414
+ { '.', "...", "&hellip;", 2 },
415
+ { '.', ". . .", "&hellip;", 4 },
416
+ { '(', "(c)", "&copy;", 2 },
417
+ { '(', "(r)", "&reg;", 2 },
418
+ { '(', "(tm)", "&trade;", 3 },
419
+ { '3', "<3/4>", "&frac34;", 2 },
420
+ { '3', "<3/4ths>", "&frac34;", 2 },
421
+ { '1', "<1/2>", "&frac12;", 2 },
422
+ { '1', "<1/4>", "&frac14;", 2 },
423
+ { '1', "<1/4th>", "&frac14;", 2 },
424
+ { '&', "&#0;", 0, 3 },
425
+ };
426
+ #endif
427
+
428
+ void
429
+ sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
430
+ {
431
+ size_t i;
432
+ struct smartypants_data smrt = {0, 0};
433
+
434
+ if (!text)
435
+ return;
436
+
437
+ bufgrow(ob, size);
438
+
439
+ for (i = 0; i < size; ++i) {
440
+ size_t org;
441
+ uint8_t action = 0;
442
+
443
+ org = i;
444
+ while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
445
+ i++;
446
+
447
+ if (i > org)
448
+ bufput(ob, text + org, i - org);
449
+
450
+ if (i < size) {
451
+ i += smartypants_cb_ptrs[(int)action]
452
+ (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
453
+ }
454
+ }
455
+ }
456
+
457
+