greenmat 3.2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/COPYING +14 -0
  3. data/Gemfile +9 -0
  4. data/README.md +36 -0
  5. data/Rakefile +62 -0
  6. data/bin/greenmat +7 -0
  7. data/ext/greenmat/autolink.c +296 -0
  8. data/ext/greenmat/autolink.h +49 -0
  9. data/ext/greenmat/buffer.c +196 -0
  10. data/ext/greenmat/buffer.h +83 -0
  11. data/ext/greenmat/extconf.rb +6 -0
  12. data/ext/greenmat/gm_markdown.c +161 -0
  13. data/ext/greenmat/gm_render.c +534 -0
  14. data/ext/greenmat/greenmat.h +30 -0
  15. data/ext/greenmat/houdini.h +29 -0
  16. data/ext/greenmat/houdini_href_e.c +108 -0
  17. data/ext/greenmat/houdini_html_e.c +83 -0
  18. data/ext/greenmat/html.c +826 -0
  19. data/ext/greenmat/html.h +84 -0
  20. data/ext/greenmat/html_blocks.h +229 -0
  21. data/ext/greenmat/html_smartypants.c +445 -0
  22. data/ext/greenmat/markdown.c +2912 -0
  23. data/ext/greenmat/markdown.h +138 -0
  24. data/ext/greenmat/stack.c +62 -0
  25. data/ext/greenmat/stack.h +26 -0
  26. data/greenmat.gemspec +72 -0
  27. data/lib/greenmat.rb +92 -0
  28. data/lib/greenmat/compat.rb +73 -0
  29. data/lib/greenmat/render_man.rb +65 -0
  30. data/lib/greenmat/render_strip.rb +48 -0
  31. data/test/benchmark.rb +24 -0
  32. data/test/custom_render_test.rb +28 -0
  33. data/test/greenmat_compat_test.rb +38 -0
  34. data/test/html5_test.rb +69 -0
  35. data/test/html_render_test.rb +241 -0
  36. data/test/html_toc_render_test.rb +76 -0
  37. data/test/markdown_test.rb +337 -0
  38. data/test/pathological_inputs_test.rb +34 -0
  39. data/test/safe_render_test.rb +36 -0
  40. data/test/smarty_html_test.rb +45 -0
  41. data/test/smarty_pants_test.rb +48 -0
  42. data/test/stripdown_render_test.rb +40 -0
  43. data/test/test_helper.rb +33 -0
  44. metadata +158 -0
@@ -0,0 +1,84 @@
1
+ /*
2
+ * Copyright (c) 2011, Vicent Marti
3
+ *
4
+ * Permission to use, copy, modify, and distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #ifndef HTML_H__
18
+ #define HTML_H__
19
+
20
+ #include "markdown.h"
21
+ #include "buffer.h"
22
+ #include <stdlib.h>
23
+
24
+ #ifdef __cplusplus
25
+ extern "C" {
26
+ #endif
27
+
28
+ struct html_renderopt { /* Renderer options/state; passed as `options_ptr` to sdhtml_renderer() and sdhtml_toc_renderer(). */
29
+ struct { /* Table-of-contents bookkeeping; how each counter is used is not visible in this header. */
30
+ int current_level; /* presumably the header level currently being emitted -- TODO confirm in html.c */
31
+ int level_offset; /* presumably an offset applied to header levels -- TODO confirm in html.c */
32
+ int nesting_level; /* presumably a TOC depth limit or counter -- TODO confirm in html.c */
33
+ } toc_data;
34
+
35
+ unsigned int flags; /* presumably a bitwise OR of html_render_mode values -- TODO confirm */
36
+
37
+ /* Extra callbacks. NOTE(review): the signature suggests link_attributes appends attributes for a link to `url` into `ob`; confirm against the caller. */
38
+ void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
39
+ };
40
+
41
+ typedef enum { /* Rendering flags: every enumerator is a distinct single bit, so values can be OR-ed together. */
42
+ HTML_SKIP_HTML = (1 << 0),
43
+ HTML_SKIP_STYLE = (1 << 1),
44
+ HTML_SKIP_IMAGES = (1 << 2),
45
+ HTML_SKIP_LINKS = (1 << 3),
46
+ HTML_EXPAND_TABS = (1 << 4),
47
+ HTML_SAFELINK = (1 << 5),
48
+ HTML_TOC = (1 << 6),
49
+ HTML_HARD_WRAP = (1 << 7),
50
+ HTML_USE_XHTML = (1 << 8),
51
+ HTML_ESCAPE = (1 << 9),
52
+ HTML_PRETTIFY = (1 << 10),
53
+ } html_render_mode; /* presumably the values carried by `render_flags` and html_renderopt.flags -- TODO confirm in html.c */
54
+
55
+ typedef enum { /* Tag classification codes; html_smartypants.c compares the result of sdhtml_is_tag() against HTML_TAG_OPEN and HTML_TAG_CLOSE. */
56
+ HTML_TAG_NONE = 0, /* zero, so it tests false in a boolean context */
57
+ HTML_TAG_OPEN, /* implicitly 1 */
58
+ HTML_TAG_CLOSE, /* implicitly 2 */
59
+ } html_tag;
60
+
61
+ int /* The result is compared against html_tag values (HTML_TAG_OPEN / HTML_TAG_CLOSE) by html_smartypants.c. */
62
+ sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
63
+
64
+ extern void /* Defined outside this header; presumably fills `callbacks` and `options_ptr` for HTML output -- TODO confirm in html.c */
65
+ sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
66
+
67
+ extern void /* Defined outside this header; presumably the table-of-contents counterpart of sdhtml_renderer() -- TODO confirm in html.c */
68
+ sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
69
+
70
+ extern void /* Appends `text` (`size` bytes) to `ob`, replacing ASCII punctuation patterns with HTML entities; a NULL `text` is a no-op. */
71
+ sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
72
+
73
+ /* Header method used internally in Greenmat. NOTE(review): it returns a non-const char *, so who owns/frees the result should be confirmed in html.c. */
74
+ char *header_anchor(const struct buf *buffer);
75
+
76
+ #define STRIPPED_CHARS " -&+$,/:;=?@\"#{}|^~[]`\\*()%.!'" /* the set of characters tested by STRIPPED_CHAR() */
77
+ #define STRIPPED_CHAR(x) (strchr(STRIPPED_CHARS, x) != NULL) /* Non-zero when x occurs in STRIPPED_CHARS. Expands to strchr(), so <string.h> must be in scope where the macro is used (this header does not include it directly). strchr() also matches the terminating NUL, so x == 0 yields true. */
78
+
79
+ #ifdef __cplusplus
80
+ }
81
+ #endif
82
+
83
+ #endif
84
+
@@ -0,0 +1,229 @@
1
+ /* C code produced by gperf version 3.0.4 */
2
+ /* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
3
+ /* See http://git.io/RN0ncw for the list of recognized elements */
4
+ /* Computed positions: -k'1-2' */
5
+
6
+ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
7
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
8
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
9
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
10
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
11
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
12
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
13
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
14
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
15
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
16
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
17
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
18
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
19
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
20
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
21
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
22
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
23
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
24
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
25
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
26
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
27
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
28
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
29
+ /* The character set is not based on ISO-646. */
30
+ error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
31
+ #endif
32
+
33
+ /* maximum key range = 67, duplicates = 0 */
34
+
35
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
/*
 * Byte-to-lowercase map: identity for every value except 65..90 ('A'..'Z'),
 * which map to 97..122 ('a'..'z'). Read-only, hence const.
 */
static const unsigned char gperf_downcase[256] =
  {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
    107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    122,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
    210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
    225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
    255
  };
#endif

#ifndef GPERF_CASE_STRNCMP
#define GPERF_CASE_STRNCMP 1
/*
 * Case-insensitive comparison of at most `n` bytes, folding 'A'..'Z' through
 * gperf_downcase.
 *
 * Returns 0 when the first `n` bytes match or both strings end (NUL) at the
 * same position before that; otherwise the difference between the first pair
 * of folded bytes that differ.
 *
 * Written as a prototype definition: the old-style (K&R) parameter list that
 * gperf emitted is obsolescent and no longer valid in C23.
 */
static int
gperf_case_strncmp (const char *s1, const char *s2, unsigned int n)
{
  for (; n > 0; n--)
    {
      unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
      unsigned char c2 = gperf_downcase[(unsigned char)*s2++];

      /* Stop at the end of s1 or at the first mismatch. */
      if (c1 == 0 || c1 != c2)
        return (int)c1 - (int)c2;
    }
  return 0;
}
#endif
82
+
83
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*
 * gperf hash for HTML block-tag names (computed positions: bytes 1 and 2).
 *
 * str: tag name; only str[0] and, when len != 1, str[1] are read.
 * len: length of the name. Must be >= 1: the function does not special-case
 *      0 and would read str[1]; find_block_tag checks the length first.
 *
 * Returns len plus the association values of the inspected bytes. The table
 * gives upper- and lower-case letters the same value, so the hash is
 * case-insensitive.
 *
 * Written as a prototype definition: the old-style (K&R) parameter list that
 * gperf emitted is obsolescent and no longer valid in C23.
 */
static unsigned int
hash_block_tag (const char *str, unsigned int len)
{
  /* 257 entries: the second byte is looked up at index (byte + 1). */
  static const unsigned char asso_values[] =
    {
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      55, 50, 45, 40, 35, 30, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 15, 10, 15, 15, 15,
       0, 20, 10, 10,  5, 68, 68,  0, 20, 25,
       0, 68, 68,  0, 25,  0, 15, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 15, 10, 15,
      15, 15,  0, 20, 10, 10,  5, 68, 68,  0,
      20, 25,  0, 68, 68,  0, 25,  0, 15, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
      68, 68, 68, 68, 68, 68, 68
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[1]+1];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
137
+
138
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
/*
 * Case-insensitive lookup of an HTML block-level tag name.
 *
 * str: candidate tag name; need not be NUL-terminated at str[len].
 * len: number of bytes of `str` to consider.
 *
 * Returns the matching lower-case entry of the internal word list, or 0
 * (a null pointer) when `str` is not one of the recognized tags.
 *
 * Written as a prototype definition: the old-style (K&R) parameter list that
 * gperf emitted is obsolescent and no longer valid in C23.
 */
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 41,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 67
    };

  /* Indexed by hash value; "" marks an unused slot. */
  static const char * const wordlist[] =
    {
      "",
      "p",
      "ul",
      "pre",
      "form",
      "style",
      "footer",
      "section",
      "", "", "",
      "figure",
      "hr",
      "fieldset",
      "math",
      "figcaption",
      "header",
      "dl",
      "del",
      "",
      "blockquote",
      "script",
      "article",
      "div",
      "",
      "video",
      "hgroup",
      "ol",
      "noscript",
      "", "",
      "canvas",
      "dd",
      "nav",
      "abbr",
      "audio",
      "iframe",
      "address",
      "ins",
      "",
      "table",
      "",
      "h6",
      "", "",
      "aside",
      "output",
      "h5",
      "", "",
      "tfoot",
      "",
      "h4",
      "", "", "", "",
      "h3",
      "", "", "", "",
      "h2",
      "", "", "", "",
      "h1"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key];

          /* Cheap first-byte test (ignoring bit 0x20, the ASCII case bit)
             before the full comparison; s[len] == '\0' rejects candidates
             that are only a prefix of the table entry. */
          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
@@ -0,0 +1,445 @@
1
+ /*
2
+ * Copyright (c) 2011, Vicent Marti
3
+ *
4
+ * Permission to use, copy, modify, and distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #include "buffer.h"
18
+ #include "html.h"
19
+
20
+ #include <string.h>
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <ctype.h>
24
+
25
+ #if defined(_WIN32)
26
+ #define snprintf _snprintf
27
+ #endif
28
+
29
+ struct smartypants_data { /* Quote state shared by the callbacks during one sdhtml_smartypants() pass. */
29
+ int in_squote; /* non-zero while a single quote is open; toggled by smartypants_quotes() */
30
+ int in_dquote; /* non-zero while a double quote is open; toggled by smartypants_quotes() */
31
+ };
33
+
34
+ static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
35
+ static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
36
+ static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
37
+ static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
38
+ static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
39
+ static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
40
+ static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
41
+ static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
42
+ static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
43
+ static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
44
+
45
+ static size_t (*smartypants_cb_ptrs[]) /* Callback table indexed by the action codes stored in smartypants_cb_chars. */
46
+ (struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
47
+ {
48
+ NULL, /* 0: no callback; sdhtml_smartypants() copies such bytes through unchanged */
49
+ smartypants_cb__dash, /* 1: '-' */
50
+ smartypants_cb__parens, /* 2: '(' */
51
+ smartypants_cb__squote, /* 3: '\'' */
52
+ smartypants_cb__dquote, /* 4: '"' */
53
+ smartypants_cb__amp, /* 5: '&' */
54
+ smartypants_cb__period, /* 6: '.' */
55
+ smartypants_cb__number, /* 7: '1' and '3' */
56
+ smartypants_cb__ltag, /* 8: '<' */
57
+ smartypants_cb__backtick, /* 9: '`' */
58
+ smartypants_cb__escape, /* 10: '\\' */
59
+ };
60
+
61
+ static const uint8_t smartypants_cb_chars[] = { /* Maps each input byte value (0..255) to an index into smartypants_cb_ptrs; 0 means no callback. 16 entries per row. */
62
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64
+ 0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0, /* 32..47: 34 '"' -> 4, 38 '&' -> 5, 39 '\'' -> 3, 40 '(' -> 2, 45 '-' -> 1, 46 '.' -> 6 */
65
+ 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, /* 48..63: 49 '1' and 51 '3' -> 7, 60 '<' -> 8 */
66
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
67
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, /* 80..95: 92 '\\' -> 10 */
68
+ 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 96..111: 96 '`' -> 9 */
69
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
74
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
76
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78
+ };
79
+
80
// Returns 1 when 'c' can delimit a word: NUL (which callers pass to mean
// "no neighbouring character"), whitespace or punctuation. Returns 0 otherwise.
static inline int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	return isspace(c) || ispunct(c);
}
85
+
86
// Measures the single-quote sequence at the start of 'text'.
// Recognized spellings are the literal apostrophe and the entities
// "&#39;", "&#x27;" and "&apos;". Returns the number of bytes the spelling
// occupies, or zero when 'text' (limited to 'size' bytes) starts with none
// of them.
static size_t
squote_len(const uint8_t *text, size_t size)
{
	static const char *const spellings[] = { "'", "&#39;", "&#x27;", "&apos;" };
	const size_t spelling_count = sizeof(spellings) / sizeof(spellings[0]);
	size_t idx;

	for (idx = 0; idx < spelling_count; ++idx) {
		const size_t spelling_bytes = strlen(spellings[idx]);

		if (size < spelling_bytes)
			continue;

		if (memcmp(text, spellings[idx], spelling_bytes) == 0)
			return spelling_bytes;
	}

	return 0;
}
104
+
105
// Emits a curly-quote entity for a quote found at the very start or end of a word.
//
// 'quote' is the letter placed in the entity name ('s' or 'd'), giving
// &lsquo;/&rsquo; or &ldquo;/&rdquo;. '*is_open' records whether a quote of
// this kind is currently open. An opening quote is only emitted when
// 'previous_char' is a word boundary, a closing one only when 'next_char' is.
//
// Returns 1 after writing the entity to 'ob' and flipping '*is_open';
// returns 0, touching nothing, when the surrounding characters do not allow it.
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	char entity[8];
	const int closing = (*is_open != 0);

	if (closing) {
		if (!word_boundary(next_char))
			return 0;
	} else {
		if (!word_boundary(previous_char))
			return 0;
	}

	snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	bufputs(ob, entity);
	return 1;
}
122
+
123
+ // Converts ' to left or right single quote; but the initial ' might be in
124
+ // different forms, e.g. &apos; or &#39; or &#x27;.
125
+ // 'squote_text' points to the original single quote, and 'squote_size' is its length.
126
+ // 'text' points at the last character of the single-quote, e.g. ' or ;
127
+ static size_t
128
+ smartypants_squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
129
+ const uint8_t *squote_text, size_t squote_size)
130
+ {
131
+ if (size >= 2) {
132
+ uint8_t t1 = tolower(text[1]);
133
+ int next_squote_len = squote_len(text+1, size-1);
134
+
135
+ // convert '' to &ldquo; or &rdquo;
136
+ if (next_squote_len > 0) {
137
+ uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
138
+ if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
139
+ return next_squote_len;
140
+ }
141
+
142
+ // trailing single quotes: students', tryin'
143
+ if (word_boundary(t1)) {
144
+ BUFPUTSL(ob, "&rsquo;");
145
+ return 0;
146
+ }
147
+
148
+ // Tom's, isn't, I'm, I'd
149
+ if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
150
+ (size == 3 || word_boundary(text[2]))) {
151
+ BUFPUTSL(ob, "&rsquo;");
152
+ return 0;
153
+ }
154
+
155
+ // you're, you'll, you've
156
+ if (size >= 3) {
157
+ uint8_t t2 = tolower(text[2]);
158
+
159
+ if (((t1 == 'r' && t2 == 'e') ||
160
+ (t1 == 'l' && t2 == 'l') ||
161
+ (t1 == 'v' && t2 == 'e')) &&
162
+ (size == 4 || word_boundary(text[3]))) {
163
+ BUFPUTSL(ob, "&rsquo;");
164
+ return 0;
165
+ }
166
+ }
167
+ }
168
+
169
+ if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
170
+ return 0;
171
+
172
+ bufput(ob, squote_text, squote_size);
173
+ return 0;
174
+ }
175
+
176
// Callback for a literal apostrophe: converts ' to a left or right single quote.
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	// A bare apostrophe is a one-byte quote sequence that starts at 'text' itself.
	const uint8_t *quote_start = text;
	const size_t quote_bytes = 1;

	return smartypants_squote(ob, smrt, previous_char, text, size, quote_start, quote_bytes);
}
182
+
183
// Callback for '(': converts (c), (r) and (tm) to &copy;, &reg; and &trade;.
// The 'c', 'r' and 't' are matched case-insensitively, and so is the 'm'.
// Returns the number of bytes consumed after the '('; any other '(' is
// copied through and 0 is returned.
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		const uint8_t first = tolower(text[1]);
		const uint8_t second = tolower(text[2]);

		if (second == ')') {
			if (first == 'c') {
				BUFPUTSL(ob, "&copy;");
				return 2;
			}

			if (first == 'r') {
				BUFPUTSL(ob, "&reg;");
				return 2;
			}
		}

		if (first == 't' && second == 'm' && size >= 4 && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
210
+
211
// Callback for '-': converts "---" to &mdash; and "--" to &ndash;.
// Returns the number of dashes consumed after the first one; a lone dash is
// copied through and 0 is returned.
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int two_dashes = (size >= 2 && text[1] == '-');

	if (two_dashes && size >= 3 && text[2] == '-') {
		BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (two_dashes) {
		BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
228
+
229
+ // Converts &quot; etc.
230
+ static size_t
231
+ smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
232
+ {
233
+ if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
234
+ if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
235
+ return 5;
236
+ }
237
+
238
+ int len = squote_len(text, size);
239
+ if (len > 0) {
240
+ return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
241
+ }
242
+
243
+ if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
244
+ return 3;
245
+
246
+ bufputc(ob, '&');
247
+ return 0;
248
+ }
249
+
250
// Callback for '.': converts "..." and ". . ." to &hellip;.
// Returns the number of bytes consumed after the first period; a lone
// period is copied through and 0 is returned.
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int tight = size >= 3 && text[1] == '.' && text[2] == '.';

	if (tight) {
		BUFPUTSL(ob, "&hellip;");
		return 2;
	}

	if (size >= 5 &&
		text[1] == ' ' && text[2] == '.' &&
		text[3] == ' ' && text[4] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
267
+
268
+ // Converts `` to opening double quote
269
+ static size_t
270
+ smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
271
+ {
272
+ if (size >= 2 && text[1] == '`') {
273
+ if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
274
+ return 1;
275
+ }
276
+
277
+ bufputc(ob, text[0]);
278
+ return 0;
279
+ }
280
+
281
// Callback for '1' and '3': converts 1/2, 1/4 and 3/4 to &frac12;, &frac14;
// and &frac34;. The fraction must follow a word boundary and must either end
// the input, be followed by a word boundary, or (for 1/4 and 3/4 only) be
// followed by "th" / "ths" respectively, matched case-insensitively.
// Returns 2 (the "/d" part is consumed) on conversion; otherwise the digit is
// copied through and 0 is returned.
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3 && text[1] == '/') {
		const uint8_t numerator = text[0];
		const uint8_t denominator = text[2];
		// text[3] is only read when it exists (size > 3).
		const int ends_word = (size == 3) || word_boundary(text[3]);

		if (numerator == '1' && denominator == '2' && ends_word) {
			BUFPUTSL(ob, "&frac12;");
			return 2;
		}

		if (numerator == '1' && denominator == '4' &&
			(ends_word ||
			 (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h'))) {
			BUFPUTSL(ob, "&frac14;");
			return 2;
		}

		if (numerator == '3' && denominator == '4' &&
			(ends_word ||
			 (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's'))) {
			BUFPUTSL(ob, "&frac34;");
			return 2;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
313
+
314
+ // Converts " to left or right double quote
315
+ static size_t
316
+ smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
317
+ {
318
+ if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
319
+ BUFPUTSL(ob, "&quot;");
320
+
321
+ return 0;
322
+ }
323
+
324
+ static size_t
325
+ smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
326
+ {
327
+ static const char *skip_tags[] = {
328
+ "pre", "code", "var", "samp", "kbd", "math", "script", "style"
329
+ };
330
+ static const size_t skip_tags_count = 8;
331
+
332
+ size_t tag, i = 0;
333
+
334
+ while (i < size && text[i] != '>')
335
+ i++;
336
+
337
+ for (tag = 0; tag < skip_tags_count; ++tag) {
338
+ if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
339
+ break;
340
+ }
341
+
342
+ if (tag < skip_tags_count) {
343
+ for (;;) {
344
+ while (i < size && text[i] != '<')
345
+ i++;
346
+
347
+ if (i == size)
348
+ break;
349
+
350
+ if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
351
+ break;
352
+
353
+ i++;
354
+ }
355
+
356
+ while (i < size && text[i] != '>')
357
+ i++;
358
+ }
359
+
360
+ bufput(ob, text, i + 1);
361
+ return i;
362
+ }
363
+
364
// Callback for '\': a backslash followed by one of \ " ' . - ` emits that
// character literally (so no callback runs for it) and consumes it,
// returning 1. Any other backslash, including one that is the last byte of
// the input, is copied through and 0 is returned.
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size < 2) {
		// Lone backslash at the end of the input: there is nothing to
		// escape, so keep the character instead of dropping it.
		bufputc(ob, '\\');
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
385
+
386
+ #if 0
387
+ static struct {
388
+ uint8_t c0;
389
+ const uint8_t *pattern;
390
+ const uint8_t *entity;
391
+ int skip;
392
+ } smartypants_subs[] = {
393
+ { '\'', "'s>", "&rsquo;", 0 },
394
+ { '\'', "'t>", "&rsquo;", 0 },
395
+ { '\'', "'re>", "&rsquo;", 0 },
396
+ { '\'', "'ll>", "&rsquo;", 0 },
397
+ { '\'', "'ve>", "&rsquo;", 0 },
398
+ { '\'', "'m>", "&rsquo;", 0 },
399
+ { '\'', "'d>", "&rsquo;", 0 },
400
+ { '-', "--", "&mdash;", 1 },
401
+ { '-', "<->", "&ndash;", 0 },
402
+ { '.', "...", "&hellip;", 2 },
403
+ { '.', ". . .", "&hellip;", 4 },
404
+ { '(', "(c)", "&copy;", 2 },
405
+ { '(', "(r)", "&reg;", 2 },
406
+ { '(', "(tm)", "&trade;", 3 },
407
+ { '3', "<3/4>", "&frac34;", 2 },
408
+ { '3', "<3/4ths>", "&frac34;", 2 },
409
+ { '1', "<1/2>", "&frac12;", 2 },
410
+ { '1', "<1/4>", "&frac14;", 2 },
411
+ { '1', "<1/4th>", "&frac14;", 2 },
412
+ { '&', "&#0;", 0, 3 },
413
+ };
414
+ #endif
415
+
416
+ void
417
+ sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
418
+ {
419
+ size_t i;
420
+ struct smartypants_data smrt = {0, 0};
421
+
422
+ if (!text)
423
+ return;
424
+
425
+ bufgrow(ob, size);
426
+
427
+ for (i = 0; i < size; ++i) {
428
+ size_t org;
429
+ uint8_t action = 0;
430
+
431
+ org = i;
432
+ while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
433
+ i++;
434
+
435
+ if (i > org)
436
+ bufput(ob, text + org, i - org);
437
+
438
+ if (i < size) {
439
+ i += smartypants_cb_ptrs[(int)action]
440
+ (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
441
+ }
442
+ }
443
+ }
444
+
445
+