tight-redcarpet 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/COPYING +14 -0
- data/Gemfile +9 -0
- data/README.markdown +386 -0
- data/Rakefile +60 -0
- data/bin/redcarpet +43 -0
- data/ext/redcarpet/autolink.c +296 -0
- data/ext/redcarpet/autolink.h +49 -0
- data/ext/redcarpet/buffer.c +196 -0
- data/ext/redcarpet/buffer.h +83 -0
- data/ext/redcarpet/extconf.rb +6 -0
- data/ext/redcarpet/houdini.h +29 -0
- data/ext/redcarpet/houdini_href_e.c +108 -0
- data/ext/redcarpet/houdini_html_e.c +83 -0
- data/ext/redcarpet/html.c +770 -0
- data/ext/redcarpet/html.h +78 -0
- data/ext/redcarpet/html_blocks.h +206 -0
- data/ext/redcarpet/html_smartypants.c +445 -0
- data/ext/redcarpet/markdown.c +2907 -0
- data/ext/redcarpet/markdown.h +141 -0
- data/ext/redcarpet/rc_markdown.c +165 -0
- data/ext/redcarpet/rc_render.c +529 -0
- data/ext/redcarpet/redcarpet.h +30 -0
- data/ext/redcarpet/stack.c +62 -0
- data/ext/redcarpet/stack.h +26 -0
- data/lib/redcarpet.rb +125 -0
- data/lib/redcarpet/compat.rb +3 -0
- data/lib/redcarpet/render_man.rb +65 -0
- data/lib/redcarpet/render_strip.rb +48 -0
- data/redcarpet.gemspec +65 -0
- data/test/custom_render_test.rb +28 -0
- data/test/html_render_test.rb +232 -0
- data/test/html_toc_render_test.rb +49 -0
- data/test/markdown_test.rb +311 -0
- data/test/pathological_inputs_test.rb +34 -0
- data/test/redcarpet_compat_test.rb +38 -0
- data/test/smarty_html_test.rb +45 -0
- data/test/smarty_pants_test.rb +48 -0
- data/test/stripdown_render_test.rb +42 -0
- data/test/test_helper.rb +18 -0
- metadata +141 -0
@@ -0,0 +1,78 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#ifndef HTML_H__
|
18
|
+
#define HTML_H__
|
19
|
+
|
20
|
+
#include "markdown.h"
|
21
|
+
#include "buffer.h"
|
22
|
+
#include <stdlib.h>
|
23
|
+
|
24
|
+
#ifdef __cplusplus
|
25
|
+
extern "C" {
|
26
|
+
#endif
|
27
|
+
|
28
|
+
/*
 * Options and mutable state handed to the HTML renderers declared below
 * (sdhtml_renderer / sdhtml_toc_renderer take a pointer to this struct).
 */
struct html_renderopt {
	/* Heading-level bookkeeping; presumably used for table-of-contents
	 * output (field names suggest so -- confirm against html.c). */
	struct {
		int current_level;
		int level_offset;
		int nesting_level;
	} toc_data;

	/* Render flags; presumably a bitwise OR of html_render_mode values
	 * -- confirm against html.c. */
	unsigned int flags;

	/* extra callbacks */
	void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
|
40
|
+
|
41
|
+
/*
 * Bit flags for the HTML renderer. Every constant occupies its own bit,
 * so values can be combined with bitwise OR.
 */
typedef enum {
	HTML_SKIP_HTML   = 0x001,	/* bit 0 */
	HTML_SKIP_STYLE  = 0x002,	/* bit 1 */
	HTML_SKIP_IMAGES = 0x004,	/* bit 2 */
	HTML_SKIP_LINKS  = 0x008,	/* bit 3 */
	HTML_EXPAND_TABS = 0x010,	/* bit 4 */
	HTML_SAFELINK    = 0x020,	/* bit 5 */
	HTML_TOC         = 0x040,	/* bit 6 */
	HTML_HARD_WRAP   = 0x080,	/* bit 7 */
	HTML_USE_XHTML   = 0x100,	/* bit 8 */
	HTML_ESCAPE      = 0x200,	/* bit 9 */
	HTML_PRETTIFY    = 0x400,	/* bit 10 */
} html_render_mode;
|
54
|
+
|
55
|
+
/*
 * Result codes of sdhtml_is_tag(): whether the inspected text is an opening
 * tag, a closing tag, or not the requested tag at all.
 */
typedef enum {
	HTML_TAG_NONE  = 0,
	HTML_TAG_OPEN  = 1,
	HTML_TAG_CLOSE = 2,
} html_tag;
|
60
|
+
|
61
|
+
/*
 * Checks tag_data/tag_size against `tagname`. Callers compare the result
 * with the html_tag constants (HTML_TAG_OPEN / HTML_TAG_CLOSE).
 */
int
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);

/* Sets up `callbacks` and `options_ptr` for HTML rendering with the given
 * flags (definition not visible here -- confirm details in html.c). */
extern void
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);

/* Table-of-contents counterpart of sdhtml_renderer (definition not visible
 * here -- confirm details in html.c). */
extern void
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, int nesting_level);

/* Copies `size` bytes of `text` into `ob`, applying the SmartyPants
 * typographic substitutions; does nothing when `text` is NULL. */
extern void
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
|
72
|
+
|
73
|
+
#ifdef __cplusplus
|
74
|
+
}
|
75
|
+
#endif
|
76
|
+
|
77
|
+
#endif
|
78
|
+
|
@@ -0,0 +1,206 @@
|
|
1
|
+
/* C code produced by gperf version 3.0.3 */
|
2
|
+
/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
|
3
|
+
/* Computed positions: -k'1-2' */
|
4
|
+
|
5
|
+
/*
 * The generated tables below assume an ISO-646 (ASCII-compatible) execution
 * character set. Stop the build with a clear diagnostic otherwise.
 */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
|
31
|
+
|
32
|
+
/* maximum key range = 37, duplicates = 0 */
|
33
|
+
|
34
|
+
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
/*
 * Byte-to-lowercase map: 'A'..'Z' (65..90) map to 'a'..'z' (97..122), every
 * other byte maps to itself. The table is only ever read, hence const.
 */
static const unsigned char gperf_downcase[256] =
{
	  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
	 16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
	 32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
	 48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
	 64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,  91,  92,  93,  94,  95,
	 96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
#endif
|
58
|
+
|
59
|
+
#ifndef GPERF_CASE_STRNCMP
|
60
|
+
#define GPERF_CASE_STRNCMP 1
|
61
|
+
static int
|
62
|
+
gperf_case_strncmp (s1, s2, n)
|
63
|
+
register const char *s1;
|
64
|
+
register const char *s2;
|
65
|
+
register unsigned int n;
|
66
|
+
{
|
67
|
+
for (; n > 0;)
|
68
|
+
{
|
69
|
+
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
|
70
|
+
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
|
71
|
+
if (c1 != 0 && c1 == c2)
|
72
|
+
{
|
73
|
+
n--;
|
74
|
+
continue;
|
75
|
+
}
|
76
|
+
return (int)c1 - (int)c2;
|
77
|
+
}
|
78
|
+
return 0;
|
79
|
+
}
|
80
|
+
#endif
|
81
|
+
|
82
|
+
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*
 * gperf hash over the first one or two bytes of `str` plus its length.
 *
 * For len == 1 only str[0] is read; for any other length str[1] is read as
 * well, so the caller must guarantee len >= 1 (find_block_tag does).
 * The association table has identical entries for upper- and lower-case
 * letters, which makes the hash case-insensitive.
 *
 * Written with a prototype instead of the generated K&R parameter list,
 * which is obsolescent and no longer valid in C23.
 */
static unsigned int
hash_block_tag(const char *str, unsigned int len)
{
	/* 257 entries: the second key byte is looked up at index byte + 1 */
	static const unsigned char asso_values[] =
	{
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		 8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
		 5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
		 0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
		 0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
		15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38
	};
	int hval = len;

	switch (hval)
	{
		default:
			hval += asso_values[(unsigned char)str[1]+1];
			/*FALLTHROUGH*/
		case 1:
			hval += asso_values[(unsigned char)str[0]];
			break;
	}
	return hval;
}
|
136
|
+
|
137
|
+
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
/*
 * Case-insensitive lookup of an HTML block-level tag name.
 *
 * str/len: candidate tag name (need not be NUL-terminated past `len`).
 * Returns the canonical lower-case name from the word list when the first
 * `len` bytes of `str` match one of the keywords exactly, or 0 otherwise.
 *
 * Written with a prototype instead of the generated K&R parameter list,
 * which is obsolescent and no longer valid in C23.
 */
const char *
find_block_tag(const char *str, unsigned int len)
{
	enum
	{
		TOTAL_KEYWORDS = 24,
		MIN_WORD_LENGTH = 1,
		MAX_WORD_LENGTH = 10,
		MIN_HASH_VALUE = 1,
		MAX_HASH_VALUE = 37
	};

	/* indexed by hash_block_tag(); empty strings are unused slots */
	static const char * const wordlist[] =
	{
		"",
		"p",
		"dl",
		"div",
		"math",
		"table",
		"",
		"ul",
		"del",
		"form",
		"blockquote",
		"figure",
		"ol",
		"fieldset",
		"",
		"h1",
		"",
		"h6",
		"pre",
		"", "",
		"script",
		"h5",
		"noscript",
		"",
		"style",
		"iframe",
		"h4",
		"ins",
		"", "", "",
		"h3",
		"", "", "", "",
		"h2"
	};

	if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
	{
		int key = hash_block_tag (str, len);

		if (key <= MAX_HASH_VALUE && key >= 0)
		{
			const char *s = wordlist[key];

			/* cheap first-byte test (ignoring the case bit), then the full
			 * compare; s[len] == '\0' rejects keywords longer than the input */
			if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
				return s;
		}
	}
	return 0;
}
|
@@ -0,0 +1,445 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include "buffer.h"
|
18
|
+
#include "html.h"
|
19
|
+
|
20
|
+
#include <string.h>
|
21
|
+
#include <stdlib.h>
|
22
|
+
#include <stdio.h>
|
23
|
+
#include <ctype.h>
|
24
|
+
|
25
|
+
#if defined(_WIN32)
|
26
|
+
#define snprintf _snprintf
|
27
|
+
#endif
|
28
|
+
|
29
|
+
// Quote state carried across callbacks during one sdhtml_smartypants() run.
// Each flag is toggled by smartypants_quotes() every time it emits a quote.
struct smartypants_data {
	int in_squote;	// non-zero while a single quote is open
	int in_dquote;	// non-zero while a double quote is open
};
|
33
|
+
|
34
|
+
// Callback prototypes, listed in the same order as the dispatch table below.
// Every callback receives the output buffer, the quote state, the byte before
// the trigger (0 at start of input), a pointer to the trigger byte and the
// number of bytes left from there; it returns how many bytes after the
// trigger it consumed.
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);

// Dispatch table indexed by the action codes stored in smartypants_cb_chars.
// Slot 0 means "no action" and is never called.
static size_t (*smartypants_cb_ptrs[])
	(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
{
	NULL,				/* 0 */
	smartypants_cb__dash,		/* 1 */
	smartypants_cb__parens,		/* 2 */
	smartypants_cb__squote,		/* 3 */
	smartypants_cb__dquote,		/* 4 */
	smartypants_cb__amp,		/* 5 */
	smartypants_cb__period,		/* 6 */
	smartypants_cb__number,		/* 7 */
	smartypants_cb__ltag,		/* 8 */
	smartypants_cb__backtick,	/* 9 */
	smartypants_cb__escape,		/* 10 */
};
|
60
|
+
|
61
|
+
// Maps every input byte to an index into smartypants_cb_ptrs.
// Bytes not listed here are zero-initialized, i.e. "no action".
static const uint8_t smartypants_cb_chars[256] = {
	['-']  = 1,	/* dash */
	['(']  = 2,	/* parens */
	['\''] = 3,	/* single quote */
	['"']  = 4,	/* double quote */
	['&']  = 5,	/* ampersand / entity */
	['.']  = 6,	/* period */
	['1']  = 7,	/* number (fractions) */
	['3']  = 7,	/* number (fractions) */
	['<']  = 8,	/* HTML tag */
	['`']  = 9,	/* backtick */
	['\\'] = 10,	/* escape */
};
|
79
|
+
|
80
|
+
// Returns 1 when 'c' can delimit a word: the 0 sentinel used for "no
// character", any whitespace, or any punctuation. Returns 0 otherwise.
static inline int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	return (isspace(c) || ispunct(c)) ? 1 : 0;
}
|
85
|
+
|
86
|
+
// If 'text' begins with any spelling of a single quote (a literal ' or one of
// the entities &#39; &#x27; &apos;), returns the length in bytes of that
// spelling. Otherwise, returns zero.
//
// 'size' is the number of readable bytes at 'text'; a spelling is matched
// only when it fits completely inside that range.
static size_t
squote_len(const uint8_t *text, size_t size)
{
	// The entity spellings must be listed explicitly: smartypants_cb__amp
	// calls this with text starting at '&', which a bare "'" never matches.
	static const char *const single_quote_list[] = { "'", "&#39;", "&#x27;", "&apos;", NULL };
	const char *const *p;

	for (p = single_quote_list; *p; ++p) {
		size_t len = strlen(*p);
		if (size >= len && memcmp(text, *p, len) == 0) {
			return len;
		}
	}

	return 0;
}
|
104
|
+
|
105
|
+
// Emits a curly-quote entity for a quote at the very beginning or end of a
// word. 'quote' selects the entity family ('s' -> &lsquo;/&rsquo;,
// 'd' -> &ldquo;/&rdquo;) and '*is_open' says whether such a quote is
// currently open.
//
// A closing quote is only produced when the following character is a word
// boundary; an opening quote only when the preceding character is one.
// On success the entity is appended to 'ob', '*is_open' is toggled and 1 is
// returned. Otherwise nothing is written and 0 is returned.
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	const int closing = (*is_open != 0);
	char ent[8];	// "&rdquo;" plus terminator

	if (closing) {
		if (!word_boundary(next_char))
			return 0;
	} else {
		if (!word_boundary(previous_char))
			return 0;
	}

	snprintf(ent, sizeof(ent), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	bufputs(ob, ent);
	return 1;
}
|
122
|
+
|
123
|
+
// Converts ' to left or right single quote; but the initial ' might be in
|
124
|
+
// different forms, e.g. ' or ' or '.
|
125
|
+
// 'squote_text' points to the original single quote, and 'squote_size' is its length.
|
126
|
+
// 'text' points at the last character of the single-quote, e.g. ' or ;
|
127
|
+
static size_t
|
128
|
+
smartypants_squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
|
129
|
+
const uint8_t *squote_text, size_t squote_size)
|
130
|
+
{
|
131
|
+
if (size >= 2) {
|
132
|
+
uint8_t t1 = tolower(text[1]);
|
133
|
+
int next_squote_len = squote_len(text+1, size-1);
|
134
|
+
|
135
|
+
// convert '' to “ or ”
|
136
|
+
if (next_squote_len > 0) {
|
137
|
+
uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
|
138
|
+
if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
|
139
|
+
return next_squote_len;
|
140
|
+
}
|
141
|
+
|
142
|
+
// trailing single quotes: students', tryin'
|
143
|
+
if (word_boundary(t1)) {
|
144
|
+
BUFPUTSL(ob, "’");
|
145
|
+
return 0;
|
146
|
+
}
|
147
|
+
|
148
|
+
// Tom's, isn't, I'm, I'd
|
149
|
+
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
150
|
+
(size == 3 || word_boundary(text[2]))) {
|
151
|
+
BUFPUTSL(ob, "’");
|
152
|
+
return 0;
|
153
|
+
}
|
154
|
+
|
155
|
+
// you're, you'll, you've
|
156
|
+
if (size >= 3) {
|
157
|
+
uint8_t t2 = tolower(text[2]);
|
158
|
+
|
159
|
+
if (((t1 == 'r' && t2 == 'e') ||
|
160
|
+
(t1 == 'l' && t2 == 'l') ||
|
161
|
+
(t1 == 'v' && t2 == 'e')) &&
|
162
|
+
(size == 4 || word_boundary(text[3]))) {
|
163
|
+
BUFPUTSL(ob, "’");
|
164
|
+
return 0;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
170
|
+
return 0;
|
171
|
+
|
172
|
+
bufput(ob, squote_text, squote_size);
|
173
|
+
return 0;
|
174
|
+
}
|
175
|
+
|
176
|
+
// Callback for a literal ' in the input: the quote spelling is the single
// byte at 'text', so it is passed both as the position and as the original
// spelling to fall back on.
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const uint8_t *original_quote = text;
	const size_t original_quote_size = 1;

	return smartypants_squote(ob, smrt, previous_char, text, size, original_quote, original_quote_size);
}
|
182
|
+
|
183
|
+
// Converts (c), (r), (tm) -- case-insensitively -- to the &copy;, &reg; and
// &trade; entities. Any other '(' is copied through unchanged.
//
// Returns the number of bytes consumed after the '(' (2 or 3 on a match,
// 0 otherwise).
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		uint8_t t1 = tolower(text[1]);
		uint8_t t2 = tolower(text[2]);

		if (t1 == 'c' && t2 == ')') {
			BUFPUTSL(ob, "&copy;");
			return 2;
		}

		if (t1 == 'r' && t2 == ')') {
			BUFPUTSL(ob, "&reg;");
			return 2;
		}

		if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
210
|
+
|
211
|
+
// Converts "---" to an em-dash (&mdash;) and "--" to an en-dash (&ndash;).
// A single '-' is copied through unchanged.
//
// Returns the number of bytes consumed after the first '-'.
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '-' && text[2] == '-') {
		BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (size >= 2 && text[1] == '-') {
		BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
228
|
+
|
229
|
+
// Converts " etc.
|
230
|
+
static size_t
|
231
|
+
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
232
|
+
{
|
233
|
+
if (size >= 6 && memcmp(text, """, 6) == 0) {
|
234
|
+
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
|
235
|
+
return 5;
|
236
|
+
}
|
237
|
+
|
238
|
+
int len = squote_len(text, size);
|
239
|
+
if (len > 0) {
|
240
|
+
return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
|
241
|
+
}
|
242
|
+
|
243
|
+
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
244
|
+
return 3;
|
245
|
+
|
246
|
+
bufputc(ob, '&');
|
247
|
+
return 0;
|
248
|
+
}
|
249
|
+
|
250
|
+
// Converts "..." and ". . ." to an ellipsis (&hellip;). Any other '.' is
// copied through unchanged.
//
// Returns the number of bytes consumed after the first '.'.
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '.' && text[2] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 2;
	}

	if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
267
|
+
|
268
|
+
// Converts `` to opening double quote
|
269
|
+
static size_t
|
270
|
+
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
271
|
+
{
|
272
|
+
if (size >= 2 && text[1] == '`') {
|
273
|
+
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
274
|
+
return 1;
|
275
|
+
}
|
276
|
+
|
277
|
+
bufputc(ob, text[0]);
|
278
|
+
return 0;
|
279
|
+
}
|
280
|
+
|
281
|
+
// Converts 1/2, 1/4, 3/4 to the &frac12;, &frac14; and &frac34; entities.
//
// A fraction is converted only when it starts at a word boundary and is
// followed by the end of input or a word boundary; 1/4 may also be followed
// by "th" and 3/4 by "ths" (the suffix itself is left in place).
// Any other digit is copied through unchanged.
//
// Returns the number of bytes consumed after the first digit (2 on a match).
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3) {
		if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
			if (size == 3 || word_boundary(text[3])) {
				BUFPUTSL(ob, "&frac12;");
				return 2;
			}
		}

		if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "&frac14;");
				return 2;
			}
		}

		if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "&frac34;");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
313
|
+
|
314
|
+
// Converts " to left or right double quote
|
315
|
+
static size_t
|
316
|
+
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
317
|
+
{
|
318
|
+
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
|
319
|
+
BUFPUTSL(ob, """);
|
320
|
+
|
321
|
+
return 0;
|
322
|
+
}
|
323
|
+
|
324
|
+
static size_t
|
325
|
+
smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
326
|
+
{
|
327
|
+
static const char *skip_tags[] = {
|
328
|
+
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
329
|
+
};
|
330
|
+
static const size_t skip_tags_count = 8;
|
331
|
+
|
332
|
+
size_t tag, i = 0;
|
333
|
+
|
334
|
+
while (i < size && text[i] != '>')
|
335
|
+
i++;
|
336
|
+
|
337
|
+
for (tag = 0; tag < skip_tags_count; ++tag) {
|
338
|
+
if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
|
339
|
+
break;
|
340
|
+
}
|
341
|
+
|
342
|
+
if (tag < skip_tags_count) {
|
343
|
+
for (;;) {
|
344
|
+
while (i < size && text[i] != '<')
|
345
|
+
i++;
|
346
|
+
|
347
|
+
if (i == size)
|
348
|
+
break;
|
349
|
+
|
350
|
+
if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
|
351
|
+
break;
|
352
|
+
|
353
|
+
i++;
|
354
|
+
}
|
355
|
+
|
356
|
+
while (i < size && text[i] != '>')
|
357
|
+
i++;
|
358
|
+
}
|
359
|
+
|
360
|
+
bufput(ob, text, i + 1);
|
361
|
+
return i;
|
362
|
+
}
|
363
|
+
|
364
|
+
// Handles a backslash. When it escapes one of \ " ' . - ` only the escaped
// character is written (suppressing its SmartyPants conversion) and that
// character is consumed. In every other case the backslash itself is written
// and nothing else is consumed.
//
// Returns the number of bytes consumed after the backslash (0 or 1).
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	// A backslash at the very end of input has nothing to escape; keep it
	// in the output instead of dropping it.
	if (size < 2) {
		bufputc(ob, '\\');
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
|
385
|
+
|
386
|
+
#if 0
|
387
|
+
static struct {
|
388
|
+
uint8_t c0;
|
389
|
+
const uint8_t *pattern;
|
390
|
+
const uint8_t *entity;
|
391
|
+
int skip;
|
392
|
+
} smartypants_subs[] = {
|
393
|
+
{ '\'', "'s>", "’", 0 },
|
394
|
+
{ '\'', "'t>", "’", 0 },
|
395
|
+
{ '\'', "'re>", "’", 0 },
|
396
|
+
{ '\'', "'ll>", "’", 0 },
|
397
|
+
{ '\'', "'ve>", "’", 0 },
|
398
|
+
{ '\'', "'m>", "’", 0 },
|
399
|
+
{ '\'', "'d>", "’", 0 },
|
400
|
+
{ '-', "--", "—", 1 },
|
401
|
+
{ '-', "<->", "–", 0 },
|
402
|
+
{ '.', "...", "…", 2 },
|
403
|
+
{ '.', ". . .", "…", 4 },
|
404
|
+
{ '(', "(c)", "©", 2 },
|
405
|
+
{ '(', "(r)", "®", 2 },
|
406
|
+
{ '(', "(tm)", "™", 3 },
|
407
|
+
{ '3', "<3/4>", "¾", 2 },
|
408
|
+
{ '3', "<3/4ths>", "¾", 2 },
|
409
|
+
{ '1', "<1/2>", "½", 2 },
|
410
|
+
{ '1', "<1/4>", "¼", 2 },
|
411
|
+
{ '1', "<1/4th>", "¼", 2 },
|
412
|
+
{ '&', "�", 0, 3 },
|
413
|
+
};
|
414
|
+
#endif
|
415
|
+
|
416
|
+
void
|
417
|
+
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
|
418
|
+
{
|
419
|
+
size_t i;
|
420
|
+
struct smartypants_data smrt = {0, 0};
|
421
|
+
|
422
|
+
if (!text)
|
423
|
+
return;
|
424
|
+
|
425
|
+
bufgrow(ob, size);
|
426
|
+
|
427
|
+
for (i = 0; i < size; ++i) {
|
428
|
+
size_t org;
|
429
|
+
uint8_t action = 0;
|
430
|
+
|
431
|
+
org = i;
|
432
|
+
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
|
433
|
+
i++;
|
434
|
+
|
435
|
+
if (i > org)
|
436
|
+
bufput(ob, text + org, i - org);
|
437
|
+
|
438
|
+
if (i < size) {
|
439
|
+
i += smartypants_cb_ptrs[(int)action]
|
440
|
+
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
|
441
|
+
}
|
442
|
+
}
|
443
|
+
}
|
444
|
+
|
445
|
+
|