greenmat 3.2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/COPYING +14 -0
- data/Gemfile +9 -0
- data/README.md +36 -0
- data/Rakefile +62 -0
- data/bin/greenmat +7 -0
- data/ext/greenmat/autolink.c +296 -0
- data/ext/greenmat/autolink.h +49 -0
- data/ext/greenmat/buffer.c +196 -0
- data/ext/greenmat/buffer.h +83 -0
- data/ext/greenmat/extconf.rb +6 -0
- data/ext/greenmat/gm_markdown.c +161 -0
- data/ext/greenmat/gm_render.c +534 -0
- data/ext/greenmat/greenmat.h +30 -0
- data/ext/greenmat/houdini.h +29 -0
- data/ext/greenmat/houdini_href_e.c +108 -0
- data/ext/greenmat/houdini_html_e.c +83 -0
- data/ext/greenmat/html.c +826 -0
- data/ext/greenmat/html.h +84 -0
- data/ext/greenmat/html_blocks.h +229 -0
- data/ext/greenmat/html_smartypants.c +445 -0
- data/ext/greenmat/markdown.c +2912 -0
- data/ext/greenmat/markdown.h +138 -0
- data/ext/greenmat/stack.c +62 -0
- data/ext/greenmat/stack.h +26 -0
- data/greenmat.gemspec +72 -0
- data/lib/greenmat.rb +92 -0
- data/lib/greenmat/compat.rb +73 -0
- data/lib/greenmat/render_man.rb +65 -0
- data/lib/greenmat/render_strip.rb +48 -0
- data/test/benchmark.rb +24 -0
- data/test/custom_render_test.rb +28 -0
- data/test/greenmat_compat_test.rb +38 -0
- data/test/html5_test.rb +69 -0
- data/test/html_render_test.rb +241 -0
- data/test/html_toc_render_test.rb +76 -0
- data/test/markdown_test.rb +337 -0
- data/test/pathological_inputs_test.rb +34 -0
- data/test/safe_render_test.rb +36 -0
- data/test/smarty_html_test.rb +45 -0
- data/test/smarty_pants_test.rb +48 -0
- data/test/stripdown_render_test.rb +40 -0
- data/test/test_helper.rb +33 -0
- metadata +158 -0
data/ext/greenmat/html.h
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#ifndef HTML_H__
|
18
|
+
#define HTML_H__
|
19
|
+
|
20
|
+
#include "markdown.h"
|
21
|
+
#include "buffer.h"
|
22
|
+
#include <stdlib.h>
|
23
|
+
|
24
|
+
#ifdef __cplusplus
|
25
|
+
extern "C" {
|
26
|
+
#endif
|
27
|
+
|
28
|
+
/*
 * Option/state block handed to the renderer constructors declared in this
 * header (both take a `struct html_renderopt *`).
 */
struct html_renderopt {
	/*
	 * Table-of-contents bookkeeping.
	 * NOTE(review): the exact meaning of each counter is defined by the
	 * renderer implementation, which is not part of this header.
	 */
	struct {
		int current_level;
		int level_offset;
		int nesting_level;
	} toc_data;

	/* presumably a bitwise OR of html_render_mode values -- confirm */
	unsigned int flags;

	/*
	 * Extra callback.
	 * NOTE(review): looks like a hook for writing additional attributes
	 * for a link to `url` into `ob`; `self` is an opaque caller pointer.
	 * Confirm against the renderer before relying on this.
	 */
	void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
|
40
|
+
|
41
|
+
/*
 * Renderer option bits. Every constant occupies its own bit so values can
 * be OR-ed together (presumably into `render_flags` / `html_renderopt.flags`
 * -- confirm against the renderer).
 */
typedef enum {
	HTML_SKIP_HTML   = 0x001,	/* bit 0  */
	HTML_SKIP_STYLE  = 0x002,	/* bit 1  */
	HTML_SKIP_IMAGES = 0x004,	/* bit 2  */
	HTML_SKIP_LINKS  = 0x008,	/* bit 3  */
	HTML_EXPAND_TABS = 0x010,	/* bit 4  */
	HTML_SAFELINK    = 0x020,	/* bit 5  */
	HTML_TOC         = 0x040,	/* bit 6  */
	HTML_HARD_WRAP   = 0x080,	/* bit 7  */
	HTML_USE_XHTML   = 0x100,	/* bit 8  */
	HTML_ESCAPE      = 0x200,	/* bit 9  */
	HTML_PRETTIFY    = 0x400,	/* bit 10 */
} html_render_mode;
|
54
|
+
|
55
|
+
/*
 * Tag classification codes. Within this package the result of
 * sdhtml_is_tag() is compared against HTML_TAG_OPEN and HTML_TAG_CLOSE.
 */
typedef enum {
	HTML_TAG_NONE  = 0,
	HTML_TAG_OPEN  = 1,
	HTML_TAG_CLOSE = 2,
} html_tag;
|
60
|
+
|
61
|
+
/*
 * Classifies the markup at `tag_data` (of `tag_size` bytes) with respect to
 * `tagname`. Callers in this package compare the result against
 * HTML_TAG_OPEN and HTML_TAG_CLOSE.
 */
extern int
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);

/* NOTE(review): presumably fills `callbacks` / `options_ptr` for HTML output -- confirm in html.c */
extern void
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);

/* NOTE(review): presumably the table-of-contents counterpart of sdhtml_renderer -- confirm in html.c */
extern void
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);

/* Appends `text[0..size)` to `ob` with the smartypants substitutions applied. */
extern void
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);

/*
 * header method used internally in Greenmat
 * NOTE(review): ownership of the returned string is not visible from this
 * header -- check the definition before freeing or retaining it.
 */
char *header_anchor(const struct buf *buffer);
|
75
|
+
|
76
|
+
/* Set of bytes that STRIPPED_CHAR() reports as "stripped". */
#define STRIPPED_CHARS " -&+$,/:;=?@\"#{}|^~[]`\\*()%.!'"

/*
 * Non-zero when `x` occurs in STRIPPED_CHARS. Because strchr() also matches
 * the terminating NUL, STRIPPED_CHAR('\0') is true as well.
 * The expansion uses strchr() and NULL, so the including file must have
 * <string.h> in scope.
 */
#define STRIPPED_CHAR(x) (strchr(STRIPPED_CHARS, (x)) != NULL)
|
78
|
+
|
79
|
+
#ifdef __cplusplus
|
80
|
+
}
|
81
|
+
#endif
|
82
|
+
|
83
|
+
#endif
|
84
|
+
|
@@ -0,0 +1,229 @@
|
|
1
|
+
/* C code produced by gperf version 3.0.4 */
|
2
|
+
/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
|
3
|
+
/* See http://git.io/RN0ncw for the list of recognized elements */
|
4
|
+
/* Computed positions: -k'1-2' */
|
5
|
+
|
6
|
+
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
7
|
+
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
8
|
+
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
9
|
+
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
10
|
+
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
11
|
+
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
12
|
+
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
13
|
+
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
14
|
+
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
15
|
+
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
16
|
+
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
17
|
+
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
18
|
+
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
19
|
+
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
20
|
+
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
21
|
+
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
22
|
+
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
23
|
+
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
24
|
+
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
25
|
+
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
26
|
+
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
27
|
+
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
28
|
+
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
|
29
|
+
/* The character set is not based on ISO-646. */
|
30
|
+
error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
|
31
|
+
#endif
|
32
|
+
|
33
|
+
/* maximum key range = 67, duplicates = 0 */
|
34
|
+
|
35
|
+
#ifndef GPERF_DOWNCASE
|
36
|
+
#define GPERF_DOWNCASE 1
|
37
|
+
/*
 * Byte-wise case folding table: 'A'..'Z' (65..90) map to 'a'..'z'
 * (97..122); every other byte maps to itself. Sixteen entries per row,
 * the row comment gives the index of the first entry.
 */
static unsigned char gperf_downcase[256] =
{
	/* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
	/* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
	/* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
	/* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
	/* 0x40 */  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	/* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,  91,  92,  93,  94,  95,
	/* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	/* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
	/* 0x80 */ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
	/* 0x90 */ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	/* 0xA0 */ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	/* 0xB0 */ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
	/* 0xC0 */ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
	/* 0xD0 */ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
	/* 0xE0 */ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	/* 0xF0 */ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
|
58
|
+
#endif
|
59
|
+
|
60
|
+
#ifndef GPERF_CASE_STRNCMP
|
61
|
+
#define GPERF_CASE_STRNCMP 1
|
62
|
+
static int
|
63
|
+
gperf_case_strncmp (s1, s2, n)
|
64
|
+
register const char *s1;
|
65
|
+
register const char *s2;
|
66
|
+
register unsigned int n;
|
67
|
+
{
|
68
|
+
for (; n > 0;)
|
69
|
+
{
|
70
|
+
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
|
71
|
+
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
|
72
|
+
if (c1 != 0 && c1 == c2)
|
73
|
+
{
|
74
|
+
n--;
|
75
|
+
continue;
|
76
|
+
}
|
77
|
+
return (int)c1 - (int)c2;
|
78
|
+
}
|
79
|
+
return 0;
|
80
|
+
}
|
81
|
+
#endif
|
82
|
+
|
83
|
+
/*
 * gperf hash over the first two bytes of `str` plus its length.
 *
 * `len` must be at least 1: for any length other than 1 the function reads
 * str[1] (find_block_tag() only calls it with 1 <= len <= 10).
 * Upper- and lower-case letters hash identically because asso_values holds
 * the same entry for both cases.
 *
 * Written with a prototype instead of the generated K&R identifier list:
 * old-style definitions are not valid C++ and were removed in C23.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash_block_tag(const char *str, unsigned int len)
{
	/* 257 entries: the second key byte is looked up at index byte + 1 */
	static const unsigned char asso_values[] =
	{
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		55, 50, 45, 40, 35, 30, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 15, 10, 15, 15, 15,
		 0, 20, 10, 10,  5, 68, 68,  0, 20, 25,
		 0, 68, 68,  0, 25,  0, 15, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 15, 10, 15,
		15, 15,  0, 20, 10, 10,  5, 68, 68,  0,
		20, 25,  0, 68, 68,  0, 25,  0, 15, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
		68, 68, 68, 68, 68, 68, 68
	};
	int hval = len;

	switch (hval)
	{
		default:
			hval += asso_values[(unsigned char)str[1]+1];
		/*FALLTHROUGH*/
		case 1:
			hval += asso_values[(unsigned char)str[0]];
			break;
	}
	return hval;
}
|
137
|
+
|
138
|
+
/*
 * Case-insensitive lookup of `str[0..len)` in the gperf word list of
 * block-level tag names.
 *
 * Returns the matching (lower-case) entry of the static word list, or 0
 * when the name is not in the list. `str` does not have to be
 * NUL-terminated at `len`, but it must be readable for `len` bytes.
 *
 * Written with a prototype instead of the generated K&R identifier list:
 * old-style definitions are not valid C++ and were removed in C23.
 */
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
find_block_tag(const char *str, unsigned int len)
{
	enum
	{
		TOTAL_KEYWORDS = 41,
		MIN_WORD_LENGTH = 1,
		MAX_WORD_LENGTH = 10,
		MIN_HASH_VALUE = 1,
		MAX_HASH_VALUE = 67
	};

	/* indexed by hash_block_tag(); "" marks an unused hash slot */
	static const char * const wordlist[] =
	{
		"",
		"p",
		"ul",
		"pre",
		"form",
		"style",
		"footer",
		"section",
		"", "", "",
		"figure",
		"hr",
		"fieldset",
		"math",
		"figcaption",
		"header",
		"dl",
		"del",
		"",
		"blockquote",
		"script",
		"article",
		"div",
		"",
		"video",
		"hgroup",
		"ol",
		"noscript",
		"", "",
		"canvas",
		"dd",
		"nav",
		"abbr",
		"audio",
		"iframe",
		"address",
		"ins",
		"",
		"table",
		"",
		"h6",
		"", "",
		"aside",
		"output",
		"h5",
		"", "",
		"tfoot",
		"",
		"h4",
		"", "", "", "",
		"h3",
		"", "", "", "",
		"h2",
		"", "", "", "",
		"h1"
	};

	if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
	{
		int key = hash_block_tag (str, len);

		if (key <= MAX_HASH_VALUE && key >= 0)
		{
			const char *s = wordlist[key];

			/* cheap first-byte test (ignoring bit 5) before the full compare */
			if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
				return s;
		}
	}
	return 0;
}
|
@@ -0,0 +1,445 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include "buffer.h"
|
18
|
+
#include "html.h"
|
19
|
+
|
20
|
+
#include <string.h>
|
21
|
+
#include <stdlib.h>
|
22
|
+
#include <stdio.h>
|
23
|
+
#include <ctype.h>
|
24
|
+
|
25
|
+
#if defined(_WIN32)
|
26
|
+
#define snprintf _snprintf
|
27
|
+
#endif
|
28
|
+
|
29
|
+
/*
 * Quote state carried across callbacks during one sdhtml_smartypants() run.
 * Each flag is handed to smartypants_quotes() as `is_open` and flipped
 * there whenever a curly quote is emitted.
 */
struct smartypants_data {
	int in_squote;	/* non-zero while a single quote is open */
	int in_dquote;	/* non-zero while a double quote is open */
};
|
33
|
+
|
34
|
+
/*
 * Callback type used by the dispatch table below.
 *
 * A callback is invoked with `text` pointing at the trigger byte and `size`
 * bytes remaining (trigger included). `previous_char` is the input byte
 * just before the trigger, or 0 at the start of the input. The callback
 * writes its output to `ob` and returns how many input bytes it consumed
 * in addition to text[0]; the driver loop then steps over text[0] itself.
 */
typedef size_t (*smartypants_cb)(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);

/* Forward declarations, listed in dispatch-table order. */
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);

/* Indexed by the action codes stored in smartypants_cb_chars; 0 means "no callback". */
static smartypants_cb smartypants_cb_ptrs[] =
{
	NULL,				/* 0 */
	smartypants_cb__dash,		/* 1 */
	smartypants_cb__parens,		/* 2 */
	smartypants_cb__squote,		/* 3 */
	smartypants_cb__dquote,		/* 4 */
	smartypants_cb__amp,		/* 5 */
	smartypants_cb__period,		/* 6 */
	smartypants_cb__number,		/* 7 */
	smartypants_cb__ltag,		/* 8 */
	smartypants_cb__backtick,	/* 9 */
	smartypants_cb__escape,		/* 10 */
};
|
60
|
+
|
61
|
+
/*
 * Maps every input byte to an index into smartypants_cb_ptrs (0 = copy the
 * byte through unchanged). Sixteen entries per row; the row comment gives
 * the index of the first entry and names the non-zero bytes in that row.
 */
static const uint8_t smartypants_cb_chars[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,	/* " & ' ( - . */
	/* 0x30 */ 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,	/* 1 3 < */
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,	/* backslash */
	/* 0x60 */ 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* backtick */
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
79
|
+
|
80
|
+
/*
 * Returns 1 when `c` can delimit a word: NUL (used by callers to stand for
 * "no neighbouring character"), whitespace or punctuation. Returns 0
 * otherwise.
 */
static inline int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	return isspace(c) || ispunct(c);
}
|
85
|
+
|
86
|
+
// If 'text' begins with any kind of single quote (e.g. "'" or "&apos;" etc.),
// returns the length of the sequence of characters that makes up the single-
// quote. Otherwise, returns zero.
//
// Recognised spellings: the literal ' and the entities &#39; &#x27; &apos;.
// 'text' does not have to be NUL-terminated; at most 'size' bytes are read.
static size_t
squote_len(const uint8_t *text, size_t size)
{
	static const char *const single_quote_list[] = { "'", "&#39;", "&#x27;", "&apos;", NULL };
	const char *const *p;

	for (p = single_quote_list; *p; ++p) {
		size_t len = strlen(*p);

		// only compare when the whole spelling fits in the remaining input
		if (size >= len && memcmp(text, *p, len) == 0)
			return len;
	}

	return 0;
}
|
104
|
+
|
105
|
+
// Emits a curly quote entity for a straight quote that sits at the very
// beginning or end of a word.
//
// 'quote' selects the entity family: 'd' gives &ldquo;/&rdquo;, 's' gives
// &lsquo;/&rsquo;. '*is_open' says whether a quote of that family is
// currently open. A closing quote needs a word boundary after it
// ('next_char'), an opening quote needs one before it ('previous_char').
//
// Returns 1 after writing the entity to 'ob' and flipping '*is_open';
// returns 0 (nothing written, state untouched) when the neighbouring
// character is not a word boundary.
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	char ent[8];
	const int closing = (*is_open != 0);
	const uint8_t neighbour = closing ? next_char : previous_char;

	if (!word_boundary(neighbour))
		return 0;

	snprintf(ent, sizeof(ent), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	bufputs(ob, ent);
	return 1;
}
|
122
|
+
|
123
|
+
// Converts ' to left or right single quote; but the initial ' might be in
|
124
|
+
// different forms, e.g. ' or ' or '.
|
125
|
+
// 'squote_text' points to the original single quote, and 'squote_size' is its length.
|
126
|
+
// 'text' points at the last character of the single-quote, e.g. ' or ;
|
127
|
+
static size_t
|
128
|
+
smartypants_squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
|
129
|
+
const uint8_t *squote_text, size_t squote_size)
|
130
|
+
{
|
131
|
+
if (size >= 2) {
|
132
|
+
uint8_t t1 = tolower(text[1]);
|
133
|
+
int next_squote_len = squote_len(text+1, size-1);
|
134
|
+
|
135
|
+
// convert '' to “ or ”
|
136
|
+
if (next_squote_len > 0) {
|
137
|
+
uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
|
138
|
+
if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
|
139
|
+
return next_squote_len;
|
140
|
+
}
|
141
|
+
|
142
|
+
// trailing single quotes: students', tryin'
|
143
|
+
if (word_boundary(t1)) {
|
144
|
+
BUFPUTSL(ob, "’");
|
145
|
+
return 0;
|
146
|
+
}
|
147
|
+
|
148
|
+
// Tom's, isn't, I'm, I'd
|
149
|
+
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
150
|
+
(size == 3 || word_boundary(text[2]))) {
|
151
|
+
BUFPUTSL(ob, "’");
|
152
|
+
return 0;
|
153
|
+
}
|
154
|
+
|
155
|
+
// you're, you'll, you've
|
156
|
+
if (size >= 3) {
|
157
|
+
uint8_t t2 = tolower(text[2]);
|
158
|
+
|
159
|
+
if (((t1 == 'r' && t2 == 'e') ||
|
160
|
+
(t1 == 'l' && t2 == 'l') ||
|
161
|
+
(t1 == 'v' && t2 == 'e')) &&
|
162
|
+
(size == 4 || word_boundary(text[3]))) {
|
163
|
+
BUFPUTSL(ob, "’");
|
164
|
+
return 0;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
170
|
+
return 0;
|
171
|
+
|
172
|
+
bufput(ob, squote_text, squote_size);
|
173
|
+
return 0;
|
174
|
+
}
|
175
|
+
|
176
|
+
// Callback for a literal ' byte: converts it to a left or right single quote.
// A bare ' is one byte long and is its own "last character", so 'text'
// doubles as the original quote text passed to smartypants_squote().
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const uint8_t *squote_text = text;
	const size_t squote_size = 1;

	return smartypants_squote(ob, smrt, previous_char, text, size, squote_text, squote_size);
}
|
182
|
+
|
183
|
+
// Converts (c), (r), (tm) -- matched case-insensitively -- to &copy;,
// &reg; and &trade;. Any other '(' is copied through unchanged.
// Returns the number of input bytes consumed beyond the '(' itself.
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		uint8_t t1 = tolower(text[1]);
		uint8_t t2 = tolower(text[2]);

		if (t1 == 'c' && t2 == ')') {
			BUFPUTSL(ob, "&copy;");
			return 2;
		}

		if (t1 == 'r' && t2 == ')') {
			BUFPUTSL(ob, "&reg;");
			return 2;
		}

		if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
210
|
+
|
211
|
+
// Converts "---" to &mdash; and "--" to &ndash;; a single '-' is copied
// through unchanged.
// Returns the number of input bytes consumed beyond the first '-'.
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '-' && text[2] == '-') {
		BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (size >= 2 && text[1] == '-') {
		BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
228
|
+
|
229
|
+
// Converts " etc.
|
230
|
+
static size_t
|
231
|
+
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
232
|
+
{
|
233
|
+
if (size >= 6 && memcmp(text, """, 6) == 0) {
|
234
|
+
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
|
235
|
+
return 5;
|
236
|
+
}
|
237
|
+
|
238
|
+
int len = squote_len(text, size);
|
239
|
+
if (len > 0) {
|
240
|
+
return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
|
241
|
+
}
|
242
|
+
|
243
|
+
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
244
|
+
return 3;
|
245
|
+
|
246
|
+
bufputc(ob, '&');
|
247
|
+
return 0;
|
248
|
+
}
|
249
|
+
|
250
|
+
// Converts "..." and ". . ." to &hellip;; any other '.' is copied through
// unchanged.
// Returns the number of input bytes consumed beyond the first '.'.
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '.' && text[2] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 2;
	}

	if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
267
|
+
|
268
|
+
// Converts `` to opening double quote
|
269
|
+
static size_t
|
270
|
+
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
271
|
+
{
|
272
|
+
if (size >= 2 && text[1] == '`') {
|
273
|
+
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
274
|
+
return 1;
|
275
|
+
}
|
276
|
+
|
277
|
+
bufputc(ob, text[0]);
|
278
|
+
return 0;
|
279
|
+
}
|
280
|
+
|
281
|
+
// Converts 1/2, 1/4, 3/4 to &frac12;, &frac14;, &frac34;.
//
// The fraction must start at a word boundary and must either end the input,
// be followed by a word boundary, or (for 1/4 and 3/4) be followed by "th" /
// "ths" respectively. Only the three fraction bytes are consumed; a
// following "th"/"ths" is left for the driver loop to copy through.
// Any other '1' or '3' is copied through unchanged.
// Returns the number of input bytes consumed beyond the first digit.
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3) {
		if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
			if (size == 3 || word_boundary(text[3])) {
				BUFPUTSL(ob, "&frac12;");
				return 2;
			}
		}

		if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "&frac14;");
				return 2;
			}
		}

		if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "&frac34;");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
313
|
+
|
314
|
+
// Converts " to left or right double quote
|
315
|
+
static size_t
|
316
|
+
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
317
|
+
{
|
318
|
+
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
|
319
|
+
BUFPUTSL(ob, """);
|
320
|
+
|
321
|
+
return 0;
|
322
|
+
}
|
323
|
+
|
324
|
+
static size_t
|
325
|
+
smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
326
|
+
{
|
327
|
+
static const char *skip_tags[] = {
|
328
|
+
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
329
|
+
};
|
330
|
+
static const size_t skip_tags_count = 8;
|
331
|
+
|
332
|
+
size_t tag, i = 0;
|
333
|
+
|
334
|
+
while (i < size && text[i] != '>')
|
335
|
+
i++;
|
336
|
+
|
337
|
+
for (tag = 0; tag < skip_tags_count; ++tag) {
|
338
|
+
if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
|
339
|
+
break;
|
340
|
+
}
|
341
|
+
|
342
|
+
if (tag < skip_tags_count) {
|
343
|
+
for (;;) {
|
344
|
+
while (i < size && text[i] != '<')
|
345
|
+
i++;
|
346
|
+
|
347
|
+
if (i == size)
|
348
|
+
break;
|
349
|
+
|
350
|
+
if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
|
351
|
+
break;
|
352
|
+
|
353
|
+
i++;
|
354
|
+
}
|
355
|
+
|
356
|
+
while (i < size && text[i] != '>')
|
357
|
+
i++;
|
358
|
+
}
|
359
|
+
|
360
|
+
bufput(ob, text, i + 1);
|
361
|
+
return i;
|
362
|
+
}
|
363
|
+
|
364
|
+
// Handles a backslash. When it precedes one of \ " ' . - ` the backslash is
// dropped and the escaped character is written literally, which keeps it
// away from the other callbacks. In every other case the backslash itself is
// written and the following byte is left for the driver loop.
//
// A backslash that is the last byte of the input is written through too,
// for consistency with the 'default' branch (it used to be dropped).
// Returns the number of input bytes consumed beyond the backslash itself.
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size < 2) {
		if (size == 1)
			bufputc(ob, text[0]);
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
|
385
|
+
|
386
|
+
#if 0
/*
 * Disabled table-driven description of the substitutions; nothing in this
 * file references it. Fields: trigger byte, pattern, replacement entity,
 * skip count.
 * NOTE(review): the entity strings were restored to match the literals used
 * by the callbacks in this file; '<' and '>' inside a pattern appear to
 * mark word boundaries -- confirm before re-enabling.
 */
static struct {
	uint8_t c0;
	const uint8_t *pattern;
	const uint8_t *entity;
	int skip;
} smartypants_subs[] = {
	{ '\'', "'s>",      "&rsquo;",  0 },
	{ '\'', "'t>",      "&rsquo;",  0 },
	{ '\'', "'re>",     "&rsquo;",  0 },
	{ '\'', "'ll>",     "&rsquo;",  0 },
	{ '\'', "'ve>",     "&rsquo;",  0 },
	{ '\'', "'m>",      "&rsquo;",  0 },
	{ '\'', "'d>",      "&rsquo;",  0 },
	{ '-',  "--",       "&mdash;",  1 },
	{ '-',  "<->",      "&ndash;",  0 },
	{ '.',  "...",      "&hellip;", 2 },
	{ '.',  ". . .",    "&hellip;", 4 },
	{ '(',  "(c)",      "&copy;",   2 },
	{ '(',  "(r)",      "&reg;",    2 },
	{ '(',  "(tm)",     "&trade;",  3 },
	{ '3',  "<3/4>",    "&frac34;", 2 },
	{ '3',  "<3/4ths>", "&frac34;", 2 },
	{ '1',  "<1/2>",    "&frac12;", 2 },
	{ '1',  "<1/4>",    "&frac14;", 2 },
	{ '1',  "<1/4th>",  "&frac14;", 2 },
	{ '&',  "&#0;",     0,          3 },
};
#endif
|
415
|
+
|
416
|
+
void
|
417
|
+
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
|
418
|
+
{
|
419
|
+
size_t i;
|
420
|
+
struct smartypants_data smrt = {0, 0};
|
421
|
+
|
422
|
+
if (!text)
|
423
|
+
return;
|
424
|
+
|
425
|
+
bufgrow(ob, size);
|
426
|
+
|
427
|
+
for (i = 0; i < size; ++i) {
|
428
|
+
size_t org;
|
429
|
+
uint8_t action = 0;
|
430
|
+
|
431
|
+
org = i;
|
432
|
+
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
|
433
|
+
i++;
|
434
|
+
|
435
|
+
if (i > org)
|
436
|
+
bufput(ob, text + org, i - org);
|
437
|
+
|
438
|
+
if (i < size) {
|
439
|
+
i += smartypants_cb_ptrs[(int)action]
|
440
|
+
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
|
441
|
+
}
|
442
|
+
}
|
443
|
+
}
|
444
|
+
|
445
|
+
|