walters 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +28 -0
- data/README.md +20 -0
- data/Rakefile +6 -0
- data/ext/Rakefile +5 -0
- data/ext/houdini/buffer.c +249 -0
- data/ext/houdini/buffer.h +113 -0
- data/ext/houdini/houdini.h +44 -0
- data/ext/houdini/houdini_href_e.c +115 -0
- data/ext/houdini/houdini_html_e.c +90 -0
- data/ext/houdini/houdini_html_u.c +122 -0
- data/ext/houdini/houdini_js_e.c +90 -0
- data/ext/houdini/houdini_js_u.c +60 -0
- data/ext/houdini/houdini_uri_e.c +101 -0
- data/ext/houdini/houdini_uri_u.c +65 -0
- data/ext/houdini/houdini_xml_e.c +136 -0
- data/ext/houdini/html_unescape.h +754 -0
- data/ext/walters.cpp +105 -0
- data/ext/walters_buffer.cpp +113 -0
- data/ext/x86_64-darwin/__xni_walters.cpp +450 -0
- data/ext/x86_64-darwin/walters.h +43 -0
- data/lib/walters.rb +2 -0
- data/lib/walters/version.rb +3 -0
- data/lib/walters/walters.rb +87 -0
- data/walters.gemspec +21 -0
- metadata +134 -0
@@ -0,0 +1,101 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
 * Per-byte flags used by escape() when is_url is non-zero: a non-zero
 * entry means the byte is copied verbatim. Only ASCII letters, digits,
 * '-', '.' and '_' are flagged; every other byte value is escaped.
 */
static const char URL_SAFE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	/* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
25
|
+
|
26
|
+
/*
 * Per-byte flags used by escape() when is_url is zero: a non-zero entry
 * means the byte is copied verbatim. Besides ASCII letters and digits
 * this table also passes ! $ & ' ( ) * + , - . / : ; = ? @ [ ] _ ~
 * through unchanged; every other byte value is escaped.
 */
static const char URI_SAFE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
44
|
+
|
45
|
+
static int
|
46
|
+
escape(gh_buf *ob, const uint8_t *src, size_t size, int is_url)
|
47
|
+
{
|
48
|
+
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
49
|
+
const char *safe_table = is_url ? URL_SAFE : URI_SAFE;
|
50
|
+
|
51
|
+
size_t i = 0, org;
|
52
|
+
uint8_t hex_str[3];
|
53
|
+
|
54
|
+
hex_str[0] = '%';
|
55
|
+
|
56
|
+
while (i < size) {
|
57
|
+
org = i;
|
58
|
+
while (i < size && safe_table[src[i]] != 0)
|
59
|
+
i++;
|
60
|
+
|
61
|
+
if (likely(i > org)) {
|
62
|
+
if (unlikely(org == 0)) {
|
63
|
+
if (i >= size)
|
64
|
+
return 0;
|
65
|
+
|
66
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
67
|
+
}
|
68
|
+
|
69
|
+
gh_buf_put(ob, src + org, i - org);
|
70
|
+
}
|
71
|
+
|
72
|
+
/* escaping */
|
73
|
+
if (i >= size)
|
74
|
+
break;
|
75
|
+
|
76
|
+
if (src[i] == ' ' && is_url) {
|
77
|
+
gh_buf_putc(ob, '+');
|
78
|
+
} else {
|
79
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
80
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
81
|
+
gh_buf_put(ob, hex_str, 3);
|
82
|
+
}
|
83
|
+
|
84
|
+
i++;
|
85
|
+
}
|
86
|
+
|
87
|
+
return 1;
|
88
|
+
}
|
89
|
+
|
90
|
+
int
|
91
|
+
houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size)
|
92
|
+
{
|
93
|
+
return escape(ob, src, size, 0);
|
94
|
+
}
|
95
|
+
|
96
|
+
int
|
97
|
+
houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size)
|
98
|
+
{
|
99
|
+
return escape(ob, src, size, 1);
|
100
|
+
}
|
101
|
+
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/*
 * Numeric value (0-15) of one ASCII hex digit.
 *
 * OR-ing with 32 folds 'A'-'F' onto 'a'-'f' and leaves '0'-'9' unchanged;
 * modulo 39 then maps '0'-'9' to 9..18 and 'a'-'f' to 19..24, so that
 * subtracting 9 yields 0..15. The result is only meaningful when `c` is
 * a hex digit. The argument is parenthesized so that expressions with
 * operators of lower precedence than `|` expand correctly.
 */
#define hex2c(c) ((((c) | 32) % 39) - 9)
|
8
|
+
|
9
|
+
static int
|
10
|
+
unescape(gh_buf *ob, const uint8_t *src, size_t size, int is_url)
|
11
|
+
{
|
12
|
+
size_t i = 0, org;
|
13
|
+
|
14
|
+
while (i < size) {
|
15
|
+
org = i;
|
16
|
+
while (i < size && src[i] != '%')
|
17
|
+
i++;
|
18
|
+
|
19
|
+
if (likely(i > org)) {
|
20
|
+
if (unlikely(org == 0)) {
|
21
|
+
if (i >= size && !is_url)
|
22
|
+
return 0;
|
23
|
+
|
24
|
+
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
25
|
+
}
|
26
|
+
|
27
|
+
gh_buf_put(ob, src + org, i - org);
|
28
|
+
}
|
29
|
+
|
30
|
+
/* escaping */
|
31
|
+
if (i >= size)
|
32
|
+
break;
|
33
|
+
|
34
|
+
i++;
|
35
|
+
|
36
|
+
if (i + 1 < size && _isxdigit(src[i]) && _isxdigit(src[i + 1])) {
|
37
|
+
unsigned char new_char = (hex2c(src[i]) << 4) + hex2c(src[i + 1]);
|
38
|
+
gh_buf_putc(ob, new_char);
|
39
|
+
i += 2;
|
40
|
+
} else {
|
41
|
+
gh_buf_putc(ob, '%');
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
if (is_url) {
|
46
|
+
char *find = (char *)gh_buf_cstr(ob);
|
47
|
+
while ((find = strchr(find, '+')) != NULL)
|
48
|
+
*find = ' ';
|
49
|
+
}
|
50
|
+
|
51
|
+
return 1;
|
52
|
+
}
|
53
|
+
|
54
|
+
int
|
55
|
+
houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size)
|
56
|
+
{
|
57
|
+
return unescape(ob, src, size, 0);
|
58
|
+
}
|
59
|
+
|
60
|
+
int
|
61
|
+
houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size)
|
62
|
+
{
|
63
|
+
return unescape(ob, src, size, 1);
|
64
|
+
}
|
65
|
+
|
@@ -0,0 +1,136 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/**
 * Replacement strings, indexed by the codes stored in XML_LOOKUP_TABLE.
 * Codes below CODE_INVALID never select a string; the remaining entries
 * implement these substitutions:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &apos;
 *
 * and "?" for any byte or sequence rejected as invalid.
 */
static const char *LOOKUP_CODES[] = {
	"", /* 0: reserved: use literal single character */
	"", /* 1: unused */
	"", /* 2: reserved: 2 character UTF-8 */
	"", /* 3: reserved: 3 character UTF-8 */
	"", /* 4: reserved: 4 character UTF-8 */
	"?", /* 5: invalid UTF-8 character */
	"&quot;", /* 6: '"' */
	"&amp;", /* 7: '&' */
	"&apos;", /* 8: '\'' */
	"&lt;", /* 9: '<' */
	"&gt;" /* 10: '>' */
};
|
27
|
+
|
28
|
+
/*
 * Lookup code for an invalid byte or UTF-8 sequence (the "?" entry of
 * LOOKUP_CODES). houdini_escape_xml() treats every code greater than or
 * equal to this value as "emit a replacement string".
 */
static const char CODE_INVALID = 5;
|
29
|
+
|
30
|
+
/*
 * Classification of every possible input byte for houdini_escape_xml():
 *   0     copy the byte as-is
 *   2-4   lead byte of a UTF-8 sequence of that many bytes
 *   5     invalid (CODE_INVALID)
 *   6-10  index of the replacement string in LOOKUP_CODES
 */
static const char XML_LOOKUP_TABLE[] = {
	/* 0x00-0x7F  ASCII: 0xxxxxxx */
	/* 0x00 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 5, 5, 0, 5, 5,
	/* 0x10 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0x20 */ 0, 0, 6, 0, 0, 0, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0,10, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* 0x80-0xBF  Invalid UTF-8 char start: 10xxxxxx */
	/* 0x80 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0x90 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0xA0 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	/* 0xB0 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,

	/* Multibyte UTF-8 */

	/* 0xC0-0xDF  2 bytes: 110xxxxx */
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

	/* 0xE0-0xEF  3 bytes: 1110xxxx */
	/* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

	/* 0xF0-0xF7  4 bytes: 11110xxx */
	/* 0xF0 */ 4, 4, 4, 4, 4, 4, 4, 4,

	/* 0xF8-0xFF  Invalid UTF-8: 11111xxx */
	/* 0xF8 */ 5, 5, 5, 5, 5, 5, 5, 5,
};
|
62
|
+
|
63
|
+
int
|
64
|
+
houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size)
|
65
|
+
{
|
66
|
+
size_t i = 0;
|
67
|
+
unsigned char code = 0;
|
68
|
+
|
69
|
+
gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
|
70
|
+
|
71
|
+
while (i < size) {
|
72
|
+
size_t start, end;
|
73
|
+
|
74
|
+
start = end = i;
|
75
|
+
|
76
|
+
while (i < size) {
|
77
|
+
unsigned int byte;
|
78
|
+
|
79
|
+
byte = src[i++];
|
80
|
+
code = XML_LOOKUP_TABLE[byte];
|
81
|
+
|
82
|
+
if (!code) {
|
83
|
+
/* single character used literally */
|
84
|
+
} else if (code >= CODE_INVALID) {
|
85
|
+
break; /* insert lookup code string */
|
86
|
+
} else if (code > size - end) {
|
87
|
+
code = CODE_INVALID; /* truncated UTF-8 character */
|
88
|
+
break;
|
89
|
+
} else {
|
90
|
+
unsigned int chr = byte & (0xff >> code);
|
91
|
+
|
92
|
+
while (--code) {
|
93
|
+
byte = src[i++];
|
94
|
+
if ((byte & 0xc0) != 0x80) {
|
95
|
+
code = CODE_INVALID;
|
96
|
+
break;
|
97
|
+
}
|
98
|
+
chr = (chr << 6) + (byte & 0x3f);
|
99
|
+
}
|
100
|
+
|
101
|
+
switch (i - end) {
|
102
|
+
case 2:
|
103
|
+
if (chr < 0x80)
|
104
|
+
code = CODE_INVALID;
|
105
|
+
break;
|
106
|
+
case 3:
|
107
|
+
if (chr < 0x800 ||
|
108
|
+
(chr > 0xd7ff && chr < 0xe000) ||
|
109
|
+
chr > 0xfffd)
|
110
|
+
code = CODE_INVALID;
|
111
|
+
break;
|
112
|
+
case 4:
|
113
|
+
if (chr < 0x10000 || chr > 0x10ffff)
|
114
|
+
code = CODE_INVALID;
|
115
|
+
break;
|
116
|
+
default:
|
117
|
+
break;
|
118
|
+
}
|
119
|
+
if (code == CODE_INVALID)
|
120
|
+
break;
|
121
|
+
}
|
122
|
+
end = i;
|
123
|
+
}
|
124
|
+
|
125
|
+
if (end > start)
|
126
|
+
gh_buf_put(ob, src + start, end - start);
|
127
|
+
|
128
|
+
/* escaping */
|
129
|
+
if (end >= size)
|
130
|
+
break;
|
131
|
+
|
132
|
+
gh_buf_puts(ob, LOOKUP_CODES[code]);
|
133
|
+
}
|
134
|
+
|
135
|
+
return 1;
|
136
|
+
}
|
@@ -0,0 +1,754 @@
|
|
1
|
+
/* C code produced by gperf version 3.0.3 */
|
2
|
+
/* Command-line: gperf -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m100 html_unescape.gperf */
|
3
|
+
/* Computed positions: -k'1-3,5,$' */
|
4
|
+
|
5
|
+
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
6
|
+
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
7
|
+
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
8
|
+
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
9
|
+
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
10
|
+
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
11
|
+
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
12
|
+
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
13
|
+
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
14
|
+
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
15
|
+
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
16
|
+
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
17
|
+
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
18
|
+
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
19
|
+
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
20
|
+
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
21
|
+
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
22
|
+
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
23
|
+
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
24
|
+
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
25
|
+
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
26
|
+
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
27
|
+
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
|
28
|
+
/* The character set is not based on ISO-646. */
|
29
|
+
error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
|
30
|
+
#endif
|
31
|
+
|
32
|
+
#line 1 "html_unescape.gperf"
|
33
|
+
/* One row of the entity table: an entity name and its UTF-8 encoding. */
struct html_ent {
	/* entity name without '&' and ';' (e.g. "amp"); null in unused slots */
	const char *entity;
	/* number of bytes of utf8[] that are in use */
	unsigned char utf8_len;
	/* UTF-8 bytes the entity expands to */
	unsigned char utf8[3];
};
|
38
|
+
|
39
|
+
#define TOTAL_KEYWORDS 252
|
40
|
+
#define MIN_WORD_LENGTH 2
|
41
|
+
#define MAX_WORD_LENGTH 8
|
42
|
+
#define MIN_HASH_VALUE 10
|
43
|
+
#define MAX_HASH_VALUE 418
|
44
|
+
/* maximum key range = 409, duplicates = 0 */
|
45
|
+
|
46
|
+
/*
 * gperf-generated perfect hash over the entity names.
 *
 * The hash is `len` plus the association values of the first, second
 * (shifted by one), third, fifth and last characters, depending on how
 * many characters the key has. The caller must pass len >= 1; the
 * function always reads str[len - 1], and reads str[4] for any length
 * not covered by the explicit cases 1-4.
 *
 * Defined with a prototype instead of the generated old-style (K&R)
 * parameter list, which C23 no longer accepts. The table and hashing
 * logic are unchanged.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash_entity (register const char *str, register unsigned int len)
{
  static const unsigned short asso_values[] =
    {
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419,  12,
       29,  24,   1, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419,  45, 137,  28,  17,  87,
        3,  16,   8, 103, 419,   7,  11,   1,   5,  76,
      116, 419,   1,   9,  16,  86, 419, 419,   9,   5,
        2, 419, 419, 419, 419, 419, 419,   2,  28,  26,
        4,   3, 109,  87, 141,   4, 197,   1,  36,  85,
       12,   1,   1, 189,  55,  17,   6,  34,  61,  10,
        5, 110,  11,   1, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419, 419, 419, 419,
      419, 419, 419, 419, 419, 419, 419
    };
  register int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[4]];
      /*FALLTHROUGH*/
      case 4:
      case 3:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        /* table has 257 entries so that the +1 offset stays in bounds */
        hval += asso_values[(unsigned char)str[1]+1];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval + asso_values[(unsigned char)str[len - 1]];
}
|
107
|
+
|
108
|
+
#ifdef __GNUC__
|
109
|
+
__inline
|
110
|
+
#ifdef __GNUC_STDC_INLINE__
|
111
|
+
__attribute__ ((__gnu_inline__))
|
112
|
+
#endif
|
113
|
+
#endif
|
114
|
+
const struct html_ent *
|
115
|
+
find_entity (str, len)
|
116
|
+
register const char *str;
|
117
|
+
register unsigned int len;
|
118
|
+
{
|
119
|
+
static const unsigned char lengthtable[] =
|
120
|
+
{
|
121
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
|
122
|
+
3, 0, 3, 4, 3, 3, 3, 0, 5, 6, 4, 5, 4, 4,
|
123
|
+
3, 5, 4, 4, 5, 5, 6, 0, 5, 4, 6, 5, 5, 3,
|
124
|
+
6, 3, 3, 5, 0, 0, 5, 5, 0, 5, 6, 6, 0, 0,
|
125
|
+
6, 0, 0, 3, 4, 0, 3, 6, 3, 6, 5, 5, 5, 5,
|
126
|
+
6, 6, 6, 6, 6, 2, 6, 5, 2, 6, 0, 6, 0, 3,
|
127
|
+
4, 6, 4, 0, 0, 0, 4, 7, 3, 0, 4, 4, 6, 5,
|
128
|
+
2, 5, 5, 5, 2, 6, 6, 3, 0, 4, 8, 2, 5, 4,
|
129
|
+
4, 4, 6, 3, 4, 0, 0, 0, 5, 3, 6, 4, 4, 5,
|
130
|
+
2, 6, 3, 2, 4, 3, 4, 3, 5, 4, 6, 3, 5, 5,
|
131
|
+
5, 5, 4, 5, 5, 6, 4, 6, 5, 4, 2, 5, 5, 0,
|
132
|
+
0, 6, 6, 4, 5, 6, 5, 6, 4, 6, 0, 4, 7, 4,
|
133
|
+
5, 6, 4, 5, 6, 0, 0, 6, 4, 0, 4, 6, 3, 0,
|
134
|
+
2, 6, 5, 6, 4, 4, 4, 4, 4, 4, 3, 0, 0, 5,
|
135
|
+
6, 4, 4, 7, 0, 2, 5, 0, 2, 5, 4, 6, 2, 5,
|
136
|
+
5, 6, 2, 4, 0, 2, 5, 0, 0, 5, 4, 6, 0, 6,
|
137
|
+
4, 0, 3, 5, 0, 4, 0, 4, 0, 5, 6, 5, 0, 0,
|
138
|
+
5, 5, 6, 5, 5, 6, 3, 5, 3, 0, 0, 0, 5, 3,
|
139
|
+
0, 0, 5, 4, 0, 5, 4, 0, 5, 4, 4, 5, 7, 5,
|
140
|
+
0, 6, 6, 6, 6, 0, 4, 4, 0, 6, 0, 0, 0, 5,
|
141
|
+
0, 6, 6, 4, 0, 4, 0, 4, 0, 4, 3, 0, 0, 0,
|
142
|
+
5, 7, 4, 6, 0, 6, 6, 0, 5, 0, 5, 0, 4, 0,
|
143
|
+
4, 0, 5, 6, 0, 3, 0, 5, 0, 0, 0, 2, 0, 0,
|
144
|
+
3, 3, 0, 5, 5, 5, 0, 0, 0, 0, 0, 5, 0, 0,
|
145
|
+
0, 0, 0, 0, 0, 3, 6, 0, 0, 0, 0, 7, 7, 0,
|
146
|
+
0, 0, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
147
|
+
0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 5,
|
148
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
|
149
|
+
4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
150
|
+
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 6
|
151
|
+
};
|
152
|
+
static const struct html_ent wordlist[] =
|
153
|
+
{
|
154
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
155
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
156
|
+
{(char*)0}, {(char*)0},
|
157
|
+
#line 132 "html_unescape.gperf"
|
158
|
+
{"Rho", 2, { 0xCE, 0xA1 }},
|
159
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
160
|
+
#line 226 "html_unescape.gperf"
|
161
|
+
{"and", 3, { 0xE2, 0x88, 0xA7 }},
|
162
|
+
{(char*)0},
|
163
|
+
#line 161 "html_unescape.gperf"
|
164
|
+
{"phi", 2, { 0xCF, 0x86 }},
|
165
|
+
#line 148 "html_unescape.gperf"
|
166
|
+
{"iota", 2, { 0xCE, 0xB9 }},
|
167
|
+
#line 163 "html_unescape.gperf"
|
168
|
+
{"psi", 2, { 0xCF, 0x88 }},
|
169
|
+
#line 8 "html_unescape.gperf"
|
170
|
+
{"amp", 1, { 0x26 }},
|
171
|
+
#line 230 "html_unescape.gperf"
|
172
|
+
{"int", 3, { 0xE2, 0x88, 0xAB }},
|
173
|
+
{(char*)0},
|
174
|
+
#line 147 "html_unescape.gperf"
|
175
|
+
{"theta", 2, { 0xCE, 0xB8 }},
|
176
|
+
#line 231 "html_unescape.gperf"
|
177
|
+
{"there4", 3, { 0xE2, 0x88, 0xB4 }},
|
178
|
+
#line 223 "html_unescape.gperf"
|
179
|
+
{"prop", 3, { 0xE2, 0x88, 0x9D }},
|
180
|
+
#line 164 "html_unescape.gperf"
|
181
|
+
{"omega", 2, { 0xCF, 0x89 }},
|
182
|
+
#line 167 "html_unescape.gperf"
|
183
|
+
{"ensp", 3, { 0xE2, 0x80, 0x82 }},
|
184
|
+
#line 218 "html_unescape.gperf"
|
185
|
+
{"prod", 3, { 0xE2, 0x88, 0x8F }},
|
186
|
+
#line 24 "html_unescape.gperf"
|
187
|
+
{"not", 2, { 0xC2, 0xAC }},
|
188
|
+
#line 194 "html_unescape.gperf"
|
189
|
+
{"image", 3, { 0xE2, 0x84, 0x91 }},
|
190
|
+
#line 215 "html_unescape.gperf"
|
191
|
+
{"isin", 3, { 0xE2, 0x88, 0x88 }},
|
192
|
+
#line 247 "html_unescape.gperf"
|
193
|
+
{"sdot", 3, { 0xE2, 0x8B, 0x85 }},
|
194
|
+
#line 123 "html_unescape.gperf"
|
195
|
+
{"Theta", 2, { 0xCE, 0x98 }},
|
196
|
+
#line 187 "html_unescape.gperf"
|
197
|
+
{"prime", 3, { 0xE2, 0x80, 0xB2 }},
|
198
|
+
#line 110 "html_unescape.gperf"
|
199
|
+
{"Scaron", 2, { 0xC5, 0xA0 }},
|
200
|
+
{(char*)0},
|
201
|
+
#line 197 "html_unescape.gperf"
|
202
|
+
{"trade", 3, { 0xE2, 0x84, 0xA2 }},
|
203
|
+
#line 168 "html_unescape.gperf"
|
204
|
+
{"emsp", 3, { 0xE2, 0x80, 0x83 }},
|
205
|
+
#line 169 "html_unescape.gperf"
|
206
|
+
{"thinsp", 3, { 0xE2, 0x80, 0x89 }},
|
207
|
+
#line 149 "html_unescape.gperf"
|
208
|
+
{"kappa", 2, { 0xCE, 0xBA }},
|
209
|
+
#line 106 "html_unescape.gperf"
|
210
|
+
{"thorn", 2, { 0xC3, 0xBE }},
|
211
|
+
#line 162 "html_unescape.gperf"
|
212
|
+
{"chi", 2, { 0xCF, 0x87 }},
|
213
|
+
#line 111 "html_unescape.gperf"
|
214
|
+
{"scaron", 2, { 0xC5, 0xA1 }},
|
215
|
+
#line 137 "html_unescape.gperf"
|
216
|
+
{"Chi", 2, { 0xCE, 0xA7 }},
|
217
|
+
#line 146 "html_unescape.gperf"
|
218
|
+
{"eta", 2, { 0xCE, 0xB7 }},
|
219
|
+
#line 125 "html_unescape.gperf"
|
220
|
+
{"Kappa", 2, { 0xCE, 0x9A }},
|
221
|
+
{(char*)0}, {(char*)0},
|
222
|
+
#line 216 "html_unescape.gperf"
|
223
|
+
{"notin", 3, { 0xE2, 0x88, 0x89 }},
|
224
|
+
#line 15 "html_unescape.gperf"
|
225
|
+
{"pound", 2, { 0xC2, 0xA3 }},
|
226
|
+
{(char*)0},
|
227
|
+
#line 32 "html_unescape.gperf"
|
228
|
+
{"acute", 2, { 0xC2, 0xB4 }},
|
229
|
+
#line 97 "html_unescape.gperf"
|
230
|
+
{"otilde", 2, { 0xC3, 0xB5 }},
|
231
|
+
#line 79 "html_unescape.gperf"
|
232
|
+
{"atilde", 2, { 0xC3, 0xA3 }},
|
233
|
+
{(char*)0}, {(char*)0},
|
234
|
+
#line 61 "html_unescape.gperf"
|
235
|
+
{"Ntilde", 2, { 0xC3, 0x91 }},
|
236
|
+
{(char*)0}, {(char*)0},
|
237
|
+
#line 228 "html_unescape.gperf"
|
238
|
+
{"cap", 3, { 0xE2, 0x88, 0xA9 }},
|
239
|
+
#line 12 "html_unescape.gperf"
|
240
|
+
{"nbsp", 2, { 0xC2, 0xA0 }},
|
241
|
+
{(char*)0},
|
242
|
+
#line 254 "html_unescape.gperf"
|
243
|
+
{"loz", 3, { 0xE2, 0x97, 0x8A }},
|
244
|
+
#line 93 "html_unescape.gperf"
|
245
|
+
{"ntilde", 2, { 0xC3, 0xB1 }},
|
246
|
+
#line 156 "html_unescape.gperf"
|
247
|
+
{"rho", 2, { 0xCF, 0x81 }},
|
248
|
+
#line 245 "html_unescape.gperf"
|
249
|
+
{"otimes", 3, { 0xE2, 0x8A, 0x97 }},
|
250
|
+
#line 96 "html_unescape.gperf"
|
251
|
+
{"ocirc", 2, { 0xC3, 0xB4 }},
|
252
|
+
#line 78 "html_unescape.gperf"
|
253
|
+
{"acirc", 2, { 0xC3, 0xA2 }},
|
254
|
+
#line 86 "html_unescape.gperf"
|
255
|
+
{"ecirc", 2, { 0xC3, 0xAA }},
|
256
|
+
#line 90 "html_unescape.gperf"
|
257
|
+
{"icirc", 2, { 0xC3, 0xAE }},
|
258
|
+
#line 95 "html_unescape.gperf"
|
259
|
+
{"oacute", 2, { 0xC3, 0xB3 }},
|
260
|
+
#line 77 "html_unescape.gperf"
|
261
|
+
{"aacute", 2, { 0xC3, 0xA1 }},
|
262
|
+
#line 85 "html_unescape.gperf"
|
263
|
+
{"eacute", 2, { 0xC3, 0xA9 }},
|
264
|
+
#line 89 "html_unescape.gperf"
|
265
|
+
{"iacute", 2, { 0xC3, 0xAD }},
|
266
|
+
#line 73 "html_unescape.gperf"
|
267
|
+
{"Yacute", 2, { 0xC3, 0x9D }},
|
268
|
+
#line 227 "html_unescape.gperf"
|
269
|
+
{"or", 3, { 0xE2, 0x88, 0xA8 }},
|
270
|
+
#line 221 "html_unescape.gperf"
|
271
|
+
{"lowast", 3, { 0xE2, 0x88, 0x97 }},
|
272
|
+
#line 214 "html_unescape.gperf"
|
273
|
+
{"nabla", 3, { 0xE2, 0x88, 0x87 }},
|
274
|
+
#line 10 "html_unescape.gperf"
|
275
|
+
{"lt", 1, { 0x3C }},
|
276
|
+
#line 83 "html_unescape.gperf"
|
277
|
+
{"ccedil", 2, { 0xC3, 0xA7 }},
|
278
|
+
{(char*)0},
|
279
|
+
#line 51 "html_unescape.gperf"
|
280
|
+
{"Ccedil", 2, { 0xC3, 0x87 }},
|
281
|
+
{(char*)0},
|
282
|
+
#line 240 "html_unescape.gperf"
|
283
|
+
{"sup", 3, { 0xE2, 0x8A, 0x83 }},
|
284
|
+
#line 241 "html_unescape.gperf"
|
285
|
+
{"nsub", 3, { 0xE2, 0x8A, 0x84 }},
|
286
|
+
#line 189 "html_unescape.gperf"
|
287
|
+
{"lsaquo", 3, { 0xE2, 0x80, 0xB9 }},
|
288
|
+
#line 243 "html_unescape.gperf"
|
289
|
+
{"supe", 3, { 0xE2, 0x8A, 0x87 }},
|
290
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
291
|
+
#line 34 "html_unescape.gperf"
|
292
|
+
{"para", 2, { 0xC2, 0xB6 }},
|
293
|
+
#line 154 "html_unescape.gperf"
|
294
|
+
{"omicron", 2, { 0xCE, 0xBF }},
|
295
|
+
#line 229 "html_unescape.gperf"
|
296
|
+
{"cup", 3, { 0xE2, 0x88, 0xAA }},
|
297
|
+
{(char*)0},
|
298
|
+
#line 211 "html_unescape.gperf"
|
299
|
+
{"part", 3, { 0xE2, 0x88, 0x82 }},
|
300
|
+
#line 37 "html_unescape.gperf"
|
301
|
+
{"sup1", 2, { 0xC2, 0xB9 }},
|
302
|
+
#line 47 "html_unescape.gperf"
|
303
|
+
{"Atilde", 2, { 0xC3, 0x83 }},
|
304
|
+
#line 140 "html_unescape.gperf"
|
305
|
+
{"alpha", 2, { 0xCE, 0xB1 }},
|
306
|
+
#line 127 "html_unescape.gperf"
|
307
|
+
{"Mu", 2, { 0xCE, 0x9C }},
|
308
|
+
#line 103 "html_unescape.gperf"
|
309
|
+
{"ucirc", 2, { 0xC3, 0xBB }},
|
310
|
+
#line 139 "html_unescape.gperf"
|
311
|
+
{"Omega", 2, { 0xCE, 0xA9 }},
|
312
|
+
#line 191 "html_unescape.gperf"
|
313
|
+
{"oline", 3, { 0xE2, 0x80, 0xBE }},
|
314
|
+
#line 128 "html_unescape.gperf"
|
315
|
+
{"Nu", 2, { 0xCE, 0x9D }},
|
316
|
+
#line 102 "html_unescape.gperf"
|
317
|
+
{"uacute", 2, { 0xC3, 0xBA }},
|
318
|
+
#line 190 "html_unescape.gperf"
|
319
|
+
{"rsaquo", 3, { 0xE2, 0x80, 0xBA }},
|
320
|
+
#line 159 "html_unescape.gperf"
|
321
|
+
{"tau", 2, { 0xCF, 0x84 }},
|
322
|
+
{(char*)0},
|
323
|
+
#line 31 "html_unescape.gperf"
|
324
|
+
{"sup3", 2, { 0xC2, 0xB3 }},
|
325
|
+
#line 165 "html_unescape.gperf"
|
326
|
+
{"thetasym", 2, { 0xCF, 0x91 }},
|
327
|
+
#line 152 "html_unescape.gperf"
|
328
|
+
{"nu", 2, { 0xCE, 0xBD }},
|
329
|
+
#line 46 "html_unescape.gperf"
|
330
|
+
{"Acirc", 2, { 0xC3, 0x82 }},
|
331
|
+
#line 38 "html_unescape.gperf"
|
332
|
+
{"ordm", 2, { 0xC2, 0xBA }},
|
333
|
+
#line 30 "html_unescape.gperf"
|
334
|
+
{"sup2", 2, { 0xC2, 0xB2 }},
|
335
|
+
#line 242 "html_unescape.gperf"
|
336
|
+
{"sube", 3, { 0xE2, 0x8A, 0x86 }},
|
337
|
+
#line 45 "html_unescape.gperf"
|
338
|
+
{"Aacute", 2, { 0xC3, 0x81 }},
|
339
|
+
#line 134 "html_unescape.gperf"
|
340
|
+
{"Tau", 2, { 0xCE, 0xA4 }},
|
341
|
+
#line 124 "html_unescape.gperf"
|
342
|
+
{"Iota", 2, { 0xCE, 0x99 }},
|
343
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
344
|
+
#line 248 "html_unescape.gperf"
|
345
|
+
{"lceil", 3, { 0xE2, 0x8C, 0x88 }},
|
346
|
+
#line 20 "html_unescape.gperf"
|
347
|
+
{"uml", 2, { 0xC2, 0xA8 }},
|
348
|
+
#line 43 "html_unescape.gperf"
|
349
|
+
{"iquest", 2, { 0xC2, 0xBF }},
|
350
|
+
#line 121 "html_unescape.gperf"
|
351
|
+
{"Zeta", 2, { 0xCE, 0x96 }},
|
352
|
+
#line 193 "html_unescape.gperf"
|
353
|
+
{"euro", 3, { 0xE2, 0x82, 0xAC }},
|
354
|
+
#line 234 "html_unescape.gperf"
|
355
|
+
{"asymp", 3, { 0xE2, 0x89, 0x88 }},
|
356
|
+
#line 235 "html_unescape.gperf"
|
357
|
+
{"ne", 3, { 0xE2, 0x89, 0xA0 }},
|
358
|
+
#line 65 "html_unescape.gperf"
|
359
|
+
{"Otilde", 2, { 0xC3, 0x95 }},
|
360
|
+
#line 122 "html_unescape.gperf"
|
361
|
+
{"Eta", 2, { 0xCE, 0x97 }},
|
362
|
+
#line 11 "html_unescape.gperf"
|
363
|
+
{"gt", 1, { 0x3E }},
|
364
|
+
#line 233 "html_unescape.gperf"
|
365
|
+
{"cong", 3, { 0xE2, 0x89, 0x85 }},
|
366
|
+
#line 136 "html_unescape.gperf"
|
367
|
+
{"Phi", 2, { 0xCE, 0xA6 }},
|
368
|
+
#line 145 "html_unescape.gperf"
|
369
|
+
{"zeta", 2, { 0xCE, 0xB6 }},
|
370
|
+
#line 138 "html_unescape.gperf"
|
371
|
+
{"Psi", 2, { 0xCE, 0xA8 }},
|
372
|
+
#line 212 "html_unescape.gperf"
|
373
|
+
{"exist", 3, { 0xE2, 0x88, 0x83 }},
|
374
|
+
#line 22 "html_unescape.gperf"
|
375
|
+
{"ordf", 2, { 0xC2, 0xAA }},
|
376
|
+
#line 126 "html_unescape.gperf"
|
377
|
+
{"Lambda", 2, { 0xCE, 0x9B }},
|
378
|
+
#line 239 "html_unescape.gperf"
|
379
|
+
{"sub", 3, { 0xE2, 0x8A, 0x82 }},
|
380
|
+
#line 118 "html_unescape.gperf"
|
381
|
+
{"Gamma", 2, { 0xCE, 0x93 }},
|
382
|
+
#line 249 "html_unescape.gperf"
|
383
|
+
{"rceil", 3, { 0xE2, 0x8C, 0x89 }},
|
384
|
+
#line 116 "html_unescape.gperf"
|
385
|
+
{"Alpha", 2, { 0xCE, 0x91 }},
|
386
|
+
#line 64 "html_unescape.gperf"
|
387
|
+
{"Ocirc", 2, { 0xC3, 0x94 }},
|
388
|
+
#line 21 "html_unescape.gperf"
|
389
|
+
{"copy", 2, { 0xC2, 0xA9 }},
|
390
|
+
#line 224 "html_unescape.gperf"
|
391
|
+
{"infin", 3, { 0xE2, 0x88, 0x9E }},
|
392
|
+
#line 222 "html_unescape.gperf"
|
393
|
+
{"radic", 3, { 0xE2, 0x88, 0x9A }},
|
394
|
+
#line 63 "html_unescape.gperf"
|
395
|
+
{"Oacute", 2, { 0xC3, 0x93 }},
|
396
|
+
#line 202 "html_unescape.gperf"
|
397
|
+
{"darr", 3, { 0xE2, 0x86, 0x93 }},
|
398
|
+
#line 40 "html_unescape.gperf"
|
399
|
+
{"frac14", 2, { 0xC2, 0xBC }},
|
400
|
+
#line 188 "html_unescape.gperf"
|
401
|
+
{"Prime", 3, { 0xE2, 0x80, 0xB3 }},
|
402
|
+
#line 141 "html_unescape.gperf"
|
403
|
+
{"beta", 2, { 0xCE, 0xB2 }},
|
404
|
+
#line 237 "html_unescape.gperf"
|
405
|
+
{"le", 3, { 0xE2, 0x89, 0xA4 }},
|
406
|
+
#line 71 "html_unescape.gperf"
|
407
|
+
{"Ucirc", 2, { 0xC3, 0x9B }},
|
408
|
+
#line 54 "html_unescape.gperf"
|
409
|
+
{"Ecirc", 2, { 0xC3, 0x8A }},
|
410
|
+
{(char*)0}, {(char*)0},
|
411
|
+
#line 70 "html_unescape.gperf"
|
412
|
+
{"Uacute", 2, { 0xC3, 0x9A }},
|
413
|
+
#line 53 "html_unescape.gperf"
|
414
|
+
{"Eacute", 2, { 0xC3, 0x89 }},
|
415
|
+
#line 14 "html_unescape.gperf"
|
416
|
+
{"cent", 2, { 0xC2, 0xA2 }},
|
417
|
+
#line 143 "html_unescape.gperf"
|
418
|
+
{"delta", 2, { 0xCE, 0xB4 }},
|
419
|
+
#line 42 "html_unescape.gperf"
|
420
|
+
{"frac34", 2, { 0xC2, 0xBE }},
|
421
|
+
#line 204 "html_unescape.gperf"
|
422
|
+
{"crarr", 3, { 0xE2, 0x86, 0xB5 }},
|
423
|
+
#line 150 "html_unescape.gperf"
|
424
|
+
{"lambda", 2, { 0xCE, 0xBB }},
|
425
|
+
#line 19 "html_unescape.gperf"
|
426
|
+
{"sect", 2, { 0xC2, 0xA7 }},
|
427
|
+
#line 16 "html_unescape.gperf"
|
428
|
+
{"curren", 2, { 0xC2, 0xA4 }},
|
429
|
+
{(char*)0},
|
430
|
+
#line 184 "html_unescape.gperf"
|
431
|
+
{"bull", 3, { 0xE2, 0x80, 0xA2 }},
|
432
|
+
#line 130 "html_unescape.gperf"
|
433
|
+
{"Omicron", 2, { 0xCE, 0x9F }},
|
434
|
+
#line 252 "html_unescape.gperf"
|
435
|
+
{"lang", 3, { 0xE2, 0x9F, 0xA8 }},
|
436
|
+
#line 58 "html_unescape.gperf"
|
437
|
+
{"Icirc", 2, { 0xC3, 0x8E }},
|
438
|
+
#line 18 "html_unescape.gperf"
|
439
|
+
{"brvbar", 2, { 0xC2, 0xA6 }},
|
440
|
+
#line 246 "html_unescape.gperf"
|
441
|
+
{"perp", 3, { 0xE2, 0x8A, 0xA5 }},
|
442
|
+
#line 119 "html_unescape.gperf"
|
443
|
+
{"Delta", 2, { 0xCE, 0x94 }},
|
444
|
+
#line 57 "html_unescape.gperf"
|
445
|
+
{"Iacute", 2, { 0xC3, 0x8D }},
|
446
|
+
{(char*)0}, {(char*)0},
|
447
|
+
#line 41 "html_unescape.gperf"
|
448
|
+
{"frac12", 2, { 0xC2, 0xBD }},
|
449
|
+
#line 200 "html_unescape.gperf"
|
450
|
+
{"uarr", 3, { 0xE2, 0x86, 0x91 }},
|
451
|
+
{(char*)0},
|
452
|
+
#line 199 "html_unescape.gperf"
|
453
|
+
{"larr", 3, { 0xE2, 0x86, 0x90 }},
|
454
|
+
#line 105 "html_unescape.gperf"
|
455
|
+
{"yacute", 2, { 0xC3, 0xBD }},
|
456
|
+
#line 225 "html_unescape.gperf"
|
457
|
+
{"ang", 3, { 0xE2, 0x88, 0xA0 }},
|
458
|
+
{(char*)0},
|
459
|
+
#line 151 "html_unescape.gperf"
|
460
|
+
{"mu", 2, { 0xCE, 0xBC }},
|
461
|
+
#line 182 "html_unescape.gperf"
|
462
|
+
{"dagger", 3, { 0xE2, 0x80, 0xA0 }},
|
463
|
+
#line 256 "html_unescape.gperf"
|
464
|
+
{"clubs", 3, { 0xE2, 0x99, 0xA3 }},
|
465
|
+
#line 195 "html_unescape.gperf"
|
466
|
+
{"weierp", 3, { 0xE2, 0x84, 0x98 }},
|
467
|
+
#line 253 "html_unescape.gperf"
|
468
|
+
{"rang", 3, { 0xE2, 0x9F, 0xA9 }},
|
469
|
+
#line 98 "html_unescape.gperf"
|
470
|
+
{"ouml", 2, { 0xC3, 0xB6 }},
|
471
|
+
#line 80 "html_unescape.gperf"
|
472
|
+
{"auml", 2, { 0xC3, 0xA4 }},
|
473
|
+
#line 87 "html_unescape.gperf"
|
474
|
+
{"euml", 2, { 0xC3, 0xAB }},
|
475
|
+
#line 91 "html_unescape.gperf"
|
476
|
+
{"iuml", 2, { 0xC3, 0xAF }},
|
477
|
+
#line 112 "html_unescape.gperf"
|
478
|
+
{"Yuml", 2, { 0xC5, 0xB8 }},
|
479
|
+
#line 60 "html_unescape.gperf"
|
480
|
+
{"ETH", 2, { 0xC3, 0x90 }},
|
481
|
+
{(char*)0}, {(char*)0},
|
482
|
+
#line 13 "html_unescape.gperf"
|
483
|
+
{"iexcl", 2, { 0xC2, 0xA1 }},
|
484
|
+
#line 183 "html_unescape.gperf"
|
485
|
+
{"Dagger", 3, { 0xE2, 0x80, 0xA1 }},
|
486
|
+
#line 201 "html_unescape.gperf"
|
487
|
+
{"rarr", 3, { 0xE2, 0x86, 0x92 }},
|
488
|
+
#line 27 "html_unescape.gperf"
|
489
|
+
{"macr", 2, { 0xC2, 0xAF }},
|
490
|
+
#line 198 "html_unescape.gperf"
|
491
|
+
{"alefsym", 3, { 0xE2, 0x84, 0xB5 }},
|
492
|
+
{(char*)0},
|
493
|
+
#line 238 "html_unescape.gperf"
|
494
|
+
{"ge", 3, { 0xE2, 0x89, 0xA5 }},
|
495
|
+
#line 81 "html_unescape.gperf"
|
496
|
+
{"aring", 2, { 0xC3, 0xA5 }},
|
497
|
+
{(char*)0},
|
498
|
+
#line 155 "html_unescape.gperf"
|
499
|
+
{"pi", 2, { 0xCF, 0x80 }},
|
500
|
+
#line 192 "html_unescape.gperf"
|
501
|
+
{"frasl", 3, { 0xE2, 0x81, 0x84 }},
|
502
|
+
#line 196 "html_unescape.gperf"
|
503
|
+
{"real", 3, { 0xE2, 0x84, 0x9C }},
|
504
|
+
#line 100 "html_unescape.gperf"
|
505
|
+
{"oslash", 2, { 0xC3, 0xB8 }},
|
506
|
+
#line 153 "html_unescape.gperf"
|
507
|
+
{"xi", 2, { 0xCE, 0xBE }},
|
508
|
+
#line 142 "html_unescape.gperf"
|
509
|
+
{"gamma", 2, { 0xCE, 0xB3 }},
|
510
|
+
#line 74 "html_unescape.gperf"
|
511
|
+
{"THORN", 2, { 0xC3, 0x9E }},
|
512
|
+
#line 186 "html_unescape.gperf"
|
513
|
+
{"permil", 3, { 0xE2, 0x80, 0xB0 }},
|
514
|
+
#line 129 "html_unescape.gperf"
|
515
|
+
{"Xi", 2, { 0xCE, 0x9E }},
|
516
|
+
#line 9 "html_unescape.gperf"
|
517
|
+
{"apos", 1, { 0x27 }},
|
518
|
+
{(char*)0},
|
519
|
+
#line 217 "html_unescape.gperf"
|
520
|
+
{"ni", 3, { 0xE2, 0x88, 0x8B }},
|
521
|
+
#line 36 "html_unescape.gperf"
|
522
|
+
{"cedil", 2, { 0xC2, 0xB8 }},
|
523
|
+
{(char*)0}, {(char*)0},
|
524
|
+
#line 236 "html_unescape.gperf"
|
525
|
+
{"equiv", 3, { 0xE2, 0x89, 0xA1 }},
|
526
|
+
#line 104 "html_unescape.gperf"
|
527
|
+
{"uuml", 2, { 0xC3, 0xBC }},
|
528
|
+
#line 250 "html_unescape.gperf"
|
529
|
+
{"lfloor", 3, { 0xE2, 0x8C, 0x8A }},
|
530
|
+
{(char*)0},
|
531
|
+
#line 29 "html_unescape.gperf"
|
532
|
+
{"plusmn", 2, { 0xC2, 0xB1 }},
|
533
|
+
#line 113 "html_unescape.gperf"
|
534
|
+
{"fnof", 2, { 0xC6, 0x92 }},
|
535
|
+
{(char*)0},
|
536
|
+
#line 172 "html_unescape.gperf"
|
537
|
+
{"lrm", 3, { 0xE2, 0x80, 0x8E }},
|
538
|
+
#line 181 "html_unescape.gperf"
|
539
|
+
{"bdquo", 3, { 0xE2, 0x80, 0x9E }},
|
540
|
+
{(char*)0},
|
541
|
+
#line 170 "html_unescape.gperf"
|
542
|
+
{"zwnj", 3, { 0xE2, 0x80, 0x8C }},
|
543
|
+
{(char*)0},
|
544
|
+
#line 48 "html_unescape.gperf"
|
545
|
+
{"Auml", 2, { 0xC3, 0x84 }},
|
546
|
+
{(char*)0},
|
547
|
+
#line 75 "html_unescape.gperf"
|
548
|
+
{"szlig", 2, { 0xC3, 0x9F }},
|
549
|
+
#line 255 "html_unescape.gperf"
|
550
|
+
{"spades", 3, { 0xE2, 0x99, 0xA0 }},
|
551
|
+
#line 179 "html_unescape.gperf"
|
552
|
+
{"ldquo", 3, { 0xE2, 0x80, 0x9C }},
|
553
|
+
{(char*)0}, {(char*)0},
|
554
|
+
#line 176 "html_unescape.gperf"
|
555
|
+
{"lsquo", 3, { 0xE2, 0x80, 0x98 }},
|
556
|
+
#line 178 "html_unescape.gperf"
|
557
|
+
{"sbquo", 3, { 0xE2, 0x80, 0x9A }},
|
558
|
+
#line 251 "html_unescape.gperf"
|
559
|
+
{"rfloor", 3, { 0xE2, 0x8C, 0x8B }},
|
560
|
+
#line 213 "html_unescape.gperf"
|
561
|
+
{"empty", 3, { 0xE2, 0x88, 0x85 }},
|
562
|
+
#line 258 "html_unescape.gperf"
|
563
|
+
{"diams", 3, { 0xE2, 0x99, 0xA6 }},
|
564
|
+
#line 210 "html_unescape.gperf"
|
565
|
+
{"forall", 3, { 0xE2, 0x88, 0x80 }},
|
566
|
+
#line 25 "html_unescape.gperf"
|
567
|
+
{"shy", 2, { 0xC2, 0xAD }},
|
568
|
+
#line 49 "html_unescape.gperf"
|
569
|
+
{"Aring", 2, { 0xC3, 0x85 }},
|
570
|
+
#line 17 "html_unescape.gperf"
|
571
|
+
{"yen", 2, { 0xC2, 0xA5 }},
|
572
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
573
|
+
#line 115 "html_unescape.gperf"
|
574
|
+
{"tilde", 2, { 0xCB, 0x9C }},
|
575
|
+
#line 219 "html_unescape.gperf"
|
576
|
+
{"sum", 3, { 0xE2, 0x88, 0x91 }},
|
577
|
+
{(char*)0}, {(char*)0},
|
578
|
+
#line 180 "html_unescape.gperf"
|
579
|
+
{"rdquo", 3, { 0xE2, 0x80, 0x9D }},
|
580
|
+
#line 208 "html_unescape.gperf"
|
581
|
+
{"dArr", 3, { 0xE2, 0x87, 0x93 }},
|
582
|
+
{(char*)0},
|
583
|
+
#line 177 "html_unescape.gperf"
|
584
|
+
{"rsquo", 3, { 0xE2, 0x80, 0x99 }},
|
585
|
+
#line 117 "html_unescape.gperf"
|
586
|
+
{"Beta", 2, { 0xCE, 0x92 }},
|
587
|
+
{(char*)0},
|
588
|
+
#line 23 "html_unescape.gperf"
|
589
|
+
{"laquo", 2, { 0xC2, 0xAB }},
|
590
|
+
#line 7 "html_unescape.gperf"
|
591
|
+
{"quot", 1, { 0x22 }},
|
592
|
+
#line 66 "html_unescape.gperf"
|
593
|
+
{"Ouml", 2, { 0xC3, 0x96 }},
|
594
|
+
#line 50 "html_unescape.gperf"
|
595
|
+
{"AElig", 2, { 0xC3, 0x86 }},
|
596
|
+
#line 144 "html_unescape.gperf"
|
597
|
+
{"epsilon", 2, { 0xCE, 0xB5 }},
|
598
|
+
#line 244 "html_unescape.gperf"
|
599
|
+
{"oplus", 3, { 0xE2, 0x8A, 0x95 }},
|
600
|
+
{(char*)0},
|
601
|
+
#line 94 "html_unescape.gperf"
|
602
|
+
{"ograve", 2, { 0xC3, 0xB2 }},
|
603
|
+
#line 76 "html_unescape.gperf"
|
604
|
+
{"agrave", 2, { 0xC3, 0xA0 }},
|
605
|
+
#line 84 "html_unescape.gperf"
|
606
|
+
{"egrave", 2, { 0xC3, 0xA8 }},
|
607
|
+
#line 88 "html_unescape.gperf"
|
608
|
+
{"igrave", 2, { 0xC3, 0xAC }},
|
609
|
+
{(char*)0},
|
610
|
+
#line 72 "html_unescape.gperf"
|
611
|
+
{"Uuml", 2, { 0xC3, 0x9C }},
|
612
|
+
#line 55 "html_unescape.gperf"
|
613
|
+
{"Euml", 2, { 0xC3, 0x8B }},
|
614
|
+
{(char*)0},
|
615
|
+
#line 99 "html_unescape.gperf"
|
616
|
+
{"divide", 2, { 0xC3, 0xB7 }},
|
617
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
618
|
+
#line 39 "html_unescape.gperf"
|
619
|
+
{"raquo", 2, { 0xC2, 0xBB }},
|
620
|
+
{(char*)0},
|
621
|
+
#line 257 "html_unescape.gperf"
|
622
|
+
{"hearts", 3, { 0xE2, 0x99, 0xA5 }},
|
623
|
+
#line 68 "html_unescape.gperf"
|
624
|
+
{"Oslash", 2, { 0xC3, 0x98 }},
|
625
|
+
#line 203 "html_unescape.gperf"
|
626
|
+
{"harr", 3, { 0xE2, 0x86, 0x94 }},
|
627
|
+
{(char*)0},
|
628
|
+
#line 206 "html_unescape.gperf"
|
629
|
+
{"uArr", 3, { 0xE2, 0x87, 0x91 }},
|
630
|
+
{(char*)0},
|
631
|
+
#line 205 "html_unescape.gperf"
|
632
|
+
{"lArr", 3, { 0xE2, 0x87, 0x90 }},
|
633
|
+
{(char*)0},
|
634
|
+
#line 59 "html_unescape.gperf"
|
635
|
+
{"Iuml", 2, { 0xC3, 0x8F }},
|
636
|
+
#line 28 "html_unescape.gperf"
|
637
|
+
{"deg", 2, { 0xC2, 0xB0 }},
|
638
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
639
|
+
#line 108 "html_unescape.gperf"
|
640
|
+
{"OElig", 2, { 0xC5, 0x92 }},
|
641
|
+
#line 160 "html_unescape.gperf"
|
642
|
+
{"upsilon", 2, { 0xCF, 0x85 }},
|
643
|
+
#line 107 "html_unescape.gperf"
|
644
|
+
{"yuml", 2, { 0xC3, 0xBF }},
|
645
|
+
#line 185 "html_unescape.gperf"
|
646
|
+
{"hellip", 3, { 0xE2, 0x80, 0xA6 }},
|
647
|
+
{(char*)0},
|
648
|
+
#line 35 "html_unescape.gperf"
|
649
|
+
{"middot", 2, { 0xC2, 0xB7 }},
|
650
|
+
#line 101 "html_unescape.gperf"
|
651
|
+
{"ugrave", 2, { 0xC3, 0xB9 }},
|
652
|
+
{(char*)0},
|
653
|
+
#line 133 "html_unescape.gperf"
|
654
|
+
{"Sigma", 2, { 0xCE, 0xA3 }},
|
655
|
+
{(char*)0},
|
656
|
+
#line 174 "html_unescape.gperf"
|
657
|
+
{"ndash", 3, { 0xE2, 0x80, 0x93 }},
|
658
|
+
{(char*)0},
|
659
|
+
#line 207 "html_unescape.gperf"
|
660
|
+
{"rArr", 3, { 0xE2, 0x87, 0x92 }},
|
661
|
+
{(char*)0},
|
662
|
+
#line 114 "html_unescape.gperf"
|
663
|
+
{"circ", 2, { 0xCB, 0x86 }},
|
664
|
+
{(char*)0},
|
665
|
+
#line 158 "html_unescape.gperf"
|
666
|
+
{"sigma", 2, { 0xCF, 0x83 }},
|
667
|
+
#line 44 "html_unescape.gperf"
|
668
|
+
{"Agrave", 2, { 0xC3, 0x80 }},
|
669
|
+
{(char*)0},
|
670
|
+
#line 173 "html_unescape.gperf"
|
671
|
+
{"rlm", 3, { 0xE2, 0x80, 0x8F }},
|
672
|
+
{(char*)0},
|
673
|
+
#line 33 "html_unescape.gperf"
|
674
|
+
{"micro", 2, { 0xC2, 0xB5 }},
|
675
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
676
|
+
#line 131 "html_unescape.gperf"
|
677
|
+
{"Pi", 2, { 0xCE, 0xA0 }},
|
678
|
+
{(char*)0}, {(char*)0},
|
679
|
+
#line 92 "html_unescape.gperf"
|
680
|
+
{"eth", 2, { 0xC3, 0xB0 }},
|
681
|
+
#line 166 "html_unescape.gperf"
|
682
|
+
{"piv", 2, { 0xCF, 0x96 }},
|
683
|
+
{(char*)0},
|
684
|
+
#line 109 "html_unescape.gperf"
|
685
|
+
{"oelig", 2, { 0xC5, 0x93 }},
|
686
|
+
#line 82 "html_unescape.gperf"
|
687
|
+
{"aelig", 2, { 0xC3, 0xA6 }},
|
688
|
+
#line 67 "html_unescape.gperf"
|
689
|
+
{"times", 2, { 0xC3, 0x97 }},
|
690
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
691
|
+
{(char*)0},
|
692
|
+
#line 220 "html_unescape.gperf"
|
693
|
+
{"minus", 3, { 0xE2, 0x88, 0x92 }},
|
694
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
695
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
696
|
+
#line 26 "html_unescape.gperf"
|
697
|
+
{"reg", 2, { 0xC2, 0xAE }},
|
698
|
+
#line 62 "html_unescape.gperf"
|
699
|
+
{"Ograve", 2, { 0xC3, 0x92 }},
|
700
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
701
|
+
#line 135 "html_unescape.gperf"
|
702
|
+
{"Upsilon", 2, { 0xCE, 0xA5 }},
|
703
|
+
#line 120 "html_unescape.gperf"
|
704
|
+
{"Epsilon", 2, { 0xCE, 0x95 }},
|
705
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
706
|
+
#line 69 "html_unescape.gperf"
|
707
|
+
{"Ugrave", 2, { 0xC3, 0x99 }},
|
708
|
+
#line 52 "html_unescape.gperf"
|
709
|
+
{"Egrave", 2, { 0xC3, 0x88 }},
|
710
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
711
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
712
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
713
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
714
|
+
#line 56 "html_unescape.gperf"
|
715
|
+
{"Igrave", 2, { 0xC3, 0x8C }},
|
716
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
717
|
+
{(char*)0}, {(char*)0}, {(char*)0},
|
718
|
+
#line 175 "html_unescape.gperf"
|
719
|
+
{"mdash", 3, { 0xE2, 0x80, 0x94 }},
|
720
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
721
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
722
|
+
{(char*)0},
|
723
|
+
#line 232 "html_unescape.gperf"
|
724
|
+
{"sim", 3, { 0xE2, 0x88, 0xBC }},
|
725
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
726
|
+
#line 209 "html_unescape.gperf"
|
727
|
+
{"hArr", 3, { 0xE2, 0x87, 0x94 }},
|
728
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
729
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
730
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
731
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
732
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
733
|
+
#line 171 "html_unescape.gperf"
|
734
|
+
{"zwj", 3, { 0xE2, 0x80, 0x8D }},
|
735
|
+
{(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
|
736
|
+
#line 157 "html_unescape.gperf"
|
737
|
+
{"sigmaf", 2, { 0xCF, 0x82 }}
|
738
|
+
};
|
739
|
+
|
740
|
+
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
741
|
+
{
|
742
|
+
register int key = hash_entity (str, len);
|
743
|
+
|
744
|
+
if (key <= MAX_HASH_VALUE && key >= 0)
|
745
|
+
if (len == lengthtable[key])
|
746
|
+
{
|
747
|
+
register const char *s = wordlist[key].entity;
|
748
|
+
|
749
|
+
if (s && *str == *s && !memcmp (str + 1, s + 1, len - 1))
|
750
|
+
return &wordlist[key];
|
751
|
+
}
|
752
|
+
}
|
753
|
+
return 0;
|
754
|
+
}
|