nokogiri 1.11.6 → 1.12.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +6 -5
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +181 -103
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +20 -18
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +5 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +51 -38
- data/ext/nokogiri/nokogiri.h +16 -9
- data/ext/nokogiri/xml_document.c +13 -13
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +2 -0
- data/ext/nokogiri/xml_node.c +102 -102
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri.rb +31 -29
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +2 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/node.rb +6 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- metadata +101 -58
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
@@ -0,0 +1,77 @@
|
|
1
|
+
%{
|
2
|
+
#include "replacement.h"
|
3
|
+
#include "macros.h"
|
4
|
+
#include "ascii.h"
|
5
|
+
%}
|
6
|
+
|
7
|
+
%ignore-case
|
8
|
+
%struct-type
|
9
|
+
%omit-struct-type
|
10
|
+
%compare-lengths
|
11
|
+
%readonly-tables
|
12
|
+
%null-strings
|
13
|
+
%includes
|
14
|
+
%define lookup-function-name gumbo_get_svg_attr_replacement
|
15
|
+
%define slot-name from
|
16
|
+
%define initializer-suffix ,(char*)0
|
17
|
+
StringReplacement;
|
18
|
+
|
19
|
+
%%
|
20
|
+
"attributename", "attributeName"
|
21
|
+
"attributetype", "attributeType"
|
22
|
+
"basefrequency", "baseFrequency"
|
23
|
+
"baseprofile", "baseProfile"
|
24
|
+
"calcmode", "calcMode"
|
25
|
+
"clippathunits", "clipPathUnits"
|
26
|
+
"diffuseconstant", "diffuseConstant"
|
27
|
+
"edgemode", "edgeMode"
|
28
|
+
"filterunits", "filterUnits"
|
29
|
+
"glyphref", "glyphRef"
|
30
|
+
"gradienttransform", "gradientTransform"
|
31
|
+
"gradientunits", "gradientUnits"
|
32
|
+
"kernelmatrix", "kernelMatrix"
|
33
|
+
"kernelunitlength", "kernelUnitLength"
|
34
|
+
"keypoints", "keyPoints"
|
35
|
+
"keysplines", "keySplines"
|
36
|
+
"keytimes", "keyTimes"
|
37
|
+
"lengthadjust", "lengthAdjust"
|
38
|
+
"limitingconeangle", "limitingConeAngle"
|
39
|
+
"markerheight", "markerHeight"
|
40
|
+
"markerunits", "markerUnits"
|
41
|
+
"markerwidth", "markerWidth"
|
42
|
+
"maskcontentunits", "maskContentUnits"
|
43
|
+
"maskunits", "maskUnits"
|
44
|
+
"numoctaves", "numOctaves"
|
45
|
+
"pathlength", "pathLength"
|
46
|
+
"patterncontentunits", "patternContentUnits"
|
47
|
+
"patterntransform", "patternTransform"
|
48
|
+
"patternunits", "patternUnits"
|
49
|
+
"pointsatx", "pointsAtX"
|
50
|
+
"pointsaty", "pointsAtY"
|
51
|
+
"pointsatz", "pointsAtZ"
|
52
|
+
"preservealpha", "preserveAlpha"
|
53
|
+
"preserveaspectratio", "preserveAspectRatio"
|
54
|
+
"primitiveunits", "primitiveUnits"
|
55
|
+
"refx", "refX"
|
56
|
+
"refy", "refY"
|
57
|
+
"repeatcount", "repeatCount"
|
58
|
+
"repeatdur", "repeatDur"
|
59
|
+
"requiredextensions", "requiredExtensions"
|
60
|
+
"requiredfeatures", "requiredFeatures"
|
61
|
+
"specularconstant", "specularConstant"
|
62
|
+
"specularexponent", "specularExponent"
|
63
|
+
"spreadmethod", "spreadMethod"
|
64
|
+
"startoffset", "startOffset"
|
65
|
+
"stddeviation", "stdDeviation"
|
66
|
+
"stitchtiles", "stitchTiles"
|
67
|
+
"surfacescale", "surfaceScale"
|
68
|
+
"systemlanguage", "systemLanguage"
|
69
|
+
"tablevalues", "tableValues"
|
70
|
+
"targetx", "targetX"
|
71
|
+
"targety", "targetY"
|
72
|
+
"textlength", "textLength"
|
73
|
+
"viewbox", "viewBox"
|
74
|
+
"viewtarget", "viewTarget"
|
75
|
+
"xchannelselector", "xChannelSelector"
|
76
|
+
"ychannelselector", "yChannelSelector"
|
77
|
+
"zoomandpan", "zoomAndPan"
|
@@ -0,0 +1,137 @@
|
|
1
|
+
/* ANSI-C code produced by gperf version 3.1 */
|
2
|
+
/* Command-line: gperf -m100 lib/svg_tags.gperf */
|
3
|
+
/* Computed positions: -k'3,7' */
|
4
|
+
/* Filtered by: mk/gperf-filter.sed */
|
5
|
+
|
6
|
+
#include "replacement.h"
|
7
|
+
#include "macros.h"
|
8
|
+
#include "ascii.h"
|
9
|
+
#include <string.h>
|
10
|
+
|
11
|
+
#define TOTAL_KEYWORDS 36 /* number of non-empty entries in wordlist[] below */
|
12
|
+
#define MIN_WORD_LENGTH 6 /* shortest keyword length; the lookup rejects anything shorter */
|
13
|
+
#define MAX_WORD_LENGTH 19 /* longest keyword length; the lookup rejects anything longer */
|
14
|
+
#define MIN_HASH_VALUE 6 /* index of the first occupied wordlist[] slot */
|
15
|
+
#define MAX_HASH_VALUE 42 /* last valid index into lengthtable[] and wordlist[] */
|
16
|
+
/* maximum key range = 37, duplicates = 0 */
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
/*
 * gperf-generated perfect hash over the SVG tag keywords.
 *
 * The hash is the length plus association values for the third character
 * and, when the length is outside 3..6, the seventh character (looked up one
 * slot higher in the table). Upper- and lower-case ASCII letters carry the
 * same association values, so the result is case-insensitive.
 *
 * The caller must guarantee that every byte read here exists: str[2] always,
 * and str[6] whenever len is not in 3..6. The lookup function in this file
 * only calls it with MIN_WORD_LENGTH <= len <= MAX_WORD_LENGTH.
 */
static inline unsigned int
hash (register const char *str, register size_t len)
{
  static const unsigned char asso_values[] =
    {
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 12,  2, 10, 22,
       1, 28, 15,  1, 43, 43, 43,  0,  9, 26,
       3, 17,  1, 11,  0, 22,  5, 43,  3,  2,
      43, 43, 43, 43, 43, 43, 43, 43, 12,  2,
      10, 22,  1, 28, 15,  1, 43, 43, 43,  0,
       9, 26,  3, 17,  1, 11,  0, 22,  5, 43,
       3,  2, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
      43, 43, 43, 43, 43, 43, 43
    };
  unsigned int sum = len;

  /* Lengths 3..6 use only the third character; all others add the seventh too. */
  if (sum < 3 || sum > 6)
    sum += asso_values[(unsigned char)str[6]+1];
  sum += asso_values[(unsigned char)str[2]];

  return sum;
}
|
68
|
+
|
69
|
+
const StringReplacement *
|
70
|
+
gumbo_get_svg_tag_replacement (register const char *str, register size_t len)
|
71
|
+
{
|
72
|
+
static const unsigned char lengthtable[] =
|
73
|
+
{
|
74
|
+
0, 0, 0, 0, 0, 0, 6, 0, 7, 7, 7, 8, 11, 12,
|
75
|
+
12, 13, 11, 12, 16, 7, 7, 16, 11, 7, 19, 8, 13, 17,
|
76
|
+
11, 12, 7, 8, 17, 8, 18, 8, 14, 12, 14, 14, 13, 7,
|
77
|
+
14
|
78
|
+
};
|
79
|
+
static const StringReplacement wordlist[] =
|
80
|
+
{
|
81
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
82
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
83
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
84
|
+
{"fetile", "feTile"},
|
85
|
+
{(char*)0,(char*)0},
|
86
|
+
{"femerge", "feMerge"},
|
87
|
+
{"feimage", "feImage"},
|
88
|
+
{"fefuncb", "feFuncB"},
|
89
|
+
{"glyphref", "glyphRef"},
|
90
|
+
{"femergenode", "feMergeNode"},
|
91
|
+
{"femorphology", "feMorphology"},
|
92
|
+
{"animatecolor", "animateColor"},
|
93
|
+
{"animatemotion", "animateMotion"},
|
94
|
+
{"fecomposite", "feComposite"},
|
95
|
+
{"feturbulence", "feTurbulence"},
|
96
|
+
{"animatetransform", "animateTransform"},
|
97
|
+
{"fefuncr", "feFuncR"},
|
98
|
+
{"fefunca", "feFuncA"},
|
99
|
+
{"feconvolvematrix", "feConvolveMatrix"},
|
100
|
+
{"fespotlight", "feSpotLight"},
|
101
|
+
{"fefuncg", "feFuncG"},
|
102
|
+
{"fecomponenttransfer", "feComponentTransfer"},
|
103
|
+
{"altglyph", "altGlyph"},
|
104
|
+
{"fecolormatrix", "feColorMatrix"},
|
105
|
+
{"fedisplacementmap", "feDisplacementMap"},
|
106
|
+
{"altglyphdef", "altGlyphDef"},
|
107
|
+
{"altglyphitem", "altGlyphItem"},
|
108
|
+
{"feflood", "feFlood"},
|
109
|
+
{"clippath", "clipPath"},
|
110
|
+
{"fediffuselighting", "feDiffuseLighting"},
|
111
|
+
{"textpath", "textPath"},
|
112
|
+
{"fespecularlighting", "feSpecularLighting"},
|
113
|
+
{"feoffset", "feOffset"},
|
114
|
+
{"fedistantlight", "feDistantLight"},
|
115
|
+
{"fepointlight", "fePointLight"},
|
116
|
+
{"lineargradient", "linearGradient"},
|
117
|
+
{"radialgradient", "radialGradient"},
|
118
|
+
{"foreignobject", "foreignObject"},
|
119
|
+
{"feblend", "feBlend"},
|
120
|
+
{"fegaussianblur", "feGaussianBlur"}
|
121
|
+
};
|
122
|
+
|
123
|
+
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
124
|
+
{
|
125
|
+
register unsigned int key = hash (str, len);
|
126
|
+
|
127
|
+
if (key <= MAX_HASH_VALUE)
|
128
|
+
if (len == lengthtable[key])
|
129
|
+
{
|
130
|
+
register const char *s = wordlist[key].from;
|
131
|
+
|
132
|
+
if (s && (((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gumbo_ascii_strncasecmp(str, s, len))
|
133
|
+
return &wordlist[key];
|
134
|
+
}
|
135
|
+
}
|
136
|
+
return 0;
|
137
|
+
}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
%{
|
2
|
+
#include "replacement.h"
|
3
|
+
#include "macros.h"
|
4
|
+
#include "ascii.h"
|
5
|
+
%}
|
6
|
+
|
7
|
+
%ignore-case
|
8
|
+
%struct-type
|
9
|
+
%omit-struct-type
|
10
|
+
%compare-lengths
|
11
|
+
%readonly-tables
|
12
|
+
%null-strings
|
13
|
+
%includes
|
14
|
+
%define lookup-function-name gumbo_get_svg_tag_replacement
|
15
|
+
%define slot-name from
|
16
|
+
%define initializer-suffix ,(char*)0
|
17
|
+
StringReplacement;
|
18
|
+
|
19
|
+
%%
|
20
|
+
"altglyph", "altGlyph"
|
21
|
+
"altglyphdef", "altGlyphDef"
|
22
|
+
"altglyphitem", "altGlyphItem"
|
23
|
+
"animatecolor", "animateColor"
|
24
|
+
"animatemotion", "animateMotion"
|
25
|
+
"animatetransform", "animateTransform"
|
26
|
+
"clippath", "clipPath"
|
27
|
+
"feblend", "feBlend"
|
28
|
+
"fecolormatrix", "feColorMatrix"
|
29
|
+
"fecomponenttransfer", "feComponentTransfer"
|
30
|
+
"fecomposite", "feComposite"
|
31
|
+
"feconvolvematrix", "feConvolveMatrix"
|
32
|
+
"fediffuselighting", "feDiffuseLighting"
|
33
|
+
"fedisplacementmap", "feDisplacementMap"
|
34
|
+
"fedistantlight", "feDistantLight"
|
35
|
+
"feflood", "feFlood"
|
36
|
+
"fefunca", "feFuncA"
|
37
|
+
"fefuncb", "feFuncB"
|
38
|
+
"fefuncg", "feFuncG"
|
39
|
+
"fefuncr", "feFuncR"
|
40
|
+
"fegaussianblur", "feGaussianBlur"
|
41
|
+
"feimage", "feImage"
|
42
|
+
"femerge", "feMerge"
|
43
|
+
"femergenode", "feMergeNode"
|
44
|
+
"femorphology", "feMorphology"
|
45
|
+
"feoffset", "feOffset"
|
46
|
+
"fepointlight", "fePointLight"
|
47
|
+
"fespecularlighting", "feSpecularLighting"
|
48
|
+
"fespotlight", "feSpotLight"
|
49
|
+
"fetile", "feTile"
|
50
|
+
"feturbulence", "feTurbulence"
|
51
|
+
"foreignobject", "foreignObject"
|
52
|
+
"glyphref", "glyphRef"
|
53
|
+
"lineargradient", "linearGradient"
|
54
|
+
"radialgradient", "radialGradient"
|
55
|
+
"textpath", "textPath"
|
@@ -0,0 +1,222 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2011 Google Inc.
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include "gumbo.h"
|
18
|
+
#include "util.h"
|
19
|
+
#include "tag_lookup.h"
|
20
|
+
|
21
|
+
#include <assert.h>
|
22
|
+
#include <string.h>
|
23
|
+
|
24
|
+
static const char kGumboTagNames[GUMBO_TAG_LAST+1][15] = {
|
25
|
+
[GUMBO_TAG_HTML] = "html",
|
26
|
+
[GUMBO_TAG_HEAD] = "head",
|
27
|
+
[GUMBO_TAG_TITLE] = "title",
|
28
|
+
[GUMBO_TAG_BASE] = "base",
|
29
|
+
[GUMBO_TAG_LINK] = "link",
|
30
|
+
[GUMBO_TAG_META] = "meta",
|
31
|
+
[GUMBO_TAG_STYLE] = "style",
|
32
|
+
[GUMBO_TAG_SCRIPT] = "script",
|
33
|
+
[GUMBO_TAG_NOSCRIPT] = "noscript",
|
34
|
+
[GUMBO_TAG_TEMPLATE] = "template",
|
35
|
+
[GUMBO_TAG_BODY] = "body",
|
36
|
+
[GUMBO_TAG_ARTICLE] = "article",
|
37
|
+
[GUMBO_TAG_SECTION] = "section",
|
38
|
+
[GUMBO_TAG_NAV] = "nav",
|
39
|
+
[GUMBO_TAG_ASIDE] = "aside",
|
40
|
+
[GUMBO_TAG_H1] = "h1",
|
41
|
+
[GUMBO_TAG_H2] = "h2",
|
42
|
+
[GUMBO_TAG_H3] = "h3",
|
43
|
+
[GUMBO_TAG_H4] = "h4",
|
44
|
+
[GUMBO_TAG_H5] = "h5",
|
45
|
+
[GUMBO_TAG_H6] = "h6",
|
46
|
+
[GUMBO_TAG_HGROUP] = "hgroup",
|
47
|
+
[GUMBO_TAG_HEADER] = "header",
|
48
|
+
[GUMBO_TAG_FOOTER] = "footer",
|
49
|
+
[GUMBO_TAG_ADDRESS] = "address",
|
50
|
+
[GUMBO_TAG_P] = "p",
|
51
|
+
[GUMBO_TAG_HR] = "hr",
|
52
|
+
[GUMBO_TAG_PRE] = "pre",
|
53
|
+
[GUMBO_TAG_BLOCKQUOTE] = "blockquote",
|
54
|
+
[GUMBO_TAG_OL] = "ol",
|
55
|
+
[GUMBO_TAG_UL] = "ul",
|
56
|
+
[GUMBO_TAG_LI] = "li",
|
57
|
+
[GUMBO_TAG_DL] = "dl",
|
58
|
+
[GUMBO_TAG_DT] = "dt",
|
59
|
+
[GUMBO_TAG_DD] = "dd",
|
60
|
+
[GUMBO_TAG_FIGURE] = "figure",
|
61
|
+
[GUMBO_TAG_FIGCAPTION] = "figcaption",
|
62
|
+
[GUMBO_TAG_MAIN] = "main",
|
63
|
+
[GUMBO_TAG_DIV] = "div",
|
64
|
+
[GUMBO_TAG_A] = "a",
|
65
|
+
[GUMBO_TAG_EM] = "em",
|
66
|
+
[GUMBO_TAG_STRONG] = "strong",
|
67
|
+
[GUMBO_TAG_SMALL] = "small",
|
68
|
+
[GUMBO_TAG_S] = "s",
|
69
|
+
[GUMBO_TAG_CITE] = "cite",
|
70
|
+
[GUMBO_TAG_Q] = "q",
|
71
|
+
[GUMBO_TAG_DFN] = "dfn",
|
72
|
+
[GUMBO_TAG_ABBR] = "abbr",
|
73
|
+
[GUMBO_TAG_DATA] = "data",
|
74
|
+
[GUMBO_TAG_TIME] = "time",
|
75
|
+
[GUMBO_TAG_CODE] = "code",
|
76
|
+
[GUMBO_TAG_VAR] = "var",
|
77
|
+
[GUMBO_TAG_SAMP] = "samp",
|
78
|
+
[GUMBO_TAG_KBD] = "kbd",
|
79
|
+
[GUMBO_TAG_SUB] = "sub",
|
80
|
+
[GUMBO_TAG_SUP] = "sup",
|
81
|
+
[GUMBO_TAG_I] = "i",
|
82
|
+
[GUMBO_TAG_B] = "b",
|
83
|
+
[GUMBO_TAG_U] = "u",
|
84
|
+
[GUMBO_TAG_MARK] = "mark",
|
85
|
+
[GUMBO_TAG_RUBY] = "ruby",
|
86
|
+
[GUMBO_TAG_RT] = "rt",
|
87
|
+
[GUMBO_TAG_RP] = "rp",
|
88
|
+
[GUMBO_TAG_BDI] = "bdi",
|
89
|
+
[GUMBO_TAG_BDO] = "bdo",
|
90
|
+
[GUMBO_TAG_SPAN] = "span",
|
91
|
+
[GUMBO_TAG_BR] = "br",
|
92
|
+
[GUMBO_TAG_WBR] = "wbr",
|
93
|
+
[GUMBO_TAG_INS] = "ins",
|
94
|
+
[GUMBO_TAG_DEL] = "del",
|
95
|
+
[GUMBO_TAG_IMAGE] = "image",
|
96
|
+
[GUMBO_TAG_IMG] = "img",
|
97
|
+
[GUMBO_TAG_IFRAME] = "iframe",
|
98
|
+
[GUMBO_TAG_EMBED] = "embed",
|
99
|
+
[GUMBO_TAG_OBJECT] = "object",
|
100
|
+
[GUMBO_TAG_PARAM] = "param",
|
101
|
+
[GUMBO_TAG_VIDEO] = "video",
|
102
|
+
[GUMBO_TAG_AUDIO] = "audio",
|
103
|
+
[GUMBO_TAG_SOURCE] = "source",
|
104
|
+
[GUMBO_TAG_TRACK] = "track",
|
105
|
+
[GUMBO_TAG_CANVAS] = "canvas",
|
106
|
+
[GUMBO_TAG_MAP] = "map",
|
107
|
+
[GUMBO_TAG_AREA] = "area",
|
108
|
+
[GUMBO_TAG_MATH] = "math",
|
109
|
+
[GUMBO_TAG_MI] = "mi",
|
110
|
+
[GUMBO_TAG_MO] = "mo",
|
111
|
+
[GUMBO_TAG_MN] = "mn",
|
112
|
+
[GUMBO_TAG_MS] = "ms",
|
113
|
+
[GUMBO_TAG_MTEXT] = "mtext",
|
114
|
+
[GUMBO_TAG_MGLYPH] = "mglyph",
|
115
|
+
[GUMBO_TAG_MALIGNMARK] = "malignmark",
|
116
|
+
[GUMBO_TAG_ANNOTATION_XML] = "annotation-xml",
|
117
|
+
[GUMBO_TAG_SVG] = "svg",
|
118
|
+
[GUMBO_TAG_FOREIGNOBJECT] = "foreignobject",
|
119
|
+
[GUMBO_TAG_DESC] = "desc",
|
120
|
+
[GUMBO_TAG_TABLE] = "table",
|
121
|
+
[GUMBO_TAG_CAPTION] = "caption",
|
122
|
+
[GUMBO_TAG_COLGROUP] = "colgroup",
|
123
|
+
[GUMBO_TAG_COL] = "col",
|
124
|
+
[GUMBO_TAG_TBODY] = "tbody",
|
125
|
+
[GUMBO_TAG_THEAD] = "thead",
|
126
|
+
[GUMBO_TAG_TFOOT] = "tfoot",
|
127
|
+
[GUMBO_TAG_TR] = "tr",
|
128
|
+
[GUMBO_TAG_TD] = "td",
|
129
|
+
[GUMBO_TAG_TH] = "th",
|
130
|
+
[GUMBO_TAG_FORM] = "form",
|
131
|
+
[GUMBO_TAG_FIELDSET] = "fieldset",
|
132
|
+
[GUMBO_TAG_LEGEND] = "legend",
|
133
|
+
[GUMBO_TAG_LABEL] = "label",
|
134
|
+
[GUMBO_TAG_INPUT] = "input",
|
135
|
+
[GUMBO_TAG_BUTTON] = "button",
|
136
|
+
[GUMBO_TAG_SELECT] = "select",
|
137
|
+
[GUMBO_TAG_DATALIST] = "datalist",
|
138
|
+
[GUMBO_TAG_OPTGROUP] = "optgroup",
|
139
|
+
[GUMBO_TAG_OPTION] = "option",
|
140
|
+
[GUMBO_TAG_TEXTAREA] = "textarea",
|
141
|
+
[GUMBO_TAG_KEYGEN] = "keygen",
|
142
|
+
[GUMBO_TAG_OUTPUT] = "output",
|
143
|
+
[GUMBO_TAG_PROGRESS] = "progress",
|
144
|
+
[GUMBO_TAG_METER] = "meter",
|
145
|
+
[GUMBO_TAG_DETAILS] = "details",
|
146
|
+
[GUMBO_TAG_SUMMARY] = "summary",
|
147
|
+
[GUMBO_TAG_MENU] = "menu",
|
148
|
+
[GUMBO_TAG_MENUITEM] = "menuitem",
|
149
|
+
[GUMBO_TAG_APPLET] = "applet",
|
150
|
+
[GUMBO_TAG_ACRONYM] = "acronym",
|
151
|
+
[GUMBO_TAG_BGSOUND] = "bgsound",
|
152
|
+
[GUMBO_TAG_DIR] = "dir",
|
153
|
+
[GUMBO_TAG_FRAME] = "frame",
|
154
|
+
[GUMBO_TAG_FRAMESET] = "frameset",
|
155
|
+
[GUMBO_TAG_NOFRAMES] = "noframes",
|
156
|
+
[GUMBO_TAG_LISTING] = "listing",
|
157
|
+
[GUMBO_TAG_XMP] = "xmp",
|
158
|
+
[GUMBO_TAG_NEXTID] = "nextid",
|
159
|
+
[GUMBO_TAG_NOEMBED] = "noembed",
|
160
|
+
[GUMBO_TAG_PLAINTEXT] = "plaintext",
|
161
|
+
[GUMBO_TAG_RB] = "rb",
|
162
|
+
[GUMBO_TAG_STRIKE] = "strike",
|
163
|
+
[GUMBO_TAG_BASEFONT] = "basefont",
|
164
|
+
[GUMBO_TAG_BIG] = "big",
|
165
|
+
[GUMBO_TAG_BLINK] = "blink",
|
166
|
+
[GUMBO_TAG_CENTER] = "center",
|
167
|
+
[GUMBO_TAG_FONT] = "font",
|
168
|
+
[GUMBO_TAG_MARQUEE] = "marquee",
|
169
|
+
[GUMBO_TAG_MULTICOL] = "multicol",
|
170
|
+
[GUMBO_TAG_NOBR] = "nobr",
|
171
|
+
[GUMBO_TAG_SPACER] = "spacer",
|
172
|
+
[GUMBO_TAG_TT] = "tt",
|
173
|
+
[GUMBO_TAG_RTC] = "rtc",
|
174
|
+
[GUMBO_TAG_DIALOG] = "dialog",
|
175
|
+
|
176
|
+
[GUMBO_TAG_UNKNOWN] = "",
|
177
|
+
[GUMBO_TAG_LAST] = "",
|
178
|
+
};
|
179
|
+
|
180
|
+
/*
 * Returns the lower-case name for `tag` from kGumboTagNames.
 *
 * The returned pointer refers to static storage and is never NULL;
 * GUMBO_TAG_UNKNOWN and GUMBO_TAG_LAST yield the empty string.
 *
 * A tag value outside the table is a caller bug and trips the assertion in
 * debug builds. When assertions are compiled out (NDEBUG) the empty name is
 * returned instead of reading outside the table.
 */
const char* gumbo_normalized_tagname(GumboTag tag) {
  assert(tag <= GUMBO_TAG_LAST);
  // The unsigned comparison also rejects negative values should the enum's
  // underlying type be signed.
  if ((unsigned int)tag > (unsigned int)GUMBO_TAG_LAST) {
    return kGumboTagNames[GUMBO_TAG_UNKNOWN];
  }
  // Rows are arrays inside the table, so the result can never be NULL and
  // needs no further check.
  return kGumboTagNames[tag];
}
|
186
|
+
|
187
|
+
/*
 * Narrows `text`, in place, from the original source text of a tag
 * ("<name ...>" or "</name ...>") to just the tag name.
 *
 * On return text->data points at the first character of the name and
 * text->length is the name's length; the underlying bytes are not modified.
 * A piece whose data pointer is NULL is left untouched.
 *
 * Preconditions (asserted): the piece starts with '<', ends with '>' and is
 * at least two bytes long (three for an end tag).
 *
 * The name ends at the first tab, LF, FF, CR, space or '/' for both start
 * and end tags, so "</div >" yields "div" just as "<div >" does.
 */
void gumbo_tag_from_original_text(GumboStringPiece* text) {
  if (text->data == NULL) {
    return;
  }

  assert(text->length >= 2);
  assert(text->data[0] == '<');
  assert(text->data[text->length - 1] == '>');

  if (text->data[1] == '/') {
    // End tag
    assert(text->length >= 3);
    text->data += 2;  // Move past </
    text->length -= 3;  // Drop "</" and the trailing '>'
  } else {
    // Start tag
    text->data += 1;  // Move past <
    text->length -= 2;  // Drop '<' and the trailing '>'
  }

  // Cut the piece at the first character that cannot be part of a tag name.
  // NOTE(review): '\r' is included on the assumption that the original text
  // is the raw, un-normalized input, where a CR may precede an LF -- confirm
  // against the tokenizer. A tag name can never contain CR, so this is safe
  // either way.
  const char* const end = text->data + text->length;
  for (const char* c = text->data; c != end; ++c) {
    switch (*c) {
      case '\t':
      case '\n':
      case '\f':
      case '\r':
      case ' ':
      case '/':
        text->length = c - text->data;
        return;
      default:
        break;
    }
  }
}
|
218
|
+
|
219
|
+
GumboTag gumbo_tagn_enum(const char *tagname, size_t tagname_length) {
|
220
|
+
const TagHashSlot *slot = gumbo_tag_lookup(tagname, tagname_length);
|
221
|
+
return slot ? slot->tag : GUMBO_TAG_UNKNOWN;
|
222
|
+
}
|