nokogumbo 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +0 -3
- data/Rakefile +10 -9
- metadata +3 -29
- data/work/attribute.c +0 -44
- data/work/attribute.h +0 -37
- data/work/char_ref.c +0 -2561
- data/work/char_ref.h +0 -61
- data/work/error.c +0 -258
- data/work/error.h +0 -225
- data/work/gumbo.h +0 -800
- data/work/insertion_mode.h +0 -54
- data/work/nokogumbo.c +0 -254
- data/work/parser.c +0 -3893
- data/work/parser.h +0 -57
- data/work/string_buffer.c +0 -106
- data/work/string_buffer.h +0 -82
- data/work/string_piece.c +0 -49
- data/work/string_piece.h +0 -39
- data/work/tag.c +0 -222
- data/work/token_type.h +0 -40
- data/work/tokenizer.c +0 -2978
- data/work/tokenizer.h +0 -123
- data/work/tokenizer_states.h +0 -103
- data/work/utf8.c +0 -268
- data/work/utf8.h +0 -127
- data/work/util.c +0 -58
- data/work/util.h +0 -57
- data/work/vector.c +0 -121
- data/work/vector.h +0 -66
data/work/parser.h
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
//
|
17
|
-
// Contains the definition of the top-level GumboParser structure that's
|
18
|
-
// threaded through basically every internal function in the library.
|
19
|
-
|
20
|
-
#ifndef GUMBO_PARSER_H_
|
21
|
-
#define GUMBO_PARSER_H_
|
22
|
-
|
23
|
-
#ifdef __cplusplus
|
24
|
-
extern "C" {
|
25
|
-
#endif
|
26
|
-
|
27
|
-
struct _GumboParserState;
|
28
|
-
struct _GumboOutput;
|
29
|
-
struct _GumboOptions;
|
30
|
-
struct _GumboTokenizerState;
|
31
|
-
|
32
|
-
// An overarching struct that's threaded through (nearly) all functions in the
|
33
|
-
// library, OOP-style. This gives each function access to the options and
|
34
|
-
// output, along with any internal state needed for the parse.
|
35
|
-
typedef struct _GumboParser {
|
36
|
-
// Settings for this parse run.
|
37
|
-
const struct _GumboOptions* _options;
|
38
|
-
|
39
|
-
// Output for the parse.
|
40
|
-
struct _GumboOutput* _output;
|
41
|
-
|
42
|
-
// The internal tokenizer state, defined as a pointer to avoid a cyclic
|
43
|
-
// dependency on html5tokenizer.h. The main parse routine is responsible for
|
44
|
-
// initializing this on parse start, and destroying it on parse end.
|
45
|
-
// End-users will never see a non-garbage value in this pointer.
|
46
|
-
struct _GumboTokenizerState* _tokenizer_state;
|
47
|
-
|
48
|
-
// The internal parser state. Initialized on parse start and destroyed on
|
49
|
-
// parse end; end-users will never see a non-garbage value in this pointer.
|
50
|
-
struct _GumboParserState* _parser_state;
|
51
|
-
} GumboParser;
|
52
|
-
|
53
|
-
#ifdef __cplusplus
|
54
|
-
}
|
55
|
-
#endif
|
56
|
-
|
57
|
-
#endif // GUMBO_PARSER_H_
|
data/work/string_buffer.c
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
|
17
|
-
#include "string_buffer.h"
|
18
|
-
|
19
|
-
#include <assert.h>
|
20
|
-
#include <stdlib.h>
|
21
|
-
#include <string.h>
|
22
|
-
#include <strings.h>
|
23
|
-
|
24
|
-
#include "string_piece.h"
|
25
|
-
#include "util.h"
|
26
|
-
|
27
|
-
struct _GumboParser;
|
28
|
-
|
29
|
-
static const size_t kDefaultStringBufferSize = 10;
|
30
|
-
|
31
|
-
static void maybe_resize_string_buffer(
|
32
|
-
struct _GumboParser* parser, size_t additional_chars,
|
33
|
-
GumboStringBuffer* buffer) {
|
34
|
-
size_t new_length = buffer->length + additional_chars;
|
35
|
-
size_t new_capacity = buffer->capacity;
|
36
|
-
while (new_capacity < new_length) {
|
37
|
-
new_capacity *= 2;
|
38
|
-
}
|
39
|
-
if (new_capacity != buffer->capacity) {
|
40
|
-
char* new_data = gumbo_parser_allocate(parser, new_capacity);
|
41
|
-
memcpy(new_data, buffer->data, buffer->length);
|
42
|
-
gumbo_parser_deallocate(parser, buffer->data);
|
43
|
-
buffer->data = new_data;
|
44
|
-
buffer->capacity = new_capacity;
|
45
|
-
}
|
46
|
-
}
|
47
|
-
|
48
|
-
void gumbo_string_buffer_init(
|
49
|
-
struct _GumboParser* parser, GumboStringBuffer* output) {
|
50
|
-
output->data = gumbo_parser_allocate(parser, kDefaultStringBufferSize);
|
51
|
-
output->length = 0;
|
52
|
-
output->capacity = kDefaultStringBufferSize;
|
53
|
-
}
|
54
|
-
|
55
|
-
void gumbo_string_buffer_reserve(
|
56
|
-
struct _GumboParser* parser, size_t min_capacity,
|
57
|
-
GumboStringBuffer* output) {
|
58
|
-
maybe_resize_string_buffer(parser, min_capacity - output->length, output);
|
59
|
-
}
|
60
|
-
|
61
|
-
void gumbo_string_buffer_append_codepoint(
|
62
|
-
struct _GumboParser* parser, int c, GumboStringBuffer* output) {
|
63
|
-
// num_bytes is actually the number of continuation bytes, 1 less than the
|
64
|
-
// total number of bytes. This is done to keep the loop below simple and
|
65
|
-
// should probably change if we unroll it.
|
66
|
-
int num_bytes, prefix;
|
67
|
-
if (c <= 0x7f) {
|
68
|
-
num_bytes = 0;
|
69
|
-
prefix = 0;
|
70
|
-
} else if (c <= 0x7ff) {
|
71
|
-
num_bytes = 1;
|
72
|
-
prefix = 0xc0;
|
73
|
-
} else if (c <= 0xffff) {
|
74
|
-
num_bytes = 2;
|
75
|
-
prefix = 0xe0;
|
76
|
-
} else {
|
77
|
-
num_bytes = 3;
|
78
|
-
prefix = 0xf0;
|
79
|
-
}
|
80
|
-
maybe_resize_string_buffer(parser, num_bytes + 1, output);
|
81
|
-
output->data[output->length++] = prefix | (c >> (num_bytes * 6));
|
82
|
-
for (int i = num_bytes - 1; i >= 0; --i) {
|
83
|
-
output->data[output->length++] = 0x80 | (0x3f & (c >> (i * 6)));
|
84
|
-
}
|
85
|
-
}
|
86
|
-
|
87
|
-
void gumbo_string_buffer_append_string(
|
88
|
-
struct _GumboParser* parser, GumboStringPiece* str,
|
89
|
-
GumboStringBuffer* output) {
|
90
|
-
maybe_resize_string_buffer(parser, str->length, output);
|
91
|
-
memcpy(output->data + output->length, str->data, str->length);
|
92
|
-
output->length += str->length;
|
93
|
-
}
|
94
|
-
|
95
|
-
char* gumbo_string_buffer_to_string(
|
96
|
-
struct _GumboParser* parser, GumboStringBuffer* input) {
|
97
|
-
char* buffer = gumbo_parser_allocate(parser, input->length + 1);
|
98
|
-
memcpy(buffer, input->data, input->length);
|
99
|
-
buffer[input->length] = '\0';
|
100
|
-
return buffer;
|
101
|
-
}
|
102
|
-
|
103
|
-
void gumbo_string_buffer_destroy(
|
104
|
-
struct _GumboParser* parser, GumboStringBuffer* buffer) {
|
105
|
-
gumbo_parser_deallocate(parser, buffer->data);
|
106
|
-
}
|
data/work/string_buffer.h
DELETED
@@ -1,82 +0,0 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
//
|
17
|
-
#ifndef GUMBO_STRING_BUFFER_H_
|
18
|
-
#define GUMBO_STRING_BUFFER_H_
|
19
|
-
|
20
|
-
#include <stdbool.h>
|
21
|
-
#include <stddef.h>
|
22
|
-
|
23
|
-
#ifdef __cplusplus
|
24
|
-
extern "C" {
|
25
|
-
#endif
|
26
|
-
|
27
|
-
// Forward declaration since it's passed into some of the functions in this
|
28
|
-
// header.
|
29
|
-
struct _GumboParser;
|
30
|
-
struct _GumboStringPiece;
|
31
|
-
|
32
|
-
// A struct representing a mutable, growable string. This consists of a
|
33
|
-
// heap-allocated buffer that may grow (by doubling) as necessary. When
|
34
|
-
// converting to a string, this allocates a new buffer that is only as long as
|
35
|
-
// it needs to be. Note that the internal buffer here is *not* nul-terminated,
|
36
|
-
// so be sure not to use ordinary string manipulation functions on it.
|
37
|
-
typedef struct _GumboStringBuffer {
|
38
|
-
// A pointer to the beginning of the buffer. Set by gumbo_string_buffer_init, even when length == 0.
|
39
|
-
char* data;
|
40
|
-
|
41
|
-
// The length of the string fragment, in bytes. May be zero.
|
42
|
-
size_t length;
|
43
|
-
|
44
|
-
// The capacity of the buffer, in bytes.
|
45
|
-
size_t capacity;
|
46
|
-
} GumboStringBuffer;
|
47
|
-
|
48
|
-
// Initializes a new GumboStringBuffer.
|
49
|
-
void gumbo_string_buffer_init(
|
50
|
-
struct _GumboParser* parser, GumboStringBuffer* output);
|
51
|
-
|
52
|
-
// Ensures that the buffer contains at least a certain amount of space. Most
|
53
|
-
// useful with snprintf and the other length-delimited string functions, which
|
54
|
-
// may want to write directly into the buffer.
|
55
|
-
void gumbo_string_buffer_reserve(
|
56
|
-
struct _GumboParser* parser, size_t min_capacity,
|
57
|
-
GumboStringBuffer* output);
|
58
|
-
|
59
|
-
// Appends a single Unicode codepoint onto the end of the GumboStringBuffer.
|
60
|
-
// This is essentially a UTF-8 encoder, and may add 1-4 bytes depending on the
|
61
|
-
// value of the codepoint.
|
62
|
-
void gumbo_string_buffer_append_codepoint(
|
63
|
-
struct _GumboParser* parser, int c, GumboStringBuffer* output);
|
64
|
-
|
65
|
-
// Appends a string onto the end of the GumboStringBuffer.
|
66
|
-
void gumbo_string_buffer_append_string(
|
67
|
-
struct _GumboParser* parser, struct _GumboStringPiece* str,
|
68
|
-
GumboStringBuffer* output);
|
69
|
-
|
70
|
-
// Converts this string buffer to const char*, allocating a new buffer for it.
|
71
|
-
char* gumbo_string_buffer_to_string(
|
72
|
-
struct _GumboParser* parser, GumboStringBuffer* input);
|
73
|
-
|
74
|
-
// Deallocates this GumboStringBuffer.
|
75
|
-
void gumbo_string_buffer_destroy(
|
76
|
-
struct _GumboParser* parser, GumboStringBuffer* buffer);
|
77
|
-
|
78
|
-
#ifdef __cplusplus
|
79
|
-
}
|
80
|
-
#endif
|
81
|
-
|
82
|
-
#endif // GUMBO_STRING_BUFFER_H_
|
data/work/string_piece.c
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
|
17
|
-
#include "string_piece.h"
|
18
|
-
|
19
|
-
#include <assert.h>
|
20
|
-
#include <stdlib.h>
|
21
|
-
#include <string.h>
|
22
|
-
#include <strings.h>
|
23
|
-
|
24
|
-
#include "util.h"
|
25
|
-
|
26
|
-
struct _GumboParser;
|
27
|
-
|
28
|
-
const GumboStringPiece kGumboEmptyString = { NULL, 0 };
|
29
|
-
|
30
|
-
bool gumbo_string_equals(
|
31
|
-
const GumboStringPiece* str1, const GumboStringPiece* str2) {
|
32
|
-
return str1->length == str2->length &&
|
33
|
-
!memcmp(str1->data, str2->data, str1->length);
|
34
|
-
}
|
35
|
-
|
36
|
-
bool gumbo_string_equals_ignore_case(
|
37
|
-
const GumboStringPiece* str1, const GumboStringPiece* str2) {
|
38
|
-
return str1->length == str2->length &&
|
39
|
-
!strncasecmp(str1->data, str2->data, str1->length);
|
40
|
-
}
|
41
|
-
|
42
|
-
void gumbo_string_copy(
|
43
|
-
struct _GumboParser* parser, GumboStringPiece* dest,
|
44
|
-
const GumboStringPiece* source) {
|
45
|
-
dest->length = source->length;
|
46
|
-
char* buffer = gumbo_parser_allocate(parser, source->length);
|
47
|
-
memcpy(buffer, source->data, source->length);
|
48
|
-
dest->data = buffer;
|
49
|
-
}
|
data/work/string_piece.h
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
|
17
|
-
#ifndef GUMBO_STRING_PIECE_H_
|
18
|
-
#define GUMBO_STRING_PIECE_H_
|
19
|
-
|
20
|
-
#include "gumbo.h"
|
21
|
-
|
22
|
-
#ifdef __cplusplus
|
23
|
-
extern "C" {
|
24
|
-
#endif
|
25
|
-
|
26
|
-
struct _GumboParser;
|
27
|
-
|
28
|
-
// Performs a deep-copy of a GumboStringPiece, allocating a fresh buffer in the
|
29
|
-
// destination and copying over the characters from source. Dest should be
|
30
|
-
// empty, with no buffer allocated; otherwise, this leaks it.
|
31
|
-
void gumbo_string_copy(
|
32
|
-
struct _GumboParser* parser, GumboStringPiece* dest,
|
33
|
-
const GumboStringPiece* source);
|
34
|
-
|
35
|
-
#ifdef __cplusplus
|
36
|
-
}
|
37
|
-
#endif
|
38
|
-
|
39
|
-
#endif // GUMBO_STRING_PIECE_H_
|
data/work/tag.c
DELETED
@@ -1,222 +0,0 @@
|
|
1
|
-
// Copyright 2011 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
|
17
|
-
#include "gumbo.h"
|
18
|
-
|
19
|
-
#include <assert.h>
|
20
|
-
#include <ctype.h>
|
21
|
-
#include <strings.h> // For strcasecmp.
|
22
|
-
|
23
|
-
// NOTE(jdtang): Keep this in sync with the GumboTag enum in the header.
|
24
|
-
// TODO(jdtang): Investigate whether there're efficiency benefits to putting the
|
25
|
-
// most common tag names first, or to putting them in alphabetical order and
|
26
|
-
// using a binary search.
|
27
|
-
const char* kGumboTagNames[] = {
|
28
|
-
"html",
|
29
|
-
"head",
|
30
|
-
"title",
|
31
|
-
"base",
|
32
|
-
"link",
|
33
|
-
"meta",
|
34
|
-
"style",
|
35
|
-
"script",
|
36
|
-
"noscript",
|
37
|
-
"body",
|
38
|
-
"section",
|
39
|
-
"nav",
|
40
|
-
"article",
|
41
|
-
"aside",
|
42
|
-
"h1",
|
43
|
-
"h2",
|
44
|
-
"h3",
|
45
|
-
"h4",
|
46
|
-
"h5",
|
47
|
-
"h6",
|
48
|
-
"hgroup",
|
49
|
-
"header",
|
50
|
-
"footer",
|
51
|
-
"address",
|
52
|
-
"p",
|
53
|
-
"hr",
|
54
|
-
"pre",
|
55
|
-
"blockquote",
|
56
|
-
"ol",
|
57
|
-
"ul",
|
58
|
-
"li",
|
59
|
-
"dl",
|
60
|
-
"dt",
|
61
|
-
"dd",
|
62
|
-
"figure",
|
63
|
-
"figcaption",
|
64
|
-
"div",
|
65
|
-
"a",
|
66
|
-
"em",
|
67
|
-
"strong",
|
68
|
-
"small",
|
69
|
-
"s",
|
70
|
-
"cite",
|
71
|
-
"q",
|
72
|
-
"dfn",
|
73
|
-
"abbr",
|
74
|
-
"time",
|
75
|
-
"code",
|
76
|
-
"var",
|
77
|
-
"samp",
|
78
|
-
"kbd",
|
79
|
-
"sub",
|
80
|
-
"sup",
|
81
|
-
"i",
|
82
|
-
"b",
|
83
|
-
"mark",
|
84
|
-
"ruby",
|
85
|
-
"rt",
|
86
|
-
"rp",
|
87
|
-
"bdi",
|
88
|
-
"bdo",
|
89
|
-
"span",
|
90
|
-
"br",
|
91
|
-
"wbr",
|
92
|
-
"ins",
|
93
|
-
"del",
|
94
|
-
"image",
|
95
|
-
"img",
|
96
|
-
"iframe",
|
97
|
-
"embed",
|
98
|
-
"object",
|
99
|
-
"param",
|
100
|
-
"video",
|
101
|
-
"audio",
|
102
|
-
"source",
|
103
|
-
"track",
|
104
|
-
"canvas",
|
105
|
-
"map",
|
106
|
-
"area",
|
107
|
-
"math",
|
108
|
-
"mi",
|
109
|
-
"mo",
|
110
|
-
"mn",
|
111
|
-
"ms",
|
112
|
-
"mtext",
|
113
|
-
"mglyph",
|
114
|
-
"malignmark",
|
115
|
-
"annotation-xml",
|
116
|
-
"svg",
|
117
|
-
"foreignobject",
|
118
|
-
"desc",
|
119
|
-
"table",
|
120
|
-
"caption",
|
121
|
-
"colgroup",
|
122
|
-
"col",
|
123
|
-
"tbody",
|
124
|
-
"thead",
|
125
|
-
"tfoot",
|
126
|
-
"tr",
|
127
|
-
"td",
|
128
|
-
"th",
|
129
|
-
"form",
|
130
|
-
"fieldset",
|
131
|
-
"legend",
|
132
|
-
"label",
|
133
|
-
"input",
|
134
|
-
"button",
|
135
|
-
"select",
|
136
|
-
"datalist",
|
137
|
-
"optgroup",
|
138
|
-
"option",
|
139
|
-
"textarea",
|
140
|
-
"keygen",
|
141
|
-
"output",
|
142
|
-
"progress",
|
143
|
-
"meter",
|
144
|
-
"details",
|
145
|
-
"summary",
|
146
|
-
"command",
|
147
|
-
"menu",
|
148
|
-
"applet",
|
149
|
-
"acronym",
|
150
|
-
"bgsound",
|
151
|
-
"dir",
|
152
|
-
"frame",
|
153
|
-
"frameset",
|
154
|
-
"noframes",
|
155
|
-
"isindex",
|
156
|
-
"listing",
|
157
|
-
"xmp",
|
158
|
-
"nextid",
|
159
|
-
"noembed",
|
160
|
-
"plaintext",
|
161
|
-
"rb",
|
162
|
-
"strike",
|
163
|
-
"basefont",
|
164
|
-
"big",
|
165
|
-
"blink",
|
166
|
-
"center",
|
167
|
-
"font",
|
168
|
-
"marquee",
|
169
|
-
"multicol",
|
170
|
-
"nobr",
|
171
|
-
"spacer",
|
172
|
-
"tt",
|
173
|
-
"u",
|
174
|
-
"", // TAG_UNKNOWN
|
175
|
-
"", // TAG_LAST
|
176
|
-
};
|
177
|
-
|
178
|
-
const char* gumbo_normalized_tagname(GumboTag tag) {
|
179
|
-
assert(tag <= GUMBO_TAG_LAST);
|
180
|
-
return kGumboTagNames[tag];
|
181
|
-
}
|
182
|
-
|
183
|
-
// TODO(jdtang): Add test for this.
|
184
|
-
void gumbo_tag_from_original_text(GumboStringPiece* text) {
|
185
|
-
if (text->data == NULL) {
|
186
|
-
return;
|
187
|
-
}
|
188
|
-
|
189
|
-
assert(text->length >= 2);
|
190
|
-
assert(text->data[0] == '<');
|
191
|
-
assert(text->data[text->length - 1] == '>');
|
192
|
-
if (text->data[1] == '/') {
|
193
|
-
// End tag.
|
194
|
-
assert(text->length >= 3);
|
195
|
-
text->data += 2; // Move past </
|
196
|
-
text->length -= 3;
|
197
|
-
} else {
|
198
|
-
// Start tag.
|
199
|
-
text->data += 1; // Move past <
|
200
|
-
text->length -= 2;
|
201
|
-
// strnchr is apparently not a standard C library function, so I loop
|
202
|
-
// explicitly looking for whitespace or other illegal tag characters.
|
203
|
-
for (const char* c = text->data; c != text->data + text->length; ++c) {
|
204
|
-
if (isspace(*c) || *c == '/') {
|
205
|
-
text->length = c - text->data;
|
206
|
-
break;
|
207
|
-
}
|
208
|
-
}
|
209
|
-
}
|
210
|
-
}
|
211
|
-
|
212
|
-
GumboTag gumbo_tag_enum(const char* tagname) {
|
213
|
-
for (int i = 0; i < GUMBO_TAG_LAST; ++i) {
|
214
|
-
// TODO(jdtang): strcasecmp is non-portable, so if we want to support
|
215
|
-
// non-GCC compilers, we'll need some #ifdef magic. This source already has
|
216
|
-
// pretty significant issues with MSVC6 anyway.
|
217
|
-
if (strcasecmp(tagname, kGumboTagNames[i]) == 0) {
|
218
|
-
return i;
|
219
|
-
}
|
220
|
-
}
|
221
|
-
return GUMBO_TAG_UNKNOWN;
|
222
|
-
}
|