nokogumbo 1.4.7 → 1.4.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,123 +0,0 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
- //
17
- // This contains an implementation of a tokenizer for HTML5. It consumes a
18
- // buffer of UTF-8 characters, and then emits a stream of tokens.
19
-
20
- #ifndef GUMBO_TOKENIZER_H_
21
- #define GUMBO_TOKENIZER_H_
22
-
23
- #include <stdbool.h>
24
- #include <stddef.h>
25
-
26
- #include "gumbo.h"
27
- #include "token_type.h"
28
- #include "tokenizer_states.h"
29
-
30
- #ifdef __cplusplus
31
- extern "C" {
32
- #endif
33
-
34
- struct GumboInternalParser;
35
-
36
// Payload carried by a doctype token.
typedef struct GumboInternalTokenDocType {
  const char* name;
  const char* public_identifier;
  const char* system_identifier;
  bool force_quirks;
  // An empty (0-length) public or system ID cannot be distinguished from a
  // missing one by looking at the string alone, yet the spec handles the two
  // cases differently, so presence is tracked with explicit flags.
  bool has_public_identifier;
  bool has_system_identifier;
} GumboTokenDocType;
48
-
49
- // Struct containing all information pertaining to start tag tokens.
50
- typedef struct GumboInternalTokenStartTag {
51
- GumboTag tag;
52
- GumboVector /* GumboAttribute */ attributes;
53
- bool is_self_closing;
54
- } GumboTokenStartTag;
55
-
56
- // A data structure representing a single token in the input stream. This
57
- // contains an enum for the type, the source position, a GumboStringPiece
58
- // pointing to the original text, and then a union for any parsed data.
59
- typedef struct GumboInternalToken {
60
- GumboTokenType type;
61
- GumboSourcePosition position;
62
- GumboStringPiece original_text;
63
- union {
64
- GumboTokenDocType doc_type;
65
- GumboTokenStartTag start_tag;
66
- GumboTag end_tag;
67
- const char* text; // For comments.
68
- int character; // For character, whitespace, null, and EOF tokens.
69
- } v;
70
- } GumboToken;
71
-
72
- // Initializes the tokenizer state within the GumboParser object, setting up a
73
- // parse of the specified text.
74
- void gumbo_tokenizer_state_init(
75
- struct GumboInternalParser* parser, const char* text, size_t text_length);
76
-
77
- // Destroys the tokenizer state within the GumboParser object, freeing any
78
- // dynamically-allocated structures within it.
79
- void gumbo_tokenizer_state_destroy(struct GumboInternalParser* parser);
80
-
81
- // Sets the tokenizer state to the specified value. This is needed by some
82
- // parser states, which alter the state of the tokenizer in response to tags
83
- // seen.
84
- void gumbo_tokenizer_set_state(
85
- struct GumboInternalParser* parser, GumboTokenizerEnum state);
86
-
87
- // Flags whether the current node is a foreign content element. This is
88
- // necessary for the markup declaration open state, where the tokenizer must be
89
- // aware of the state of the parser to properly tokenize bad comment tags.
90
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state
91
- void gumbo_tokenizer_set_is_current_node_foreign(
92
- struct GumboInternalParser* parser, bool is_foreign);
93
-
94
- // Lexes a single token from the specified buffer, filling the output with the
95
- // parsed GumboToken data structure. Returns true for a successful
96
- // tokenization, false if a parse error occurs.
97
- //
98
- // Example:
99
- // struct GumboInternalParser parser;
100
- // GumboToken output;
101
- // gumbo_tokenizer_state_init(&parser, text, strlen(text));
102
- // while (gumbo_lex(&parser, &output)) {
103
- // ...do stuff with output.
104
- // gumbo_token_destroy(&parser, &output);
105
- // }
106
- // gumbo_tokenizer_state_destroy(&parser);
107
- bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
108
-
109
- // Frees the internally-allocated pointers within a GumboToken. Note that this
110
- // doesn't free the token itself, since oftentimes it will be allocated on the
111
- // stack. A simple call to free() (or GumboParser->deallocator, if
112
- // appropriate) can handle that.
113
- //
114
- // Note that if you are handing over ownership of the internal strings to some
115
- // other data structure - for example, a parse tree - these do not need to be
116
- // freed.
117
- void gumbo_token_destroy(struct GumboInternalParser* parser, GumboToken* token);
118
-
119
- #ifdef __cplusplus
120
- }
121
- #endif
122
-
123
- #endif // GUMBO_TOKENIZER_H_
@@ -1,103 +0,0 @@
1
- // Copyright 2011 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
- //
17
- // This contains the list of states used in the tokenizer. Although at first
18
- // glance it seems like these could be kept internal to the tokenizer, several
19
- // of the actions in the parser require that it reach into the tokenizer and
20
- // reset the tokenizer state. For that to work, it needs to have the
21
- // definitions of individual states available.
22
- //
23
- // This may also be useful for providing more detailed error messages for parse
24
- // errors, as we can match up states and inputs in a table without having to
25
- // clutter the tokenizer code with lots of precise error messages.
26
-
27
- #ifndef GUMBO_TOKENIZER_STATES_H_
28
- #define GUMBO_TOKENIZER_STATES_H_
29
-
30
// States of the tokenizer state machine. The tokenizer's dispatch table is
// built in this same order, so any change to the ordering here must be
// mirrored there.
typedef enum {
  GUMBO_LEX_DATA = 0,
  GUMBO_LEX_CHAR_REF_IN_DATA,
  GUMBO_LEX_RCDATA,
  GUMBO_LEX_CHAR_REF_IN_RCDATA,
  GUMBO_LEX_RAWTEXT,
  GUMBO_LEX_SCRIPT,
  GUMBO_LEX_PLAINTEXT,
  GUMBO_LEX_TAG_OPEN,
  GUMBO_LEX_END_TAG_OPEN,
  GUMBO_LEX_TAG_NAME,
  GUMBO_LEX_RCDATA_LT,
  GUMBO_LEX_RCDATA_END_TAG_OPEN,
  GUMBO_LEX_RCDATA_END_TAG_NAME,
  GUMBO_LEX_RAWTEXT_LT,
  GUMBO_LEX_RAWTEXT_END_TAG_OPEN,
  GUMBO_LEX_RAWTEXT_END_TAG_NAME,
  GUMBO_LEX_SCRIPT_LT,
  GUMBO_LEX_SCRIPT_END_TAG_OPEN,
  GUMBO_LEX_SCRIPT_END_TAG_NAME,
  GUMBO_LEX_SCRIPT_ESCAPED_START,
  GUMBO_LEX_SCRIPT_ESCAPED_START_DASH,
  GUMBO_LEX_SCRIPT_ESCAPED,
  GUMBO_LEX_SCRIPT_ESCAPED_DASH,
  GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH,
  GUMBO_LEX_SCRIPT_ESCAPED_LT,
  GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_OPEN,
  GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME,
  GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_START,
  GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED,
  GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH,
  GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH_DASH,
  GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_LT,
  GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END,
  GUMBO_LEX_BEFORE_ATTR_NAME,
  GUMBO_LEX_ATTR_NAME,
  GUMBO_LEX_AFTER_ATTR_NAME,
  GUMBO_LEX_BEFORE_ATTR_VALUE,
  GUMBO_LEX_ATTR_VALUE_DOUBLE_QUOTED,
  GUMBO_LEX_ATTR_VALUE_SINGLE_QUOTED,
  GUMBO_LEX_ATTR_VALUE_UNQUOTED,
  GUMBO_LEX_CHAR_REF_IN_ATTR_VALUE,
  GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED,
  GUMBO_LEX_SELF_CLOSING_START_TAG,
  GUMBO_LEX_BOGUS_COMMENT,
  GUMBO_LEX_MARKUP_DECLARATION,
  GUMBO_LEX_COMMENT_START,
  GUMBO_LEX_COMMENT_START_DASH,
  GUMBO_LEX_COMMENT,
  GUMBO_LEX_COMMENT_END_DASH,
  GUMBO_LEX_COMMENT_END,
  GUMBO_LEX_COMMENT_END_BANG,
  GUMBO_LEX_DOCTYPE,
  GUMBO_LEX_BEFORE_DOCTYPE_NAME,
  GUMBO_LEX_DOCTYPE_NAME,
  GUMBO_LEX_AFTER_DOCTYPE_NAME,
  GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD,
  GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID,
  GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED,
  GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED,
  GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID,
  GUMBO_LEX_BETWEEN_DOCTYPE_PUBLIC_SYSTEM_ID,
  GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD,
  GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID,
  GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED,
  GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED,
  GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID,
  GUMBO_LEX_BOGUS_DOCTYPE,
  GUMBO_LEX_CDATA
} GumboTokenizerEnum;
102
-
103
- #endif // GUMBO_TOKENIZER_STATES_H_
@@ -1,270 +0,0 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
-
17
- #include "utf8.h"
18
-
19
- #include <assert.h>
20
- #include <stdint.h>
21
- #include <string.h>
22
- #include <strings.h> // For strncasecmp.
23
-
24
- #include "error.h"
25
- #include "gumbo.h"
26
- #include "parser.h"
27
- #include "util.h"
28
- #include "vector.h"
29
-
30
// Code point (U+FFFD) that the iterator substitutes for input it cannot
// decode or that is flagged as an invalid code point.
const int kUtf8ReplacementChar = 0xFFFD;
31
-
32
- // Reference material:
33
- // Wikipedia: http://en.wikipedia.org/wiki/UTF-8#Description
34
- // RFC 3629: http://tools.ietf.org/html/rfc3629
35
- // HTML5 Unicode handling:
36
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#preprocessing-the-input-stream
37
- //
38
- // This implementation is based on a DFA-based decoder by Bjoern Hoehrmann
39
- // <bjoern@hoehrmann.de>. We wrap the inner table-based decoder routine in our
40
- // own handling for newlines, tabs, invalid continuation bytes, and other
41
- // conditions that the HTML5 spec fully specifies but normal UTF8 decoders do
42
- // not handle.
43
- // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. Full text of
44
- // the license agreement and code follows.
45
-
46
- // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
47
-
48
- // Permission is hereby granted, free of charge, to any person obtaining a copy
49
- // of this software and associated documentation files (the "Software"), to deal
50
- // in the Software without restriction, including without limitation the rights
51
- // to
52
- // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
53
- // of the Software, and to permit persons to whom the Software is furnished to
54
- // do
55
- // so, subject to the following conditions:
56
-
57
- // The above copyright notice and this permission notice shall be included in
58
- // all copies or substantial portions of the Software.
59
-
60
// DFA states visible to callers of decode(): UTF8_ACCEPT means a complete
// code point has just been produced (it is also the start state), and
// UTF8_REJECT means the byte sequence is not valid UTF-8.
#define UTF8_ACCEPT 0
#define UTF8_REJECT 12

static const uint8_t utf8d[] = {
    // The first part of the table (indices 0-255) maps bytes to character
    // classes, to reduce the size of the transition table and create
    // bitmasks.
    // 0x00-0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x80-0x8F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x90-0x9F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0xA0-0xBF
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    // 0xC0-0xDF
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0-0xEF
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
    // 0xF0-0xFF
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

    // The second part is a transition table that maps a combination
    // of a state of the automaton and a character class to a state.
    // One row per state, one column per character class (0-11).
    0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72,   // state 0 (accept)
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,  // state 12 (reject)
    12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12,     // state 24
    12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,  // state 36
    12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12,  // state 48
    12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,  // state 60
    12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,  // state 72
    12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,  // state 84
    12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,  // state 96
};

// Feeds one input byte to the decoder DFA.
//
//   state: in/out DFA state; start a new character with UTF8_ACCEPT.
//   codep: in/out accumulator for the code point being assembled.
//   byte:  the next input byte; it indexes the 256-entry class table, so it
//          must be in the range 0-255.
//
// Returns the new value of *state. When that is UTF8_ACCEPT, *codep holds the
// decoded code point; when it is UTF8_REJECT, the input is invalid.
static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
  const uint32_t type = utf8d[byte];

  // In the accept state this byte begins a character, so keep only the
  // payload bits that its class leaves unmasked; otherwise append the low six
  // bits of the byte to what has been accumulated so far.
  *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
                                   : (0xff >> type) & (byte);

  *state = utf8d[256 + *state + type];
  return *state;
}
97
-
98
- // END COPIED CODE.
99
-
100
- // Adds a decoding error to the parser's error list, based on the current state
101
- // of the Utf8Iterator.
102
- static void add_error(Utf8Iterator* iter, GumboErrorType type) {
103
- GumboParser* parser = iter->_parser;
104
-
105
- GumboError* error = gumbo_add_error(parser);
106
- if (!error) {
107
- return;
108
- }
109
- error->type = type;
110
- error->position = iter->_pos;
111
- error->original_text = iter->_start;
112
-
113
- // At the point the error is recorded, the code point hasn't been computed
114
- // yet (and can't be, because it's invalid), so we need to build up the raw
115
- // hex value from the bytes under the cursor.
116
- uint64_t code_point = 0;
117
- for (int i = 0; i < iter->_width; ++i) {
118
- code_point = (code_point << 8) | (unsigned char) iter->_start[i];
119
- }
120
- error->v.codepoint = code_point;
121
- }
122
-
123
- // Reads the next UTF-8 character in the iter.
124
- // This assumes that iter->_start points to the beginning of the character.
125
- // When this method returns, iter->_width and iter->_current will be set
126
- // appropriately, as well as any error flags.
127
- static void read_char(Utf8Iterator* iter) {
128
- if (iter->_start >= iter->_end) {
129
- // No input left to consume; emit an EOF and set width = 0.
130
- iter->_current = -1;
131
- iter->_width = 0;
132
- return;
133
- }
134
-
135
- uint32_t code_point = 0;
136
- uint32_t state = UTF8_ACCEPT;
137
- for (const char* c = iter->_start; c < iter->_end; ++c) {
138
- decode(&state, &code_point, (uint32_t)(unsigned char) (*c));
139
- if (state == UTF8_ACCEPT) {
140
- iter->_width = c - iter->_start + 1;
141
- // This is the special handling for carriage returns that is mandated by
142
- // the HTML5 spec. Since we're looking for particular 7-bit literal
143
- // characters, we operate in terms of chars and only need a check for iter
144
- // overrun, instead of having to read in a full next code point.
145
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream
146
- if (code_point == '\r') {
147
- assert(iter->_width == 1);
148
- const char* next = c + 1;
149
- if (next < iter->_end && *next == '\n') {
150
- // Advance the iter, as if the carriage return didn't exist.
151
- ++iter->_start;
152
- // Preserve the true offset, since other tools that look at it may be
153
- // unaware of HTML5's rules for converting \r into \n.
154
- ++iter->_pos.offset;
155
- }
156
- code_point = '\n';
157
- }
158
- if (utf8_is_invalid_code_point(code_point)) {
159
- add_error(iter, GUMBO_ERR_UTF8_INVALID);
160
- code_point = kUtf8ReplacementChar;
161
- }
162
- iter->_current = code_point;
163
- return;
164
- } else if (state == UTF8_REJECT) {
165
- // We don't want to consume the invalid continuation byte of a multi-byte
166
- // run, but we do want to skip past an invalid first byte.
167
- iter->_width = c - iter->_start + (c == iter->_start);
168
- iter->_current = kUtf8ReplacementChar;
169
- add_error(iter, GUMBO_ERR_UTF8_INVALID);
170
- return;
171
- }
172
- }
173
- // If we got here without exiting early, then we've reached the end of the
174
- // iterator. Add an error for truncated input, set the width to consume the
175
- // rest of the iterator, and emit a replacement character. The next time we
176
- // enter this method, it will detect that there's no input to consume and
177
- // output an EOF.
178
- iter->_current = kUtf8ReplacementChar;
179
- iter->_width = iter->_end - iter->_start;
180
- add_error(iter, GUMBO_ERR_UTF8_TRUNCATED);
181
- }
182
-
183
- static void update_position(Utf8Iterator* iter) {
184
- iter->_pos.offset += iter->_width;
185
- if (iter->_current == '\n') {
186
- ++iter->_pos.line;
187
- iter->_pos.column = 1;
188
- } else if (iter->_current == '\t') {
189
- int tab_stop = iter->_parser->_options->tab_stop;
190
- iter->_pos.column = ((iter->_pos.column / tab_stop) + 1) * tab_stop;
191
- } else if (iter->_current != -1) {
192
- ++iter->_pos.column;
193
- }
194
- }
195
-
196
// Reports whether `c` is one of the code points the HTML5 spec forbids in
// the input stream (for example, undefined control characters).
bool utf8_is_invalid_code_point(int c) {
  // Any code point whose low 16 bits are 0xFFFE or 0xFFFF.
  const int low16 = c & 0xFFFF;
  if (low16 == 0xFFFE || low16 == 0xFFFF) {
    return true;
  }
  if (c >= 0xFDD0 && c <= 0xFDEF) {
    return true;
  }
  // 0x7F through 0x9F.
  if (c >= 0x7F && c <= 0x9F) {
    return true;
  }
  // Control characters below 0x20, except 0x0, 0x9, 0xA, 0xC and 0xD.
  if (c >= 0xE && c <= 0x1F) {
    return true;
  }
  if (c == 0xB) {
    return true;
  }
  return c >= 0x1 && c <= 0x8;
}
203
-
204
- void utf8iterator_init(GumboParser* parser, const char* source,
205
- size_t source_length, Utf8Iterator* iter) {
206
- iter->_start = source;
207
- iter->_end = source + source_length;
208
- iter->_pos.line = 1;
209
- iter->_pos.column = 1;
210
- iter->_pos.offset = 0;
211
- iter->_parser = parser;
212
- read_char(iter);
213
- }
214
-
215
- void utf8iterator_next(Utf8Iterator* iter) {
216
- // We update positions based on the *last* character read, so that the first
217
- // character following a newline is at column 1 in the next line.
218
- update_position(iter);
219
- iter->_start += iter->_width;
220
- read_char(iter);
221
- }
222
-
223
- int utf8iterator_current(const Utf8Iterator* iter) { return iter->_current; }
224
-
225
- void utf8iterator_get_position(
226
- const Utf8Iterator* iter, GumboSourcePosition* output) {
227
- *output = iter->_pos;
228
- }
229
-
230
- const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter) {
231
- return iter->_start;
232
- }
233
-
234
- const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter) {
235
- return iter->_end;
236
- }
237
-
238
- bool utf8iterator_maybe_consume_match(Utf8Iterator* iter, const char* prefix,
239
- size_t length, bool case_sensitive) {
240
- bool matched = (iter->_start + length <= iter->_end) &&
241
- (case_sensitive ? !strncmp(iter->_start, prefix, length)
242
- : !strncasecmp(iter->_start, prefix, length));
243
- if (matched) {
244
- for (unsigned int i = 0; i < length; ++i) {
245
- utf8iterator_next(iter);
246
- }
247
- return true;
248
- } else {
249
- return false;
250
- }
251
- }
252
-
253
- void utf8iterator_mark(Utf8Iterator* iter) {
254
- iter->_mark = iter->_start;
255
- iter->_mark_pos = iter->_pos;
256
- }
257
-
258
- // Returns the current input stream position to the mark.
259
- void utf8iterator_reset(Utf8Iterator* iter) {
260
- iter->_start = iter->_mark;
261
- iter->_pos = iter->_mark_pos;
262
- read_char(iter);
263
- }
264
-
265
- // Sets the position and original text fields of an error to the value at the
266
- // mark.
267
- void utf8iterator_fill_error_at_mark(Utf8Iterator* iter, GumboError* error) {
268
- error->position = iter->_mark_pos;
269
- error->original_text = iter->_mark;
270
- }