nokogumbo 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
data/work/utf8.h DELETED
@@ -1,127 +0,0 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
- //
17
- // This contains an implementation of a UTF8 iterator and decoder suitable for
18
- // an HTML5 parser. This does a bit more than straight UTF-8 decoding. The
19
- // HTML5 spec specifies that:
20
- // 1. Decoding errors are parse errors.
21
- // 2. Certain other code points (e.g. control characters) are parse errors.
22
- // 3. Carriage returns and CR/LF groups are converted to line feeds.
23
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#decoded-as-utf-8,-with-error-handling
24
- //
25
- // Also, we want to keep track of source positions for error handling. As a
26
- // result, we fold all that functionality into this decoder, and can't use an
27
- // off-the-shelf library.
28
- //
29
- // This header is internal-only, which is why we prefix functions with only
30
- // utf8_ or utf8_iterator_ instead of gumbo_utf8_.
31
-
32
- #ifndef GUMBO_UTF8_H_
33
- #define GUMBO_UTF8_H_
34
-
35
- #include <stdbool.h>
36
- #include <stddef.h>
37
-
38
- #include "gumbo.h"
39
-
40
- #ifdef __cplusplus
41
- extern "C" {
42
- #endif
43
-
44
- struct _GumboError;
45
- struct _GumboParser;
46
-
47
- // Unicode replacement char.
48
- extern const int kUtf8ReplacementChar;
49
-
50
- typedef struct _Utf8Iterator {
51
- // Points at the start of the code point most recently read into 'current'.
52
- const char* _start;
53
-
54
- // Points at the mark. The mark is initially set to the beginning of the
55
- // input.
56
- const char* _mark;
57
-
58
- // Points past the end of the iter, like a past-the-end iterator in the STL.
59
- const char* _end;
60
-
61
- // The code point under the cursor.
62
- int _current;
63
-
64
- // The width in bytes of the current code point.
65
- int _width;
66
-
67
- // The SourcePosition for the current location.
68
- GumboSourcePosition _pos;
69
-
70
- // The SourcePosition for the mark.
71
- GumboSourcePosition _mark_pos;
72
-
73
- // Pointer back to the GumboParser instance, for configuration options and
74
- // error recording.
75
- struct _GumboParser* _parser;
76
- } Utf8Iterator;
77
-
78
- // Returns true if this Unicode code point is in the list of characters
79
- // forbidden by the HTML5 spec, such as NUL bytes and undefined control chars.
80
- bool utf8_is_invalid_code_point(int c);
81
-
82
- // Initializes a new Utf8Iterator from the given byte buffer. The source does
83
- // not have to be NUL-terminated, but the length must be passed in explicitly.
84
- void utf8iterator_init(
85
- struct _GumboParser* parser, const char* source, size_t source_length,
86
- Utf8Iterator* iter);
87
-
88
- // Advances the current position by one code point.
89
- void utf8iterator_next(Utf8Iterator* iter);
90
-
91
- // Returns the current code point as an integer.
92
- int utf8iterator_current(const Utf8Iterator* iter);
93
-
94
- // Retrieves and fills the output parameter with the current source position.
95
- void utf8iterator_get_position(
96
- const Utf8Iterator* iter, GumboSourcePosition* output);
97
-
98
- // Retrieves a character pointer to the start of the current character.
99
- const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter);
100
-
101
- // If the upcoming text in the buffer matches the specified prefix (which has
102
- // length 'length'), consume it and return true. Otherwise, return false with
103
- // no other effects. If the length of the string would overflow the buffer,
104
- // this returns false. Note that prefix should not contain null bytes because
105
- // of the use of strncmp/strncasecmp internally. All existing use-cases adhere
106
- // to this.
107
- bool utf8iterator_maybe_consume_match(
108
- Utf8Iterator* iter, const char* prefix, size_t length, bool case_sensitive);
109
-
110
- // "Marks" a particular location of interest in the input stream, so that it can
111
- // later be reset() to. There's also the ability to record an error at the
112
- // point that was marked, as oftentimes that's more useful than the last
113
- // character before the error was detected.
114
- void utf8iterator_mark(Utf8Iterator* iter);
115
-
116
- // Returns the current input stream position to the mark.
117
- void utf8iterator_reset(Utf8Iterator* iter);
118
-
119
- // Sets the position and original text fields of an error to the value at the
120
- // mark.
121
- void utf8iterator_fill_error_at_mark(
122
- Utf8Iterator* iter, struct _GumboError* error);
123
-
124
- #ifdef __cplusplus
125
- }
126
- #endif
127
- #endif // GUMBO_UTF8_H_
data/work/util.c DELETED
@@ -1,58 +0,0 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
-
17
- #include "util.h"
18
-
19
- #include <assert.h>
20
- #include <stdlib.h>
21
- #include <string.h>
22
- #include <strings.h>
23
- #include <stdarg.h>
24
- #include <stdio.h>
25
-
26
- #include "gumbo.h"
27
- #include "parser.h"
28
-
29
- // TODO(jdtang): This should be elsewhere, but there's no .c file for
30
- // SourcePositions and yet the constant needs some linkage, so this is as good
31
- // as any.
32
- const GumboSourcePosition kGumboEmptySourcePosition = { 0, 0, 0 };
33
-
34
- void* gumbo_parser_allocate(GumboParser* parser, size_t num_bytes) {
35
- return parser->_options->allocator(parser->_options->userdata, num_bytes);
36
- }
37
-
38
- void gumbo_parser_deallocate(GumboParser* parser, void* ptr) {
39
- return parser->_options->deallocator(parser->_options->userdata, ptr);
40
- }
41
-
42
- char* gumbo_copy_stringz(GumboParser* parser, const char* str) {
43
- char* buffer = gumbo_parser_allocate(parser, strlen(str) + 1);
44
- strcpy(buffer, str);
45
- return buffer;
46
- }
47
-
48
- // Debug function to trace operation of the parser. Pass --copts=-DGUMBO_DEBUG
49
- // to use.
50
- void gumbo_debug(const char* format, ...) {
51
- #ifdef GUMBO_DEBUG
52
- va_list args;
53
- va_start(args, format);
54
- vprintf(format, args);
55
- va_end(args);
56
- fflush(stdout);
57
- #endif
58
- }
data/work/util.h DELETED
@@ -1,57 +0,0 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
- //
17
- // This contains some utility functions that didn't fit into any of the other
18
- // headers.
19
-
20
- #ifndef GUMBO_UTIL_H_
21
- #define GUMBO_UTIL_H_
22
-
23
- #include <stdbool.h>
24
- #include <stddef.h>
25
-
26
- #ifdef __cplusplus
27
- extern "C" {
28
- #endif
29
-
30
- // Forward declaration since it's passed into some of the functions in this
31
- // header.
32
- struct _GumboParser;
33
-
34
- // Utility function for allocating & copying a null-terminated string into a
35
- // freshly-allocated buffer. This is necessary for proper memory management; we
36
- // have the convention that all const char* in parse tree structures are
37
- // freshly-allocated, so if we didn't copy, we'd try to delete a literal string
38
- // when the parse tree is destroyed.
39
- char* gumbo_copy_stringz(struct _GumboParser* parser, const char* str);
40
-
41
- // Allocate a chunk of memory, using the allocator specified in the Parser's
42
- // config options.
43
- void* gumbo_parser_allocate(struct _GumboParser* parser, size_t num_bytes);
44
-
45
- // Deallocate a chunk of memory, using the deallocator specified in the Parser's
46
- // config options.
47
- void gumbo_parser_deallocate(struct _GumboParser* parser, void* ptr);
48
-
49
- // Debug wrapper for printf, to make it easier to turn off debugging info when
50
- // required.
51
- void gumbo_debug(const char* format, ...);
52
-
53
- #ifdef __cplusplus
54
- }
55
- #endif
56
-
57
- #endif // GUMBO_UTIL_H_
data/work/vector.c DELETED
@@ -1,121 +0,0 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
-
17
- #include "vector.h"
18
-
19
- #include <assert.h>
20
- #include <stdlib.h>
21
- #include <string.h>
22
- #include <strings.h>
23
-
24
- #include "util.h"
25
-
26
- struct _GumboParser;
27
-
28
- const GumboVector kGumboEmptyVector = { NULL, 0, 0 };
29
-
30
- void gumbo_vector_init(
31
- struct _GumboParser* parser, size_t initial_capacity, GumboVector* vector) {
32
- vector->length = 0;
33
- vector->capacity = initial_capacity;
34
- if (initial_capacity > 0) {
35
- vector->data = gumbo_parser_allocate(
36
- parser, sizeof(void*) * initial_capacity);
37
- } else {
38
- vector->data = NULL;
39
- }
40
- }
41
-
42
- void gumbo_vector_destroy(struct _GumboParser* parser, GumboVector* vector) {
43
- if (vector->capacity > 0) {
44
- gumbo_parser_deallocate(parser, vector->data);
45
- }
46
- }
47
-
48
- static void enlarge_vector_if_full(
49
- struct _GumboParser* parser, GumboVector* vector) {
50
- if (vector->length >= vector->capacity) {
51
- if (vector->capacity) {
52
- size_t old_num_bytes = sizeof(void*) * vector->capacity;
53
- vector->capacity *= 2;
54
- size_t num_bytes = sizeof(void*) * vector->capacity;
55
- void** temp = gumbo_parser_allocate(parser, num_bytes);
56
- memcpy(temp, vector->data, old_num_bytes);
57
- gumbo_parser_deallocate(parser, vector->data);
58
- vector->data = temp;
59
- } else {
60
- // 0-capacity vector; no previous array to deallocate.
61
- vector->capacity = 2;
62
- vector->data = gumbo_parser_allocate(
63
- parser, sizeof(void*) * vector->capacity);
64
- }
65
- }
66
- }
67
-
68
- void gumbo_vector_add(
69
- struct _GumboParser* parser, void* element, GumboVector* vector) {
70
- enlarge_vector_if_full(parser, vector);
71
- assert(vector->data);
72
- assert(vector->length < vector->capacity);
73
- vector->data[vector->length++] = element;
74
- }
75
-
76
- void* gumbo_vector_pop(struct _GumboParser* parser, GumboVector* vector) {
77
- if (vector->length == 0) {
78
- return NULL;
79
- }
80
- return vector->data[--vector->length];
81
- }
82
-
83
- int gumbo_vector_index_of(GumboVector* vector, void* element) {
84
- for (int i = 0; i < vector->length; ++i) {
85
- if (vector->data[i] == element) {
86
- return i;
87
- }
88
- }
89
- return -1;
90
- }
91
-
92
- void gumbo_vector_insert_at(
93
- struct _GumboParser* parser, void* element, int index, GumboVector* vector) {
94
- assert(index >= 0);
95
- assert(index <= vector->length);
96
- enlarge_vector_if_full(parser, vector);
97
- ++vector->length;
98
- memmove(&vector->data[index + 1], &vector->data[index],
99
- sizeof(void*) * (vector->length - index - 1));
100
- vector->data[index] = element;
101
- }
102
-
103
- void gumbo_vector_remove(
104
- struct _GumboParser* parser, void* node, GumboVector* vector) {
105
- int index = gumbo_vector_index_of(vector, node);
106
- if (index == -1) {
107
- return;
108
- }
109
- gumbo_vector_remove_at(parser, index, vector);
110
- }
111
-
112
- void* gumbo_vector_remove_at(
113
- struct _GumboParser* parser, int index, GumboVector* vector) {
114
- assert(index >= 0);
115
- assert(index < vector->length);
116
- void* result = vector->data[index];
117
- memmove(&vector->data[index], &vector->data[index + 1],
118
- sizeof(void*) * (vector->length - index - 1));
119
- --vector->length;
120
- return result;
121
- }
data/work/vector.h DELETED
@@ -1,66 +0,0 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
-
17
- #ifndef GUMBO_VECTOR_H_
18
- #define GUMBO_VECTOR_H_
19
-
20
- #include "gumbo.h"
21
-
22
- #ifdef __cplusplus
23
- extern "C" {
24
- #endif
25
-
26
- // Forward declaration since it's passed into some of the functions in this
27
- // header.
28
- struct _GumboParser;
29
-
30
- // Initializes a new GumboVector with the specified initial capacity.
31
- void gumbo_vector_init(
32
- struct _GumboParser* parser, size_t initial_capacity, GumboVector* vector);
33
-
34
- // Frees the memory used by a GumboVector. Does not free the contained
35
- // pointers.
36
- void gumbo_vector_destroy(struct _GumboParser* parser, GumboVector* vector);
37
-
38
- // Adds a new element to a GumboVector.
39
- void gumbo_vector_add(
40
- struct _GumboParser* parser, void* element, GumboVector* vector);
41
-
42
- // Removes and returns the element most recently added to the GumboVector.
43
- // Ownership is transferred to caller. Capacity is unchanged. If the vector is
44
- // empty, NULL is returned.
45
- void* gumbo_vector_pop(struct _GumboParser* parser, GumboVector* vector);
46
-
47
- // Inserts an element at a specific index. This is potentially O(N) time, but
48
- // is necessary for some of the spec's behavior.
49
- void gumbo_vector_insert_at(
50
- struct _GumboParser* parser, void* element, int index, GumboVector* vector);
51
-
52
- // Removes an element from the vector, or does nothing if the element is not in
53
- // the vector.
54
- void gumbo_vector_remove(
55
- struct _GumboParser* parser, void* element, GumboVector* vector);
56
-
57
- // Removes and returns an element at a specific index. Note that this is
58
- // potentially O(N) time and should be used sparingly.
59
- void* gumbo_vector_remove_at(
60
- struct _GumboParser* parser, int index, GumboVector* vector);
61
-
62
- #ifdef __cplusplus
63
- }
64
- #endif
65
-
66
- #endif // GUMBO_VECTOR_H_