nokogumbo 1.5.0 → 2.0.0.pre.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +56 -0
- data/README.md +146 -22
- data/ext/nokogumbo/extconf.rb +116 -0
- data/ext/{nokogumboc → nokogumbo}/nokogumbo.c +174 -71
- data/gumbo-parser/src/ascii.c +33 -0
- data/gumbo-parser/src/ascii.h +31 -0
- data/gumbo-parser/src/attribute.c +26 -28
- data/gumbo-parser/src/attribute.h +3 -23
- data/gumbo-parser/src/char_ref.c +135 -2351
- data/gumbo-parser/src/char_ref.h +13 -29
- data/gumbo-parser/src/error.c +215 -133
- data/gumbo-parser/src/error.h +34 -49
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/gumbo.h +506 -304
- data/gumbo-parser/src/insertion_mode.h +4 -28
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +1989 -1431
- data/gumbo-parser/src/parser.h +6 -22
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +43 -50
- data/gumbo-parser/src/string_buffer.h +24 -40
- data/gumbo-parser/src/string_piece.c +39 -39
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/tag.c +186 -59
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_type.h +1 -25
- data/gumbo-parser/src/tokenizer.c +899 -495
- data/gumbo-parser/src/tokenizer.h +37 -37
- data/gumbo-parser/src/tokenizer_states.h +6 -22
- data/gumbo-parser/src/utf8.c +103 -86
- data/gumbo-parser/src/utf8.h +37 -41
- data/gumbo-parser/src/util.c +48 -38
- data/gumbo-parser/src/util.h +10 -40
- data/gumbo-parser/src/vector.c +45 -57
- data/gumbo-parser/src/vector.h +17 -39
- data/lib/nokogumbo.rb +10 -174
- data/lib/nokogumbo/html5.rb +250 -0
- data/lib/nokogumbo/html5/document.rb +37 -0
- data/lib/nokogumbo/html5/document_fragment.rb +46 -0
- data/lib/nokogumbo/version.rb +3 -0
- data/lib/nokogumbo/xml/node.rb +57 -0
- metadata +32 -19
- data/ext/nokogumboc/extconf.rb +0 -60
- data/gumbo-parser/src/char_ref.rl +0 -2554
- data/gumbo-parser/src/string_piece.h +0 -38
- data/gumbo-parser/src/tag.in +0 -150
- data/gumbo-parser/src/tag_enum.h +0 -153
- data/gumbo-parser/src/tag_gperf.h +0 -105
- data/gumbo-parser/src/tag_sizes.h +0 -4
- data/gumbo-parser/src/tag_strings.h +0 -153
- data/gumbo-parser/visualc/include/strings.h +0 -4
- data/test-nokogumbo.rb +0 -190
data/gumbo-parser/src/parser.h
CHANGED
@@ -1,22 +1,3 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
//
|
17
|
-
// Contains the definition of the top-level GumboParser structure that's
|
18
|
-
// threaded through basically every internal function in the library.
|
19
|
-
|
20
1
|
#ifndef GUMBO_PARSER_H_
|
21
2
|
#define GUMBO_PARSER_H_
|
22
3
|
|
@@ -24,13 +5,16 @@
|
|
24
5
|
extern "C" {
|
25
6
|
#endif
|
26
7
|
|
8
|
+
// Contains the definition of the top-level GumboParser structure that's
|
9
|
+
// threaded through basically every internal function in the library.
|
10
|
+
|
27
11
|
struct GumboInternalParserState;
|
28
12
|
struct GumboInternalOutput;
|
29
13
|
struct GumboInternalOptions;
|
30
14
|
struct GumboInternalTokenizerState;
|
31
15
|
|
32
16
|
// An overarching struct that's threaded through (nearly) all functions in the
|
33
|
-
// library, OOP-style.
|
17
|
+
// library, OOP-style. This gives each function access to the options and
|
34
18
|
// output, along with any internal state needed for the parse.
|
35
19
|
typedef struct GumboInternalParser {
|
36
20
|
// Settings for this parse run.
|
@@ -40,12 +24,12 @@ typedef struct GumboInternalParser {
|
|
40
24
|
struct GumboInternalOutput* _output;
|
41
25
|
|
42
26
|
// The internal tokenizer state, defined as a pointer to avoid a cyclic
|
43
|
-
// dependency on html5tokenizer.h.
|
27
|
+
// dependency on html5tokenizer.h. The main parse routine is responsible for
|
44
28
|
// initializing this on parse start, and destroying it on parse end.
|
45
29
|
// End-users will never see a non-garbage value in this pointer.
|
46
30
|
struct GumboInternalTokenizerState* _tokenizer_state;
|
47
31
|
|
48
|
-
// The internal parser state.
|
32
|
+
// The internal parser state. Initialized on parse start and destroyed on
|
49
33
|
// parse end; end-users will never see a non-garbage value in this pointer.
|
50
34
|
struct GumboInternalParserState* _parser_state;
|
51
35
|
} GumboParser;
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#ifndef GUMBO_REPLACEMENT_H_
|
2
|
+
#define GUMBO_REPLACEMENT_H_
|
3
|
+
|
4
|
+
#include <stddef.h>
|
5
|
+
#include "gumbo.h"
|
6
|
+
|
7
|
+
typedef struct {
|
8
|
+
const char *const from;
|
9
|
+
const char *const to;
|
10
|
+
} StringReplacement;
|
11
|
+
|
12
|
+
const StringReplacement *gumbo_get_svg_tag_replacement (
|
13
|
+
const char* str,
|
14
|
+
size_t len
|
15
|
+
);
|
16
|
+
|
17
|
+
const StringReplacement *gumbo_get_svg_attr_replacement (
|
18
|
+
const char* str,
|
19
|
+
size_t len
|
20
|
+
);
|
21
|
+
|
22
|
+
typedef struct {
|
23
|
+
const char *const from;
|
24
|
+
const char *const local_name;
|
25
|
+
const GumboAttributeNamespaceEnum attr_namespace;
|
26
|
+
} ForeignAttrReplacement;
|
27
|
+
|
28
|
+
const ForeignAttrReplacement *gumbo_get_foreign_attr_replacement (
|
29
|
+
const char* str,
|
30
|
+
size_t len
|
31
|
+
);
|
32
|
+
|
33
|
+
#endif // GUMBO_REPLACEMENT_H_
|
@@ -1,67 +1,61 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
1
|
+
/*
|
2
|
+
Copyright 2010 Google Inc.
|
16
3
|
|
17
|
-
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
18
7
|
|
19
|
-
|
20
|
-
#include <stdlib.h>
|
21
|
-
#include <string.h>
|
22
|
-
#include <strings.h>
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
23
9
|
|
24
|
-
|
25
|
-
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
*/
|
26
16
|
|
27
|
-
|
17
|
+
#include <string.h>
|
18
|
+
#include "string_buffer.h"
|
19
|
+
#include "util.h"
|
28
20
|
|
29
21
|
// Size chosen via statistical analysis of ~60K websites.
|
30
22
|
// 99% of text nodes and 98% of attribute names/values fit in this initial size.
|
31
23
|
static const size_t kDefaultStringBufferSize = 5;
|
32
24
|
|
33
|
-
static void maybe_resize_string_buffer(
|
34
|
-
|
25
|
+
static void maybe_resize_string_buffer (
|
26
|
+
size_t additional_chars,
|
27
|
+
GumboStringBuffer* buffer
|
28
|
+
) {
|
35
29
|
size_t new_length = buffer->length + additional_chars;
|
36
30
|
size_t new_capacity = buffer->capacity;
|
37
31
|
while (new_capacity < new_length) {
|
38
32
|
new_capacity *= 2;
|
39
33
|
}
|
40
34
|
if (new_capacity != buffer->capacity) {
|
41
|
-
|
42
|
-
memcpy(new_data, buffer->data, buffer->length);
|
43
|
-
gumbo_parser_deallocate(parser, buffer->data);
|
44
|
-
buffer->data = new_data;
|
35
|
+
buffer->data = gumbo_realloc(buffer->data, new_capacity);
|
45
36
|
buffer->capacity = new_capacity;
|
46
37
|
}
|
47
38
|
}
|
48
39
|
|
49
|
-
void gumbo_string_buffer_init(
|
50
|
-
|
51
|
-
output->data = gumbo_parser_allocate(parser, kDefaultStringBufferSize);
|
40
|
+
void gumbo_string_buffer_init(GumboStringBuffer* output) {
|
41
|
+
output->data = gumbo_alloc(kDefaultStringBufferSize);
|
52
42
|
output->length = 0;
|
53
43
|
output->capacity = kDefaultStringBufferSize;
|
54
44
|
}
|
55
45
|
|
56
|
-
void gumbo_string_buffer_reserve(
|
57
|
-
|
58
|
-
|
46
|
+
void gumbo_string_buffer_reserve (
|
47
|
+
size_t min_capacity,
|
48
|
+
GumboStringBuffer* output
|
49
|
+
) {
|
50
|
+
maybe_resize_string_buffer(min_capacity - output->length, output);
|
59
51
|
}
|
60
52
|
|
61
|
-
void gumbo_string_buffer_append_codepoint(
|
62
|
-
|
53
|
+
void gumbo_string_buffer_append_codepoint (
|
54
|
+
int c,
|
55
|
+
GumboStringBuffer* output
|
56
|
+
) {
|
63
57
|
// num_bytes is actually the number of continuation bytes, 1 less than the
|
64
|
-
// total number of bytes.
|
58
|
+
// total number of bytes. This is done to keep the loop below simple and
|
65
59
|
// should probably change if we unroll it.
|
66
60
|
int num_bytes, prefix;
|
67
61
|
if (c <= 0x7f) {
|
@@ -77,34 +71,33 @@ void gumbo_string_buffer_append_codepoint(
|
|
77
71
|
num_bytes = 3;
|
78
72
|
prefix = 0xf0;
|
79
73
|
}
|
80
|
-
maybe_resize_string_buffer(
|
74
|
+
maybe_resize_string_buffer(num_bytes + 1, output);
|
81
75
|
output->data[output->length++] = prefix | (c >> (num_bytes * 6));
|
82
76
|
for (int i = num_bytes - 1; i >= 0; --i) {
|
83
77
|
output->data[output->length++] = 0x80 | (0x3f & (c >> (i * 6)));
|
84
78
|
}
|
85
79
|
}
|
86
80
|
|
87
|
-
void gumbo_string_buffer_append_string(
|
88
|
-
|
89
|
-
|
81
|
+
void gumbo_string_buffer_append_string (
|
82
|
+
GumboStringPiece* str,
|
83
|
+
GumboStringBuffer* output
|
84
|
+
) {
|
85
|
+
maybe_resize_string_buffer(str->length, output);
|
90
86
|
memcpy(output->data + output->length, str->data, str->length);
|
91
87
|
output->length += str->length;
|
92
88
|
}
|
93
89
|
|
94
|
-
char* gumbo_string_buffer_to_string(
|
95
|
-
|
96
|
-
char* buffer = gumbo_parser_allocate(parser, input->length + 1);
|
90
|
+
char* gumbo_string_buffer_to_string(const GumboStringBuffer* input) {
|
91
|
+
char* buffer = gumbo_alloc(input->length + 1);
|
97
92
|
memcpy(buffer, input->data, input->length);
|
98
93
|
buffer[input->length] = '\0';
|
99
94
|
return buffer;
|
100
95
|
}
|
101
96
|
|
102
|
-
void gumbo_string_buffer_clear(
|
103
|
-
struct GumboInternalParser* parser, GumboStringBuffer* input) {
|
97
|
+
void gumbo_string_buffer_clear(GumboStringBuffer* input) {
|
104
98
|
input->length = 0;
|
105
99
|
}
|
106
100
|
|
107
|
-
void gumbo_string_buffer_destroy(
|
108
|
-
|
109
|
-
gumbo_parser_deallocate(parser, buffer->data);
|
101
|
+
void gumbo_string_buffer_destroy(GumboStringBuffer* buffer) {
|
102
|
+
gumbo_free(buffer->data);
|
110
103
|
}
|
@@ -1,19 +1,3 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
//
|
17
1
|
#ifndef GUMBO_STRING_BUFFER_H_
|
18
2
|
#define GUMBO_STRING_BUFFER_H_
|
19
3
|
|
@@ -26,18 +10,16 @@
|
|
26
10
|
extern "C" {
|
27
11
|
#endif
|
28
12
|
|
29
|
-
struct
|
30
|
-
|
31
|
-
// A struct representing a mutable, growable string. This consists of a
|
32
|
-
// heap-allocated buffer that may grow (by doubling) as necessary. When
|
13
|
+
// A struct representing a mutable, growable string. This consists of a
|
14
|
+
// heap-allocated buffer that may grow (by doubling) as necessary. When
|
33
15
|
// converting to a string, this allocates a new buffer that is only as long as
|
34
|
-
// it needs to be.
|
16
|
+
// it needs to be. Note that the internal buffer here is *not* nul-terminated,
|
35
17
|
// so be sure not to use ordinary string manipulation functions on it.
|
36
18
|
typedef struct {
|
37
|
-
// A pointer to the beginning of the string.
|
19
|
+
// A pointer to the beginning of the string. Allocated by gumbo_string_buffer_init() even when length == 0.
|
38
20
|
char* data;
|
39
21
|
|
40
|
-
// The length of the string fragment, in bytes.
|
22
|
+
// The length of the string fragment, in bytes. May be zero.
|
41
23
|
size_t length;
|
42
24
|
|
43
25
|
// The capacity of the buffer, in bytes.
|
@@ -45,40 +27,42 @@ typedef struct {
|
|
45
27
|
} GumboStringBuffer;
|
46
28
|
|
47
29
|
// Initializes a new GumboStringBuffer.
|
48
|
-
void gumbo_string_buffer_init(
|
49
|
-
struct GumboInternalParser* parser, GumboStringBuffer* output);
|
30
|
+
void gumbo_string_buffer_init(GumboStringBuffer* output);
|
50
31
|
|
51
|
-
// Ensures that the buffer contains at least a certain amount of space.
|
32
|
+
// Ensures that the buffer contains at least a certain amount of space. Most
|
52
33
|
// useful with snprintf and the other length-delimited string functions, which
|
53
34
|
// may want to write directly into the buffer.
|
54
|
-
void gumbo_string_buffer_reserve(
|
55
|
-
|
35
|
+
void gumbo_string_buffer_reserve (
|
36
|
+
size_t min_capacity,
|
37
|
+
GumboStringBuffer* output
|
38
|
+
);
|
56
39
|
|
57
40
|
// Appends a single Unicode codepoint onto the end of the GumboStringBuffer.
|
58
41
|
// This is essentially a UTF-8 encoder, and may add 1-4 bytes depending on the
|
59
42
|
// value of the codepoint.
|
60
|
-
void gumbo_string_buffer_append_codepoint(
|
61
|
-
|
43
|
+
void gumbo_string_buffer_append_codepoint (
|
44
|
+
int c,
|
45
|
+
GumboStringBuffer* output
|
46
|
+
);
|
62
47
|
|
63
48
|
// Appends a string onto the end of the GumboStringBuffer.
|
64
|
-
void gumbo_string_buffer_append_string(
|
65
|
-
|
49
|
+
void gumbo_string_buffer_append_string (
|
50
|
+
GumboStringPiece* str,
|
51
|
+
GumboStringBuffer* output
|
52
|
+
);
|
66
53
|
|
67
54
|
// Converts this string buffer to const char*, allocating a new buffer for it.
|
68
|
-
char* gumbo_string_buffer_to_string(
|
69
|
-
struct GumboInternalParser* parser, GumboStringBuffer* input);
|
55
|
+
char* gumbo_string_buffer_to_string(const GumboStringBuffer* input);
|
70
56
|
|
71
|
-
// Reinitialize this string buffer.
|
57
|
+
// Reinitialize this string buffer. This clears it by setting length=0. It
|
72
58
|
// does not zero out the buffer itself.
|
73
|
-
void gumbo_string_buffer_clear(
|
74
|
-
struct GumboInternalParser* parser, GumboStringBuffer* input);
|
59
|
+
void gumbo_string_buffer_clear(GumboStringBuffer* input);
|
75
60
|
|
76
61
|
// Deallocates this GumboStringBuffer.
|
77
|
-
void gumbo_string_buffer_destroy(
|
78
|
-
struct GumboInternalParser* parser, GumboStringBuffer* buffer);
|
62
|
+
void gumbo_string_buffer_destroy(GumboStringBuffer* buffer);
|
79
63
|
|
80
64
|
#ifdef __cplusplus
|
81
65
|
}
|
82
66
|
#endif
|
83
67
|
|
84
|
-
#endif
|
68
|
+
#endif // GUMBO_STRING_BUFFER_H_
|
@@ -1,48 +1,48 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
1
|
+
/*
|
2
|
+
Copyright 2018 Craig Barnes.
|
3
|
+
Copyright 2010 Google Inc.
|
16
4
|
|
17
|
-
|
5
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
you may not use this file except in compliance with the License.
|
7
|
+
You may obtain a copy of the License at
|
18
8
|
|
19
|
-
|
20
|
-
#include <stdlib.h>
|
21
|
-
#include <string.h>
|
22
|
-
#include <strings.h>
|
23
|
-
|
24
|
-
#include "util.h"
|
9
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
25
10
|
|
26
|
-
|
11
|
+
Unless required by applicable law or agreed to in writing, software
|
12
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
See the License for the specific language governing permissions and
|
15
|
+
limitations under the License.
|
16
|
+
*/
|
27
17
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
18
|
+
#include <stddef.h>
|
19
|
+
#include <string.h>
|
20
|
+
#include "gumbo.h"
|
21
|
+
#include "ascii.h"
|
22
|
+
|
23
|
+
bool gumbo_string_equals (
|
24
|
+
const GumboStringPiece* str1,
|
25
|
+
const GumboStringPiece* str2
|
26
|
+
) {
|
27
|
+
return
|
28
|
+
str1->length == str2->length
|
29
|
+
&& !memcmp(str1->data, str2->data, str1->length);
|
34
30
|
}
|
35
31
|
|
36
|
-
bool gumbo_string_equals_ignore_case(
|
37
|
-
|
38
|
-
|
39
|
-
|
32
|
+
bool gumbo_string_equals_ignore_case (
|
33
|
+
const GumboStringPiece* str1,
|
34
|
+
const GumboStringPiece* str2
|
35
|
+
) {
|
36
|
+
return
|
37
|
+
str1->length == str2->length
|
38
|
+
&& !gumbo_ascii_strncasecmp(str1->data, str2->data, str1->length);
|
40
39
|
}
|
41
40
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
41
|
+
bool gumbo_string_prefix_ignore_case (
|
42
|
+
const GumboStringPiece* prefix,
|
43
|
+
const GumboStringPiece* str
|
44
|
+
) {
|
45
|
+
return
|
46
|
+
prefix->length <= str->length
|
47
|
+
&& !gumbo_ascii_strncasecmp(prefix->data, str->data, prefix->length);
|
48
48
|
}
|
@@ -0,0 +1,174 @@
|
|
1
|
+
/* ANSI-C code produced by gperf version 3.1 */
|
2
|
+
/* Command-line: gperf -m100 lib/svg_attrs.gperf */
|
3
|
+
/* Computed positions: -k'1,10,$' */
|
4
|
+
/* Filtered by: mk/gperf-filter.sed */
|
5
|
+
|
6
|
+
#include "replacement.h"
|
7
|
+
#include "macros.h"
|
8
|
+
#include "ascii.h"
|
9
|
+
#include <string.h>
|
10
|
+
|
11
|
+
#define TOTAL_KEYWORDS 58
|
12
|
+
#define MIN_WORD_LENGTH 4
|
13
|
+
#define MAX_WORD_LENGTH 19
|
14
|
+
#define MIN_HASH_VALUE 5
|
15
|
+
#define MAX_HASH_VALUE 77
|
16
|
+
/* maximum key range = 73, duplicates = 0 */
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
static inline unsigned int
|
21
|
+
hash (register const char *str, register size_t len)
|
22
|
+
{
|
23
|
+
static const unsigned char asso_values[] =
|
24
|
+
{
|
25
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
26
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
27
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
28
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
29
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
30
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
31
|
+
78, 78, 78, 78, 78, 5, 78, 39, 14, 1,
|
32
|
+
31, 31, 13, 13, 78, 78, 22, 25, 10, 2,
|
33
|
+
7, 78, 22, 0, 1, 3, 1, 78, 0, 36,
|
34
|
+
14, 17, 20, 78, 78, 78, 78, 5, 78, 39,
|
35
|
+
14, 1, 31, 31, 13, 13, 78, 78, 22, 25,
|
36
|
+
10, 2, 7, 78, 22, 0, 1, 3, 1, 78,
|
37
|
+
0, 36, 14, 17, 20, 78, 78, 78, 78, 78,
|
38
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
39
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
40
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
41
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
42
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
43
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
44
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
45
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
46
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
47
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
48
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
49
|
+
78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
|
50
|
+
78, 78, 78, 78, 78, 78, 78, 78
|
51
|
+
};
|
52
|
+
register unsigned int hval = len;
|
53
|
+
|
54
|
+
switch (hval)
|
55
|
+
{
|
56
|
+
default:
|
57
|
+
hval += asso_values[(unsigned char)str[9]];
|
58
|
+
/*FALLTHROUGH*/
|
59
|
+
case 9:
|
60
|
+
case 8:
|
61
|
+
case 7:
|
62
|
+
case 6:
|
63
|
+
case 5:
|
64
|
+
case 4:
|
65
|
+
case 3:
|
66
|
+
case 2:
|
67
|
+
case 1:
|
68
|
+
hval += asso_values[(unsigned char)str[0]+2];
|
69
|
+
break;
|
70
|
+
}
|
71
|
+
return hval + asso_values[(unsigned char)str[len - 1]];
|
72
|
+
}
|
73
|
+
|
74
|
+
const StringReplacement *
|
75
|
+
gumbo_get_svg_attr_replacement (register const char *str, register size_t len)
|
76
|
+
{
|
77
|
+
static const unsigned char lengthtable[] =
|
78
|
+
{
|
79
|
+
0, 0, 0, 0, 0, 4, 0, 7, 7, 0, 8, 9, 10, 11,
|
80
|
+
11, 11, 11, 10, 16, 18, 16, 12, 16, 11, 13, 11, 12, 11,
|
81
|
+
16, 0, 17, 9, 9, 8, 9, 10, 13, 10, 12, 14, 8, 4,
|
82
|
+
12, 19, 7, 9, 12, 12, 11, 14, 10, 19, 8, 16, 13, 16,
|
83
|
+
16, 15, 10, 12, 0, 0, 13, 13, 13, 0, 0, 9, 16, 0,
|
84
|
+
0, 0, 0, 0, 0, 0, 0, 17
|
85
|
+
};
|
86
|
+
static const StringReplacement wordlist[] =
|
87
|
+
{
|
88
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
89
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
90
|
+
{(char*)0,(char*)0},
|
91
|
+
{"refx", "refX"},
|
92
|
+
{(char*)0,(char*)0},
|
93
|
+
{"viewbox", "viewBox"},
|
94
|
+
{"targetx", "targetX"},
|
95
|
+
{(char*)0,(char*)0},
|
96
|
+
{"calcmode", "calcMode"},
|
97
|
+
{"maskunits", "maskUnits"},
|
98
|
+
{"viewtarget", "viewTarget"},
|
99
|
+
{"tablevalues", "tableValues"},
|
100
|
+
{"markerunits", "markerUnits"},
|
101
|
+
{"stitchtiles", "stitchTiles"},
|
102
|
+
{"startoffset", "startOffset"},
|
103
|
+
{"numoctaves", "numOctaves"},
|
104
|
+
{"requiredfeatures", "requiredFeatures"},
|
105
|
+
{"requiredextensions", "requiredExtensions"},
|
106
|
+
{"specularexponent", "specularExponent"},
|
107
|
+
{"surfacescale", "surfaceScale"},
|
108
|
+
{"specularconstant", "specularConstant"},
|
109
|
+
{"repeatcount", "repeatCount"},
|
110
|
+
{"clippathunits", "clipPathUnits"},
|
111
|
+
{"filterunits", "filterUnits"},
|
112
|
+
{"lengthadjust", "lengthAdjust"},
|
113
|
+
{"markerwidth", "markerWidth"},
|
114
|
+
{"maskcontentunits", "maskContentUnits"},
|
115
|
+
{(char*)0,(char*)0},
|
116
|
+
{"limitingconeangle", "limitingConeAngle"},
|
117
|
+
{"pointsatx", "pointsAtX"},
|
118
|
+
{"repeatdur", "repeatDur"},
|
119
|
+
{"keytimes", "keyTimes"},
|
120
|
+
{"keypoints", "keyPoints"},
|
121
|
+
{"keysplines", "keySplines"},
|
122
|
+
{"gradientunits", "gradientUnits"},
|
123
|
+
{"textlength", "textLength"},
|
124
|
+
{"stddeviation", "stdDeviation"},
|
125
|
+
{"primitiveunits", "primitiveUnits"},
|
126
|
+
{"edgemode", "edgeMode"},
|
127
|
+
{"refy", "refY"},
|
128
|
+
{"spreadmethod", "spreadMethod"},
|
129
|
+
{"preserveaspectratio", "preserveAspectRatio"},
|
130
|
+
{"targety", "targetY"},
|
131
|
+
{"pointsatz", "pointsAtZ"},
|
132
|
+
{"markerheight", "markerHeight"},
|
133
|
+
{"patternunits", "patternUnits"},
|
134
|
+
{"baseprofile", "baseProfile"},
|
135
|
+
{"systemlanguage", "systemLanguage"},
|
136
|
+
{"zoomandpan", "zoomAndPan"},
|
137
|
+
{"patterncontentunits", "patternContentUnits"},
|
138
|
+
{"glyphref", "glyphRef"},
|
139
|
+
{"xchannelselector", "xChannelSelector"},
|
140
|
+
{"attributetype", "attributeType"},
|
141
|
+
{"kernelunitlength", "kernelUnitLength"},
|
142
|
+
{"ychannelselector", "yChannelSelector"},
|
143
|
+
{"diffuseconstant", "diffuseConstant"},
|
144
|
+
{"pathlength", "pathLength"},
|
145
|
+
{"kernelmatrix", "kernelMatrix"},
|
146
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
147
|
+
{"preservealpha", "preserveAlpha"},
|
148
|
+
{"attributename", "attributeName"},
|
149
|
+
{"basefrequency", "baseFrequency"},
|
150
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
151
|
+
{"pointsaty", "pointsAtY"},
|
152
|
+
{"patterntransform", "patternTransform"},
|
153
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
154
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
155
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
156
|
+
{(char*)0,(char*)0}, {(char*)0,(char*)0},
|
157
|
+
{"gradienttransform", "gradientTransform"}
|
158
|
+
};
|
159
|
+
|
160
|
+
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
161
|
+
{
|
162
|
+
register unsigned int key = hash (str, len);
|
163
|
+
|
164
|
+
if (key <= MAX_HASH_VALUE)
|
165
|
+
if (len == lengthtable[key])
|
166
|
+
{
|
167
|
+
register const char *s = wordlist[key].from;
|
168
|
+
|
169
|
+
if (s && (((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gumbo_ascii_strncasecmp(str, s, len))
|
170
|
+
return &wordlist[key];
|
171
|
+
}
|
172
|
+
}
|
173
|
+
return 0;
|
174
|
+
}
|