nokogumbo 1.4.2 → 1.4.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -64,8 +64,8 @@ typedef struct GumboInternalToken {
64
64
  GumboTokenDocType doc_type;
65
65
  GumboTokenStartTag start_tag;
66
66
  GumboTag end_tag;
67
- const char* text; // For comments.
68
- int character; // For character, whitespace, null, and EOF tokens.
67
+ const char* text; // For comments.
68
+ int character; // For character, whitespace, null, and EOF tokens.
69
69
  } v;
70
70
  } GumboToken;
71
71
 
@@ -19,7 +19,7 @@
19
19
  #include <assert.h>
20
20
  #include <stdint.h>
21
21
  #include <string.h>
22
- #include <strings.h> // For strncasecmp.
22
+ #include <strings.h> // For strncasecmp.
23
23
 
24
24
  #include "error.h"
25
25
  #include "gumbo.h"
@@ -47,9 +47,11 @@ const int kUtf8ReplacementChar = 0xFFFD;
47
47
 
48
48
  // Permission is hereby granted, free of charge, to any person obtaining a copy
49
49
  // of this software and associated documentation files (the "Software"), to deal
50
- // in the Software without restriction, including without limitation the rights to
50
+ // in the Software without restriction, including without limitation the rights
51
+ // to
51
52
  // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
52
- // of the Software, and to permit persons to whom the Software is furnished to do
53
+ // of the Software, and to permit persons to whom the Software is furnished to
54
+ // do
53
55
  // so, subject to the following conditions:
54
56
 
55
57
  // The above copyright notice and this permission notice shall be included in
@@ -59,32 +61,35 @@ const int kUtf8ReplacementChar = 0xFFFD;
59
61
  #define UTF8_REJECT 12
60
62
 
61
63
  static const uint8_t utf8d[] = {
62
- // The first part of the table maps bytes to character classes that
63
- // to reduce the size of the transition table and create bitmasks.
64
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
65
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
66
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
67
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
68
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
69
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
70
- 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
71
- 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
72
-
73
- // The second part is a transition table that maps a combination
74
- // of a state of the automaton and a character class to a state.
75
- 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
76
- 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
77
- 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
78
- 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
79
- 12,36,12,12,12,12,12,12,12,12,12,12,
64
+ // The first part of the table maps bytes to character classes that
65
+ // to reduce the size of the transition table and create bitmasks.
66
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
67
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
68
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71
+ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9,
72
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
73
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2,
74
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10,
75
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8,
76
+ 8, 8, 8, 8, 8, 8,
77
+
78
+ // The second part is a transition table that maps a combination
79
+ // of a state of the automaton and a character class to a state.
80
+ 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12,
81
+ 12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12,
82
+ 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,
83
+ 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12,
84
+ 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,
85
+ 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
80
86
  };
81
87
 
82
88
  uint32_t static inline decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
83
89
  uint32_t type = utf8d[byte];
84
90
 
85
- *codep = (*state != UTF8_ACCEPT) ?
86
- (byte & 0x3fu) | (*codep << 6) :
87
- (0xff >> type) & (byte);
91
+ *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
92
+ : (0xff >> type) & (byte);
88
93
 
89
94
  *state = utf8d[256 + *state + type];
90
95
  return *state;
@@ -130,7 +135,7 @@ static void read_char(Utf8Iterator* iter) {
130
135
  uint32_t code_point = 0;
131
136
  uint32_t state = UTF8_ACCEPT;
132
137
  for (const char* c = iter->_start; c < iter->_end; ++c) {
133
- decode(&state, &code_point, (uint32_t) (unsigned char) (*c));
138
+ decode(&state, &code_point, (uint32_t)(unsigned char) (*c));
134
139
  if (state == UTF8_ACCEPT) {
135
140
  iter->_width = c - iter->_start + 1;
136
141
  // This is the special handling for carriage returns that is mandated by
@@ -180,10 +185,10 @@ static void update_position(Utf8Iterator* iter) {
180
185
  if (iter->_current == '\n') {
181
186
  ++iter->_pos.line;
182
187
  iter->_pos.column = 1;
183
- } else if(iter->_current == '\t') {
188
+ } else if (iter->_current == '\t') {
184
189
  int tab_stop = iter->_parser->_options->tab_stop;
185
190
  iter->_pos.column = ((iter->_pos.column / tab_stop) + 1) * tab_stop;
186
- } else if(iter->_current != -1) {
191
+ } else if (iter->_current != -1) {
187
192
  ++iter->_pos.column;
188
193
  }
189
194
  }
@@ -192,13 +197,12 @@ static void update_position(Utf8Iterator* iter) {
192
197
  // forbidden by the HTML5 spec, such as undefined control chars.
193
198
  bool utf8_is_invalid_code_point(int c) {
194
199
  return (c >= 0x1 && c <= 0x8) || c == 0xB || (c >= 0xE && c <= 0x1F) ||
195
- (c >= 0x7F && c <= 0x9F) || (c >= 0xFDD0 && c <= 0xFDEF) ||
196
- ((c & 0xFFFF) == 0xFFFE) || ((c & 0xFFFF) == 0xFFFF);
200
+ (c >= 0x7F && c <= 0x9F) || (c >= 0xFDD0 && c <= 0xFDEF) ||
201
+ ((c & 0xFFFF) == 0xFFFE) || ((c & 0xFFFF) == 0xFFFF);
197
202
  }
198
203
 
199
- void utf8iterator_init(
200
- GumboParser* parser, const char* source, size_t source_length,
201
- Utf8Iterator* iter) {
204
+ void utf8iterator_init(GumboParser* parser, const char* source,
205
+ size_t source_length, Utf8Iterator* iter) {
202
206
  iter->_start = source;
203
207
  iter->_end = source + source_length;
204
208
  iter->_pos.line = 1;
@@ -216,9 +220,7 @@ void utf8iterator_next(Utf8Iterator* iter) {
216
220
  read_char(iter);
217
221
  }
218
222
 
219
- int utf8iterator_current(const Utf8Iterator* iter) {
220
- return iter->_current;
221
- }
223
+ int utf8iterator_current(const Utf8Iterator* iter) { return iter->_current; }
222
224
 
223
225
  void utf8iterator_get_position(
224
226
  const Utf8Iterator* iter, GumboSourcePosition* output) {
@@ -233,14 +235,13 @@ const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter) {
233
235
  return iter->_end;
234
236
  }
235
237
 
236
- bool utf8iterator_maybe_consume_match(
237
- Utf8Iterator* iter, const char* prefix, size_t length,
238
- bool case_sensitive) {
239
- bool matched = (iter->_start + length <= iter->_end) && (case_sensitive ?
240
- !strncmp(iter->_start, prefix, length) :
241
- !strncasecmp(iter->_start, prefix, length));
238
+ bool utf8iterator_maybe_consume_match(Utf8Iterator* iter, const char* prefix,
239
+ size_t length, bool case_sensitive) {
240
+ bool matched = (iter->_start + length <= iter->_end) &&
241
+ (case_sensitive ? !strncmp(iter->_start, prefix, length)
242
+ : !strncasecmp(iter->_start, prefix, length));
242
243
  if (matched) {
243
- for (int i = 0; i < length; ++i) {
244
+ for (unsigned int i = 0; i < length; ++i) {
244
245
  utf8iterator_next(iter);
245
246
  }
246
247
  return true;
@@ -263,8 +264,7 @@ void utf8iterator_reset(Utf8Iterator* iter) {
263
264
 
264
265
  // Sets the position and original text fields of an error to the value at the
265
266
  // mark.
266
- void utf8iterator_fill_error_at_mark(
267
- Utf8Iterator* iter, GumboError* error) {
267
+ void utf8iterator_fill_error_at_mark(Utf8Iterator* iter, GumboError* error) {
268
268
  error->position = iter->_mark_pos;
269
269
  error->original_text = iter->_mark;
270
270
  }
@@ -81,8 +81,7 @@ bool utf8_is_invalid_code_point(int c);
81
81
 
82
82
  // Initializes a new Utf8Iterator from the given byte buffer. The source does
83
83
  // not have to be NUL-terminated, but the length must be passed in explicitly.
84
- void utf8iterator_init(
85
- struct GumboInternalParser* parser, const char* source,
84
+ void utf8iterator_init(struct GumboInternalParser* parser, const char* source,
86
85
  size_t source_length, Utf8Iterator* iter);
87
86
 
88
87
  // Advances the current position by one code point.
@@ -29,7 +29,7 @@
29
29
  // TODO(jdtang): This should be elsewhere, but there's no .c file for
30
30
  // SourcePositions and yet the constant needs some linkage, so this is as good
31
31
  // as any.
32
- const GumboSourcePosition kGumboEmptySourcePosition = { 0, 0, 0 };
32
+ const GumboSourcePosition kGumboEmptySourcePosition = {0, 0, 0};
33
33
 
34
34
  void* gumbo_parser_allocate(GumboParser* parser, size_t num_bytes) {
35
35
  return parser->_options->allocator(parser->_options->userdata, num_bytes);
@@ -25,8 +25,6 @@
25
25
  #include <stdbool.h>
26
26
  #include <stddef.h>
27
27
 
28
-
29
-
30
28
  #ifdef __cplusplus
31
29
  extern "C" {
32
30
  #endif
@@ -25,21 +25,22 @@
25
25
 
26
26
  struct GumboInternalParser;
27
27
 
28
- const GumboVector kGumboEmptyVector = { NULL, 0, 0 };
28
+ const GumboVector kGumboEmptyVector = {NULL, 0, 0};
29
29
 
30
- void gumbo_vector_init(
31
- struct GumboInternalParser* parser, size_t initial_capacity, GumboVector* vector) {
30
+ void gumbo_vector_init(struct GumboInternalParser* parser,
31
+ size_t initial_capacity, GumboVector* vector) {
32
32
  vector->length = 0;
33
33
  vector->capacity = initial_capacity;
34
34
  if (initial_capacity > 0) {
35
- vector->data = gumbo_parser_allocate(
36
- parser, sizeof(void*) * initial_capacity);
35
+ vector->data =
36
+ gumbo_parser_allocate(parser, sizeof(void*) * initial_capacity);
37
37
  } else {
38
38
  vector->data = NULL;
39
39
  }
40
40
  }
41
41
 
42
- void gumbo_vector_destroy(struct GumboInternalParser* parser, GumboVector* vector) {
42
+ void gumbo_vector_destroy(
43
+ struct GumboInternalParser* parser, GumboVector* vector) {
43
44
  if (vector->capacity > 0) {
44
45
  gumbo_parser_deallocate(parser, vector->data);
45
46
  }
@@ -59,8 +60,8 @@ static void enlarge_vector_if_full(
59
60
  } else {
60
61
  // 0-capacity vector; no previous array to deallocate.
61
62
  vector->capacity = 2;
62
- vector->data = gumbo_parser_allocate(
63
- parser, sizeof(void*) * vector->capacity);
63
+ vector->data =
64
+ gumbo_parser_allocate(parser, sizeof(void*) * vector->capacity);
64
65
  }
65
66
  }
66
67
  }
@@ -81,8 +82,8 @@ void* gumbo_vector_pop(
81
82
  return vector->data[--vector->length];
82
83
  }
83
84
 
84
- int gumbo_vector_index_of(GumboVector* vector, void* element) {
85
- for (int i = 0; i < vector->length; ++i) {
85
+ int gumbo_vector_index_of(GumboVector* vector, const void* element) {
86
+ for (unsigned int i = 0; i < vector->length; ++i) {
86
87
  if (vector->data[i] == element) {
87
88
  return i;
88
89
  }
@@ -90,15 +91,14 @@ int gumbo_vector_index_of(GumboVector* vector, void* element) {
90
91
  return -1;
91
92
  }
92
93
 
93
- void gumbo_vector_insert_at(
94
- struct GumboInternalParser* parser, void* element, int index,
95
- GumboVector* vector) {
94
+ void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
95
+ unsigned int index, GumboVector* vector) {
96
96
  assert(index >= 0);
97
97
  assert(index <= vector->length);
98
98
  enlarge_vector_if_full(parser, vector);
99
99
  ++vector->length;
100
100
  memmove(&vector->data[index + 1], &vector->data[index],
101
- sizeof(void*) * (vector->length - index - 1));
101
+ sizeof(void*) * (vector->length - index - 1));
102
102
  vector->data[index] = element;
103
103
  }
104
104
 
@@ -111,13 +111,13 @@ void gumbo_vector_remove(
111
111
  gumbo_vector_remove_at(parser, index, vector);
112
112
  }
113
113
 
114
- void* gumbo_vector_remove_at(
115
- struct GumboInternalParser* parser, int index, GumboVector* vector) {
114
+ void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
115
+ unsigned int index, GumboVector* vector) {
116
116
  assert(index >= 0);
117
117
  assert(index < vector->length);
118
118
  void* result = vector->data[index];
119
119
  memmove(&vector->data[index], &vector->data[index + 1],
120
- sizeof(void*) * (vector->length - index - 1));
120
+ sizeof(void*) * (vector->length - index - 1));
121
121
  --vector->length;
122
122
  return result;
123
123
  }
@@ -28,9 +28,8 @@ extern "C" {
28
28
  struct GumboInternalParser;
29
29
 
30
30
  // Initializes a new GumboVector with the specified initial capacity.
31
- void gumbo_vector_init(
32
- struct GumboInternalParser* parser, size_t initial_capacity,
33
- GumboVector* vector);
31
+ void gumbo_vector_init(struct GumboInternalParser* parser,
32
+ size_t initial_capacity, GumboVector* vector);
34
33
 
35
34
  // Frees the memory used by a GumboVector. Does not free the contained
36
35
  // pointers.
@@ -48,9 +47,8 @@ void* gumbo_vector_pop(struct GumboInternalParser* parser, GumboVector* vector);
48
47
 
49
48
  // Inserts an element at a specific index. This is potentially O(N) time, but
50
49
  // is necessary for some of the spec's behavior.
51
- void gumbo_vector_insert_at(
52
- struct GumboInternalParser* parser, void* element, int index,
53
- GumboVector* vector);
50
+ void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
51
+ unsigned int index, GumboVector* vector);
54
52
 
55
53
  // Removes an element from the vector, or does nothing if the element is not in
56
54
  // the vector.
@@ -59,8 +57,8 @@ void gumbo_vector_remove(
59
57
 
60
58
  // Removes and returns an element at a specific index. Note that this is
61
59
  // potentially O(N) time and should be used sparingly.
62
- void* gumbo_vector_remove_at(
63
- struct GumboInternalParser* parser, int index, GumboVector* vector);
60
+ void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
61
+ unsigned int index, GumboVector* vector);
64
62
 
65
63
  #ifdef __cplusplus
66
64
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.2
4
+ version: 1.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-12 00:00:00.000000000 Z
11
+ date: 2015-09-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -52,6 +52,11 @@ files:
52
52
  - gumbo-parser/src/string_piece.c
53
53
  - gumbo-parser/src/string_piece.h
54
54
  - gumbo-parser/src/tag.c
55
+ - gumbo-parser/src/tag.in
56
+ - gumbo-parser/src/tag_enum.h
57
+ - gumbo-parser/src/tag_gperf.h
58
+ - gumbo-parser/src/tag_sizes.h
59
+ - gumbo-parser/src/tag_strings.h
55
60
  - gumbo-parser/src/token_type.h
56
61
  - gumbo-parser/src/tokenizer.c
57
62
  - gumbo-parser/src/tokenizer.h
@@ -85,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
85
90
  version: '0'
86
91
  requirements: []
87
92
  rubyforge_project:
88
- rubygems_version: 2.4.5
93
+ rubygems_version: 2.4.5.1
89
94
  signing_key:
90
95
  specification_version: 4
91
96
  summary: Nokogiri interface to the Gumbo HTML5 parser