nokogumbo 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -20,10 +20,7 @@
20
20
  Coding conventions specific to this file:
21
21
 
22
22
  1. Functions that fill in a token should be named emit_*, and should be
23
- followed immediately by a return from the tokenizer (true if no error
24
- occurred, false if an error occurred). Sometimes the emit functions
25
- themselves return a boolean so that they can be combined with the return
26
- statement; in this case, they should match this convention.
23
+ followed immediately by a return from the tokenizer.
27
24
  2. Functions that shuffle data from temporaries to final API structures
28
25
  should be named finish_*, and be called just before the tokenizer exits the
29
26
  state that accumulates the temporary.
@@ -141,10 +138,6 @@ typedef struct GumboInternalTokenizerState {
141
138
  // text tokens emitted will be GUMBO_TOKEN_CDATA.
142
139
  bool _is_in_cdata;
143
140
 
144
- // A flag indicating whether the tokenizer has seen a parse error since the
145
- // last token was emitted.
146
- bool _parse_error;
147
-
148
141
  // Certain states (notably character references) may emit two character tokens
149
142
  // at once, but the contract for lex() fills in only one token at a time. The
150
143
  // extra character is buffered here, and then this is checked on entry to
@@ -207,7 +200,6 @@ static void tokenizer_add_parse_error (
207
200
  GumboErrorType type
208
201
  ) {
209
202
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
210
- tokenizer->_parse_error = true;
211
203
  GumboError* error = gumbo_add_error(parser);
212
204
  if (!error) {
213
205
  return;
@@ -228,7 +220,6 @@ static void tokenizer_add_char_ref_error (
228
220
  int codepoint
229
221
  ) {
230
222
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
231
- tokenizer->_parse_error = true;
232
223
  GumboError* error = gumbo_add_error(parser);
233
224
  if (!error)
234
225
  return;
@@ -248,7 +239,6 @@ static void tokenizer_add_token_parse_error (
248
239
  GumboErrorType type
249
240
  ) {
250
241
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
251
- tokenizer->_parse_error = true;
252
242
  GumboError* error = gumbo_add_error(parser);
253
243
  if (!error)
254
244
  return;
@@ -732,7 +722,10 @@ static void copy_over_original_tag_text (
732
722
  original_text->data = tag_state->_original_text;
733
723
  original_text->length = utf8iterator_get_char_pointer(&tokenizer->_input) -
734
724
  tag_state->_original_text;
735
- if (original_text->data[original_text->length - 1] == '\r') {
725
+ if (
726
+ original_text->length
727
+ && original_text->data[original_text->length - 1] == '\r'
728
+ ) {
736
729
  // Since \r is skipped by the UTF-8 iterator, it can sometimes end up
737
730
  // appended to the end of original text even when it's really the first part
738
731
  // of the next character. If we detect this situation, shrink the length of
@@ -770,7 +763,6 @@ static void finish_tag_name(GumboParser* parser) {
770
763
  // Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
771
764
  static void add_duplicate_attr_error(GumboParser* parser) {
772
765
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
773
- tokenizer->_parse_error = true;
774
766
  GumboError* error = gumbo_add_error(parser);
775
767
  if (!error) {
776
768
  return;
@@ -788,9 +780,8 @@ static void add_duplicate_attr_error(GumboParser* parser) {
788
780
  // the attribute's name. The attribute's value starts out as the empty string
789
781
  // (following the "Boolean attributes" section of the spec) and is only
790
782
  // overwritten on finish_attribute_value(). If the attribute has already been
791
- // specified, the new attribute is dropped, a parse error is added, and the
792
- // function returns false. Otherwise, this returns true.
793
- static bool finish_attribute_name(GumboParser* parser) {
783
+ // specified, the new attribute is dropped and a parse error is added.
784
+ static void finish_attribute_name(GumboParser* parser) {
794
785
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
795
786
  GumboTagState* tag_state = &tokenizer->_tag_state;
796
787
  // May've been set by a previous attribute without a value; reset it here.
@@ -813,7 +804,7 @@ static bool finish_attribute_name(GumboParser* parser) {
813
804
  add_duplicate_attr_error(parser);
814
805
  reinitialize_tag_buffer(parser);
815
806
  tag_state->_drop_next_attr_value = true;
816
- return false;
807
+ return;
817
808
  }
818
809
  }
819
810
 
@@ -835,7 +826,6 @@ static bool finish_attribute_name(GumboParser* parser) {
835
826
  );
836
827
  gumbo_vector_add(attr, attributes);
837
828
  reinitialize_tag_buffer(parser);
838
- return true;
839
829
  }
840
830
 
841
831
  // Finishes an attribute value. This sets the value of the most recently added
@@ -881,7 +871,6 @@ void gumbo_tokenizer_state_init (
881
871
  tokenizer->_reconsume_current_input = false;
882
872
  tokenizer->_is_adjusted_current_node_foreign = false;
883
873
  tokenizer->_is_in_cdata = false;
884
- tokenizer->_parse_error = false;
885
874
  tokenizer->_tag_state._last_start_tag = GUMBO_TAG_LAST;
886
875
  tokenizer->_tag_state._name = NULL;
887
876
 
@@ -3373,7 +3362,7 @@ static GumboLexerStateFunction dispatch_table[] = {
3373
3362
  [GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE_END] = handle_numeric_character_reference_end_state,
3374
3363
  };
3375
3364
 
3376
- bool gumbo_lex(GumboParser* parser, GumboToken* output) {
3365
+ void gumbo_lex(GumboParser* parser, GumboToken* output) {
3377
3366
  // Because of the spec requirements that...
3378
3367
  //
3379
3368
  // 1. Tokens be handled immediately by the parser upon emission.
@@ -3398,15 +3387,13 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
3398
3387
  // isn't consumed twice.
3399
3388
  tokenizer->_reconsume_current_input = false;
3400
3389
  tokenizer->_buffered_emit_char = kGumboNoChar;
3401
- return true;
3390
+ return;
3402
3391
  }
3403
3392
 
3404
3393
  if (maybe_emit_from_mark(parser, output) == EMIT_TOKEN) {
3405
- // Return no error.
3406
- return true;
3394
+ return;
3407
3395
  }
3408
3396
 
3409
- tokenizer->_parse_error = false;
3410
3397
  while (1) {
3411
3398
  assert(!tokenizer->_resume_pos);
3412
3399
  assert(tokenizer->_buffered_emit_char == kGumboNoChar);
@@ -3420,7 +3407,7 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
3420
3407
  tokenizer->_reconsume_current_input = false;
3421
3408
 
3422
3409
  if (result == EMIT_TOKEN)
3423
- return !tokenizer->_parse_error;
3410
+ return;
3424
3411
 
3425
3412
  if (should_advance) {
3426
3413
  utf8iterator_next(&tokenizer->_input);
@@ -93,19 +93,8 @@ void gumbo_tokenizer_set_is_adjusted_current_node_foreign (
93
93
  );
94
94
 
95
95
  // Lexes a single token from the specified buffer, filling the output with the
96
- // parsed GumboToken data structure. Returns true for a successful
97
- // tokenization, false if a parse error occurs.
98
- //
99
- // Example:
100
- // struct GumboInternalParser parser;
101
- // GumboToken output;
102
- // gumbo_tokenizer_state_init(&parser, text, strlen(text));
103
- // while (gumbo_lex(&parser, &output)) {
104
- // ...do stuff with output.
105
- // gumbo_token_destroy(&token);
106
- // }
107
- // gumbo_tokenizer_state_destroy(&parser);
108
- bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
96
+ // parsed GumboToken data structure.
97
+ void gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
109
98
 
110
99
  // Frees the internally-allocated pointers within a GumboToken. Note that this
111
100
  // doesn't free the token itself, since oftentimes it will be allocated on the
@@ -1,3 +1,3 @@
1
1
  module Nokogumbo
2
- VERSION = "2.0.1"
2
+ VERSION = "2.0.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-11-11 00:00:00.000000000 Z
12
+ date: 2019-11-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -109,8 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  requirements: []
112
- rubyforge_project:
113
- rubygems_version: 2.7.7
112
+ rubygems_version: 3.0.6
114
113
  signing_key:
115
114
  specification_version: 4
116
115
  summary: Nokogiri interface to the Gumbo HTML5 parser