nokogumbo 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,10 +20,7 @@
20
20
  Coding conventions specific to this file:
21
21
 
22
22
  1. Functions that fill in a token should be named emit_*, and should be
23
- followed immediately by a return from the tokenizer (true if no error
24
- occurred, false if an error occurred). Sometimes the emit functions
25
- themselves return a boolean so that they can be combined with the return
26
- statement; in this case, they should match this convention.
23
+ followed immediately by a return from the tokenizer.
27
24
  2. Functions that shuffle data from temporaries to final API structures
28
25
  should be named finish_*, and be called just before the tokenizer exits the
29
26
  state that accumulates the temporary.
@@ -141,10 +138,6 @@ typedef struct GumboInternalTokenizerState {
141
138
  // text tokens emitted will be GUMBO_TOKEN_CDATA.
142
139
  bool _is_in_cdata;
143
140
 
144
- // A flag indicating whether the tokenizer has seen a parse error since the
145
- // last token was emitted.
146
- bool _parse_error;
147
-
148
141
  // Certain states (notably character references) may emit two character tokens
149
142
  // at once, but the contract for lex() fills in only one token at a time. The
150
143
  // extra character is buffered here, and then this is checked on entry to
@@ -207,7 +200,6 @@ static void tokenizer_add_parse_error (
207
200
  GumboErrorType type
208
201
  ) {
209
202
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
210
- tokenizer->_parse_error = true;
211
203
  GumboError* error = gumbo_add_error(parser);
212
204
  if (!error) {
213
205
  return;
@@ -228,7 +220,6 @@ static void tokenizer_add_char_ref_error (
228
220
  int codepoint
229
221
  ) {
230
222
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
231
- tokenizer->_parse_error = true;
232
223
  GumboError* error = gumbo_add_error(parser);
233
224
  if (!error)
234
225
  return;
@@ -248,7 +239,6 @@ static void tokenizer_add_token_parse_error (
248
239
  GumboErrorType type
249
240
  ) {
250
241
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
251
- tokenizer->_parse_error = true;
252
242
  GumboError* error = gumbo_add_error(parser);
253
243
  if (!error)
254
244
  return;
@@ -732,7 +722,10 @@ static void copy_over_original_tag_text (
732
722
  original_text->data = tag_state->_original_text;
733
723
  original_text->length = utf8iterator_get_char_pointer(&tokenizer->_input) -
734
724
  tag_state->_original_text;
735
- if (original_text->data[original_text->length - 1] == '\r') {
725
+ if (
726
+ original_text->length
727
+ && original_text->data[original_text->length - 1] == '\r'
728
+ ) {
736
729
  // Since \r is skipped by the UTF-8 iterator, it can sometimes end up
737
730
  // appended to the end of original text even when it's really the first part
738
731
  // of the next character. If we detect this situation, shrink the length of
@@ -770,7 +763,6 @@ static void finish_tag_name(GumboParser* parser) {
770
763
  // Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
771
764
  static void add_duplicate_attr_error(GumboParser* parser) {
772
765
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
773
- tokenizer->_parse_error = true;
774
766
  GumboError* error = gumbo_add_error(parser);
775
767
  if (!error) {
776
768
  return;
@@ -788,9 +780,8 @@ static void add_duplicate_attr_error(GumboParser* parser) {
788
780
  // the attribute's name. The attribute's value starts out as the empty string
789
781
  // (following the "Boolean attributes" section of the spec) and is only
790
782
  // overwritten on finish_attribute_value(). If the attribute has already been
791
- // specified, the new attribute is dropped, a parse error is added, and the
792
- // function returns false. Otherwise, this returns true.
793
- static bool finish_attribute_name(GumboParser* parser) {
783
+ // specified, the new attribute is dropped and a parse error is added.
784
+ static void finish_attribute_name(GumboParser* parser) {
794
785
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
795
786
  GumboTagState* tag_state = &tokenizer->_tag_state;
796
787
  // May've been set by a previous attribute without a value; reset it here.
@@ -813,7 +804,7 @@ static bool finish_attribute_name(GumboParser* parser) {
813
804
  add_duplicate_attr_error(parser);
814
805
  reinitialize_tag_buffer(parser);
815
806
  tag_state->_drop_next_attr_value = true;
816
- return false;
807
+ return;
817
808
  }
818
809
  }
819
810
 
@@ -835,7 +826,6 @@ static bool finish_attribute_name(GumboParser* parser) {
835
826
  );
836
827
  gumbo_vector_add(attr, attributes);
837
828
  reinitialize_tag_buffer(parser);
838
- return true;
839
829
  }
840
830
 
841
831
  // Finishes an attribute value. This sets the value of the most recently added
@@ -881,7 +871,6 @@ void gumbo_tokenizer_state_init (
881
871
  tokenizer->_reconsume_current_input = false;
882
872
  tokenizer->_is_adjusted_current_node_foreign = false;
883
873
  tokenizer->_is_in_cdata = false;
884
- tokenizer->_parse_error = false;
885
874
  tokenizer->_tag_state._last_start_tag = GUMBO_TAG_LAST;
886
875
  tokenizer->_tag_state._name = NULL;
887
876
 
@@ -3373,7 +3362,7 @@ static GumboLexerStateFunction dispatch_table[] = {
3373
3362
  [GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE_END] = handle_numeric_character_reference_end_state,
3374
3363
  };
3375
3364
 
3376
- bool gumbo_lex(GumboParser* parser, GumboToken* output) {
3365
+ void gumbo_lex(GumboParser* parser, GumboToken* output) {
3377
3366
  // Because of the spec requirements that...
3378
3367
  //
3379
3368
  // 1. Tokens be handled immediately by the parser upon emission.
@@ -3398,15 +3387,13 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
3398
3387
  // isn't consumed twice.
3399
3388
  tokenizer->_reconsume_current_input = false;
3400
3389
  tokenizer->_buffered_emit_char = kGumboNoChar;
3401
- return true;
3390
+ return;
3402
3391
  }
3403
3392
 
3404
3393
  if (maybe_emit_from_mark(parser, output) == EMIT_TOKEN) {
3405
- // Return no error.
3406
- return true;
3394
+ return;
3407
3395
  }
3408
3396
 
3409
- tokenizer->_parse_error = false;
3410
3397
  while (1) {
3411
3398
  assert(!tokenizer->_resume_pos);
3412
3399
  assert(tokenizer->_buffered_emit_char == kGumboNoChar);
@@ -3420,7 +3407,7 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
3420
3407
  tokenizer->_reconsume_current_input = false;
3421
3408
 
3422
3409
  if (result == EMIT_TOKEN)
3423
- return !tokenizer->_parse_error;
3410
+ return;
3424
3411
 
3425
3412
  if (should_advance) {
3426
3413
  utf8iterator_next(&tokenizer->_input);
@@ -93,19 +93,8 @@ void gumbo_tokenizer_set_is_adjusted_current_node_foreign (
93
93
  );
94
94
 
95
95
  // Lexes a single token from the specified buffer, filling the output with the
96
- // parsed GumboToken data structure. Returns true for a successful
97
- // tokenization, false if a parse error occurs.
98
- //
99
- // Example:
100
- // struct GumboInternalParser parser;
101
- // GumboToken output;
102
- // gumbo_tokenizer_state_init(&parser, text, strlen(text));
103
- // while (gumbo_lex(&parser, &output)) {
104
- // ...do stuff with output.
105
- // gumbo_token_destroy(&token);
106
- // }
107
- // gumbo_tokenizer_state_destroy(&parser);
108
- bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
96
+ // parsed GumboToken data structure.
97
+ void gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
109
98
 
110
99
  // Frees the internally-allocated pointers within a GumboToken. Note that this
111
100
  // doesn't free the token itself, since oftentimes it will be allocated on the
@@ -1,3 +1,3 @@
1
1
  module Nokogumbo
2
- VERSION = "2.0.1"
2
+ VERSION = "2.0.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-11-11 00:00:00.000000000 Z
12
+ date: 2019-11-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -109,8 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  requirements: []
112
- rubyforge_project:
113
- rubygems_version: 2.7.7
112
+ rubygems_version: 3.0.6
114
113
  signing_key:
115
114
  specification_version: 4
116
115
  summary: Nokogiri interface to the Gumbo HTML5 parser