nokogumbo 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZDM4N2U1MTA3MjkzNGQ1ZmU1OGY4NjljY2NiNzdlMGEwNzYyYTkwMA==
4
+ ZmRkOWYwNjM2OTdmNTc1MjIzNjg4MDBhNzJmZmMzNTgwMWNmNjJiNA==
5
5
  data.tar.gz: !binary |-
6
- YzQ1MTYzYTJjZjM1YjFkZTU2NThlZWFkMzhmODFjMTI4MDk2YzVlYw==
6
+ ODA3OTkxNDYwZTM3NjI3ZGE1OGYyYjllYWU0ZDk5ODU5M2RlYWZkMA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZDM3MDI2MjkwMzAyNTQ2ZmRhNDc3NmRlNjIzMjBlZjE5OGU2MGU3M2MyZDcw
10
- MTgxOTBhOGUxZjkxYmZlZGZlN2I1NTVmYTBlM2E3NWZkNmFiODE4ZThjMjdj
11
- N2E1M2RkNDljYjc4YWY0YmIxOGYyYTNjYTM1OWQzNDA0NjBiOTQ=
9
+ ZWJjNGEwMDg2OTVjNjBhYTA5YTM1NTM4ZWNjOTFiN2NjZTM5MmFiYWFmY2U4
10
+ MDliNGVhMWU3ZWI0ZTMxMjg4ZDRkMTNlMzQ5YzE3ZTg1NGU3MDAyOTcyOWQz
11
+ ZGYxYjU3YTE4Y2ZmNjNkNDZmN2NlN2Y5Y2MwOWE2MDFiY2E1NzA=
12
12
  data.tar.gz: !binary |-
13
- NmFkYjA3MjA3Njk5YTJiMmIyZWEyM2EyNGQ0ZjQxYzAwMTY2NDQwZmM3YmUz
14
- YWQwM2YzNjFlOGM2OTljNjc1MzZlMTZmNDM1ODUwYmZkZjZhNzcyZGE0ZGMy
15
- N2Y4YzA5ZWI4NjZiY2M5NGI2Nzk5MjNiZDE5MzMzNmM4Y2M2OWE=
13
+ MTU3MjVmNWViNDdiMDJlOWI4MmNiMzU0NTBjZWVkNTg3YWMzZTFlYWYxODVi
14
+ MzM4YWE4NDdlZjdlNzdlZDc0N2FkMGE2MTY4ZGVmN2ZkYzYzNzdjNTViNDQ3
15
+ YzNmMzhiODQ0ZGQwMDk3ZDkxMDQ5NTFmNDcyMzNmZmZiOTJiMDc=
data/README.md CHANGED
@@ -34,7 +34,7 @@ Notes
34
34
 
35
35
  * The `Nokogiri::HTML5.parse` function takes a string and passes it to the
36
36
  <code>gumbo_parse_with_options</code> method, using the default options.
37
- The resulting Gumbo parse tree is the walked.
37
+ The resulting Gumbo parse tree is then walked.
38
38
  * If the necessary Nokogiri and [libxml2](http://xmlsoft.org/html/) headers
39
39
  can be found at installation time then an
40
40
  [xmlDoc](http://xmlsoft.org/html/libxml-tree.html#xmlDoc) tree is produced
@@ -164,11 +164,12 @@ typedef enum {
164
164
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
165
165
  GUMBO_TAG_SCRIPT,
166
166
  GUMBO_TAG_NOSCRIPT,
167
+ GUMBO_TAG_TEMPLATE,
167
168
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
168
169
  GUMBO_TAG_BODY,
170
+ GUMBO_TAG_ARTICLE,
169
171
  GUMBO_TAG_SECTION,
170
172
  GUMBO_TAG_NAV,
171
- GUMBO_TAG_ARTICLE,
172
173
  GUMBO_TAG_ASIDE,
173
174
  GUMBO_TAG_H1,
174
175
  GUMBO_TAG_H2,
@@ -193,6 +194,7 @@ typedef enum {
193
194
  GUMBO_TAG_DD,
194
195
  GUMBO_TAG_FIGURE,
195
196
  GUMBO_TAG_FIGCAPTION,
197
+ GUMBO_TAG_MAIN,
196
198
  GUMBO_TAG_DIV,
197
199
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
198
200
  GUMBO_TAG_A,
@@ -204,6 +206,7 @@ typedef enum {
204
206
  GUMBO_TAG_Q,
205
207
  GUMBO_TAG_DFN,
206
208
  GUMBO_TAG_ABBR,
209
+ GUMBO_TAG_DATA,
207
210
  GUMBO_TAG_TIME,
208
211
  GUMBO_TAG_CODE,
209
212
  GUMBO_TAG_VAR,
@@ -213,6 +216,7 @@ typedef enum {
213
216
  GUMBO_TAG_SUP,
214
217
  GUMBO_TAG_I,
215
218
  GUMBO_TAG_B,
219
+ GUMBO_TAG_U,
216
220
  GUMBO_TAG_MARK,
217
221
  GUMBO_TAG_RUBY,
218
222
  GUMBO_TAG_RT,
@@ -284,8 +288,8 @@ typedef enum {
284
288
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
285
289
  GUMBO_TAG_DETAILS,
286
290
  GUMBO_TAG_SUMMARY,
287
- GUMBO_TAG_COMMAND,
288
291
  GUMBO_TAG_MENU,
292
+ GUMBO_TAG_MENUITEM,
289
293
  // Non-conforming elements that nonetheless appear in the HTML5 spec.
290
294
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
291
295
  GUMBO_TAG_APPLET,
@@ -313,7 +317,6 @@ typedef enum {
313
317
  GUMBO_TAG_NOBR,
314
318
  GUMBO_TAG_SPACER,
315
319
  GUMBO_TAG_TT,
316
- GUMBO_TAG_U,
317
320
  // Used for all tags that don't have special handling in HTML.
318
321
  GUMBO_TAG_UNKNOWN,
319
322
  // A marker value to indicate the end of the enum, for iterating over it.
@@ -22,6 +22,8 @@ extern "C" {
22
22
  #endif
23
23
 
24
24
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#insertion-mode
25
+ // If new enum values are added, be sure to update the kTokenHandlers dispatch
26
+ // table in parser.c.
25
27
  typedef enum {
26
28
  GUMBO_INSERTION_MODE_INITIAL,
27
29
  GUMBO_INSERTION_MODE_BEFORE_HTML,
@@ -40,6 +42,7 @@ typedef enum {
40
42
  GUMBO_INSERTION_MODE_IN_CELL,
41
43
  GUMBO_INSERTION_MODE_IN_SELECT,
42
44
  GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE,
45
+ GUMBO_INSERTION_MODE_IN_TEMPLATE,
43
46
  GUMBO_INSERTION_MODE_AFTER_BODY,
44
47
  GUMBO_INSERTION_MODE_IN_FRAMESET,
45
48
  GUMBO_INSERTION_MODE_AFTER_FRAMESET,
@@ -354,6 +354,10 @@ typedef struct GumboInternalParserState {
354
354
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#the-list-of-active-formatting-elements
355
355
  GumboVector /*GumboNode*/ _active_formatting_elements;
356
356
 
357
+ // The stack of template insertion modes.
358
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-insertion-mode
359
+ GumboVector /*InsertionMode*/ _template_insertion_modes;
360
+
357
361
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#the-element-pointers
358
362
  GumboNode* _head_element;
359
363
  GumboNode* _form_element;
@@ -482,6 +486,7 @@ static void parser_state_init(GumboParser* parser) {
482
486
  gumbo_string_buffer_init(parser, &parser_state->_text_node._buffer);
483
487
  gumbo_vector_init(parser, 10, &parser_state->_open_elements);
484
488
  gumbo_vector_init(parser, 5, &parser_state->_active_formatting_elements);
489
+ gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
485
490
  parser_state->_head_element = NULL;
486
491
  parser_state->_form_element = NULL;
487
492
  parser_state->_current_token = NULL;
@@ -494,6 +499,7 @@ static void parser_state_destroy(GumboParser* parser) {
494
499
  GumboParserState* state = parser->_parser_state;
495
500
  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
496
501
  gumbo_vector_destroy(parser, &state->_open_elements);
502
+ gumbo_vector_destroy(parser, &state->_template_insertion_modes);
497
503
  gumbo_string_buffer_destroy(parser, &state->_text_node._buffer);
498
504
  gumbo_parser_deallocate(parser, state);
499
505
  }
@@ -531,6 +537,25 @@ static bool is_in_static_list(
531
537
  return false;
532
538
  }
533
539
 
540
+ static void push_template_insertion_mode(
541
+ GumboParser* parser, GumboInsertionMode mode) {
542
+ gumbo_vector_add(
543
+ parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
544
+ }
545
+
546
+ static void pop_template_insertion_mode(GumboParser* parser) {
547
+ gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
548
+ }
549
+
550
+ static GumboInsertionMode get_current_template_insertion_mode(
551
+ GumboParser* parser) {
552
+ GumboVector* template_insertion_modes =
553
+ &parser->_parser_state->_template_insertion_modes;
554
+ assert(template_insertion_modes->length > 0);
555
+ return (GumboInsertionMode) template_insertion_modes->data[
556
+ template_insertion_modes->length - 1];
557
+ }
558
+
534
559
  static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
535
560
  parser->_parser_state->_insertion_mode = mode;
536
561
  }
@@ -718,7 +743,7 @@ static bool is_html_integration_point(const GumboNode* node) {
718
743
  static void append_node(
719
744
  GumboParser* parser, GumboNode* parent, GumboNode* node) {
720
745
  assert(node->parent == NULL);
721
- assert(node->index_within_parent = -1);
746
+ assert(node->index_within_parent == -1);
722
747
  GumboVector* children;
723
748
  if (parent->type == GUMBO_NODE_ELEMENT) {
724
749
  children = &parent->v.element.children;
@@ -737,7 +762,7 @@ static void append_node(
737
762
  static void insert_node(
738
763
  GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
739
764
  assert(node->parent == NULL);
740
- assert(node->index_within_parent = -1);
765
+ assert(node->index_within_parent == -1);
741
766
  assert(parent->type == GUMBO_NODE_ELEMENT);
742
767
  GumboVector* children = &parent->v.element.children;
743
768
  assert(index >= 0);
@@ -1520,7 +1545,7 @@ static bool is_special_node(const GumboNode* node) {
1520
1545
  GUMBO_TAG_BASEFONT, GUMBO_TAG_BGSOUND, GUMBO_TAG_BLOCKQUOTE,
1521
1546
  GUMBO_TAG_BODY, GUMBO_TAG_BR, GUMBO_TAG_BUTTON, GUMBO_TAG_CAPTION,
1522
1547
  GUMBO_TAG_CENTER, GUMBO_TAG_COL, GUMBO_TAG_COLGROUP,
1523
- GUMBO_TAG_COMMAND, GUMBO_TAG_DD, GUMBO_TAG_DETAILS, GUMBO_TAG_DIR,
1548
+ GUMBO_TAG_MENUITEM, GUMBO_TAG_DD, GUMBO_TAG_DETAILS, GUMBO_TAG_DIR,
1524
1549
  GUMBO_TAG_DIV, GUMBO_TAG_DL, GUMBO_TAG_DT, GUMBO_TAG_EMBED,
1525
1550
  GUMBO_TAG_FIELDSET, GUMBO_TAG_FIGCAPTION, GUMBO_TAG_FIGURE,
1526
1551
  GUMBO_TAG_FOOTER, GUMBO_TAG_FORM, GUMBO_TAG_FRAME,
@@ -2105,7 +2130,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2105
2130
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2106
2131
  return handle_in_body(parser, token);
2107
2132
  } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2108
- GUMBO_TAG_BGSOUND, GUMBO_TAG_COMMAND, GUMBO_TAG_LINK,
2133
+ GUMBO_TAG_BGSOUND, GUMBO_TAG_MENUITEM, GUMBO_TAG_LINK,
2109
2134
  GUMBO_TAG_LAST)) {
2110
2135
  insert_element_from_token(parser, token);
2111
2136
  pop_current_node(parser);
@@ -2316,7 +2341,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2316
2341
  merge_attributes(parser, token, parser->_output->root);
2317
2342
  return false;
2318
2343
  } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2319
- GUMBO_TAG_BGSOUND, GUMBO_TAG_COMMAND, GUMBO_TAG_LINK,
2344
+ GUMBO_TAG_BGSOUND, GUMBO_TAG_MENUITEM, GUMBO_TAG_LINK,
2320
2345
  GUMBO_TAG_META, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SCRIPT,
2321
2346
  GUMBO_TAG_STYLE, GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
2322
2347
  return handle_in_head(parser, token);
@@ -3415,6 +3440,12 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3415
3440
  }
3416
3441
  }
3417
3442
 
3443
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
3444
+ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3445
+ // TODO(jdtang): Implement this.
3446
+ return true;
3447
+ }
3448
+
3418
3449
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
3419
3450
  static bool handle_after_body(GumboParser* parser, GumboToken* token) {
3420
3451
  if (token->type == GUMBO_TOKEN_WHITESPACE ||
@@ -3586,6 +3617,7 @@ static const TokenHandler kTokenHandlers[] = {
3586
3617
  handle_in_cell,
3587
3618
  handle_in_select,
3588
3619
  handle_in_select_in_table,
3620
+ handle_in_template,
3589
3621
  handle_after_body,
3590
3622
  handle_in_frameset,
3591
3623
  handle_after_frameset,
@@ -3594,7 +3626,7 @@ static const TokenHandler kTokenHandlers[] = {
3594
3626
  };
3595
3627
 
3596
3628
  static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3597
- return kTokenHandlers[parser->_parser_state->_insertion_mode](
3629
+ return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
3598
3630
  parser, token);
3599
3631
  }
3600
3632
 
@@ -34,10 +34,11 @@ const char* kGumboTagNames[] = {
34
34
  "style",
35
35
  "script",
36
36
  "noscript",
37
+ "template",
37
38
  "body",
39
+ "article",
38
40
  "section",
39
41
  "nav",
40
- "article",
41
42
  "aside",
42
43
  "h1",
43
44
  "h2",
@@ -61,6 +62,7 @@ const char* kGumboTagNames[] = {
61
62
  "dd",
62
63
  "figure",
63
64
  "figcaption",
65
+ "main",
64
66
  "div",
65
67
  "a",
66
68
  "em",
@@ -71,6 +73,7 @@ const char* kGumboTagNames[] = {
71
73
  "q",
72
74
  "dfn",
73
75
  "abbr",
76
+ "data",
74
77
  "time",
75
78
  "code",
76
79
  "var",
@@ -80,6 +83,7 @@ const char* kGumboTagNames[] = {
80
83
  "sup",
81
84
  "i",
82
85
  "b",
86
+ "u",
83
87
  "mark",
84
88
  "ruby",
85
89
  "rt",
@@ -143,8 +147,8 @@ const char* kGumboTagNames[] = {
143
147
  "meter",
144
148
  "details",
145
149
  "summary",
146
- "command",
147
150
  "menu",
151
+ "menuitem",
148
152
  "applet",
149
153
  "acronym",
150
154
  "bgsound",
@@ -170,7 +174,6 @@ const char* kGumboTagNames[] = {
170
174
  "nobr",
171
175
  "spacer",
172
176
  "tt",
173
- "u",
174
177
  "", // TAG_UNKNOWN
175
178
  "", // TAG_LAST
176
179
  };
data/lib/nokogumbo.rb CHANGED
@@ -67,7 +67,8 @@ module Nokogiri
67
67
  doc
68
68
  when Net::HTTPRedirection
69
69
  response.value if limit <= 1
70
- get(response['location'], options.merge(:follow_limit => limit-1))
70
+ location = URI.join(uri, response['location'])
71
+ get(location, options.merge(:follow_limit => limit-1))
71
72
  else
72
73
  response.value
73
74
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-17 00:00:00.000000000 Z
11
+ date: 2013-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri