nokogumbo 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZDM4N2U1MTA3MjkzNGQ1ZmU1OGY4NjljY2NiNzdlMGEwNzYyYTkwMA==
4
+ ZmRkOWYwNjM2OTdmNTc1MjIzNjg4MDBhNzJmZmMzNTgwMWNmNjJiNA==
5
5
  data.tar.gz: !binary |-
6
- YzQ1MTYzYTJjZjM1YjFkZTU2NThlZWFkMzhmODFjMTI4MDk2YzVlYw==
6
+ ODA3OTkxNDYwZTM3NjI3ZGE1OGYyYjllYWU0ZDk5ODU5M2RlYWZkMA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZDM3MDI2MjkwMzAyNTQ2ZmRhNDc3NmRlNjIzMjBlZjE5OGU2MGU3M2MyZDcw
10
- MTgxOTBhOGUxZjkxYmZlZGZlN2I1NTVmYTBlM2E3NWZkNmFiODE4ZThjMjdj
11
- N2E1M2RkNDljYjc4YWY0YmIxOGYyYTNjYTM1OWQzNDA0NjBiOTQ=
9
+ ZWJjNGEwMDg2OTVjNjBhYTA5YTM1NTM4ZWNjOTFiN2NjZTM5MmFiYWFmY2U4
10
+ MDliNGVhMWU3ZWI0ZTMxMjg4ZDRkMTNlMzQ5YzE3ZTg1NGU3MDAyOTcyOWQz
11
+ ZGYxYjU3YTE4Y2ZmNjNkNDZmN2NlN2Y5Y2MwOWE2MDFiY2E1NzA=
12
12
  data.tar.gz: !binary |-
13
- NmFkYjA3MjA3Njk5YTJiMmIyZWEyM2EyNGQ0ZjQxYzAwMTY2NDQwZmM3YmUz
14
- YWQwM2YzNjFlOGM2OTljNjc1MzZlMTZmNDM1ODUwYmZkZjZhNzcyZGE0ZGMy
15
- N2Y4YzA5ZWI4NjZiY2M5NGI2Nzk5MjNiZDE5MzMzNmM4Y2M2OWE=
13
+ MTU3MjVmNWViNDdiMDJlOWI4MmNiMzU0NTBjZWVkNTg3YWMzZTFlYWYxODVi
14
+ MzM4YWE4NDdlZjdlNzdlZDc0N2FkMGE2MTY4ZGVmN2ZkYzYzNzdjNTViNDQ3
15
+ YzNmMzhiODQ0ZGQwMDk3ZDkxMDQ5NTFmNDcyMzNmZmZiOTJiMDc=
data/README.md CHANGED
@@ -34,7 +34,7 @@ Notes
34
34
 
35
35
  * The `Nokogiri::HTML5.parse` function takes a string and passes it to the
36
36
  <code>gumbo_parse_with_options</code> method, using the default options.
37
- The resulting Gumbo parse tree is the walked.
37
+ The resulting Gumbo parse tree is then walked.
38
38
  * If the necessary Nokogiri and [libxml2](http://xmlsoft.org/html/) headers
39
39
  can be found at installation time then an
40
40
  [xmlDoc](http://xmlsoft.org/html/libxml-tree.html#xmlDoc) tree is produced
@@ -164,11 +164,12 @@ typedef enum {
164
164
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
165
165
  GUMBO_TAG_SCRIPT,
166
166
  GUMBO_TAG_NOSCRIPT,
167
+ GUMBO_TAG_TEMPLATE,
167
168
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
168
169
  GUMBO_TAG_BODY,
170
+ GUMBO_TAG_ARTICLE,
169
171
  GUMBO_TAG_SECTION,
170
172
  GUMBO_TAG_NAV,
171
- GUMBO_TAG_ARTICLE,
172
173
  GUMBO_TAG_ASIDE,
173
174
  GUMBO_TAG_H1,
174
175
  GUMBO_TAG_H2,
@@ -193,6 +194,7 @@ typedef enum {
193
194
  GUMBO_TAG_DD,
194
195
  GUMBO_TAG_FIGURE,
195
196
  GUMBO_TAG_FIGCAPTION,
197
+ GUMBO_TAG_MAIN,
196
198
  GUMBO_TAG_DIV,
197
199
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
198
200
  GUMBO_TAG_A,
@@ -204,6 +206,7 @@ typedef enum {
204
206
  GUMBO_TAG_Q,
205
207
  GUMBO_TAG_DFN,
206
208
  GUMBO_TAG_ABBR,
209
+ GUMBO_TAG_DATA,
207
210
  GUMBO_TAG_TIME,
208
211
  GUMBO_TAG_CODE,
209
212
  GUMBO_TAG_VAR,
@@ -213,6 +216,7 @@ typedef enum {
213
216
  GUMBO_TAG_SUP,
214
217
  GUMBO_TAG_I,
215
218
  GUMBO_TAG_B,
219
+ GUMBO_TAG_U,
216
220
  GUMBO_TAG_MARK,
217
221
  GUMBO_TAG_RUBY,
218
222
  GUMBO_TAG_RT,
@@ -284,8 +288,8 @@ typedef enum {
284
288
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
285
289
  GUMBO_TAG_DETAILS,
286
290
  GUMBO_TAG_SUMMARY,
287
- GUMBO_TAG_COMMAND,
288
291
  GUMBO_TAG_MENU,
292
+ GUMBO_TAG_MENUITEM,
289
293
  // Non-conforming elements that nonetheless appear in the HTML5 spec.
290
294
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
291
295
  GUMBO_TAG_APPLET,
@@ -313,7 +317,6 @@ typedef enum {
313
317
  GUMBO_TAG_NOBR,
314
318
  GUMBO_TAG_SPACER,
315
319
  GUMBO_TAG_TT,
316
- GUMBO_TAG_U,
317
320
  // Used for all tags that don't have special handling in HTML.
318
321
  GUMBO_TAG_UNKNOWN,
319
322
  // A marker value to indicate the end of the enum, for iterating over it.
@@ -22,6 +22,8 @@ extern "C" {
22
22
  #endif
23
23
 
24
24
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#insertion-mode
25
+ // If new enum values are added, be sure to update the kTokenHandlers dispatch
26
+ // table in parser.c.
25
27
  typedef enum {
26
28
  GUMBO_INSERTION_MODE_INITIAL,
27
29
  GUMBO_INSERTION_MODE_BEFORE_HTML,
@@ -40,6 +42,7 @@ typedef enum {
40
42
  GUMBO_INSERTION_MODE_IN_CELL,
41
43
  GUMBO_INSERTION_MODE_IN_SELECT,
42
44
  GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE,
45
+ GUMBO_INSERTION_MODE_IN_TEMPLATE,
43
46
  GUMBO_INSERTION_MODE_AFTER_BODY,
44
47
  GUMBO_INSERTION_MODE_IN_FRAMESET,
45
48
  GUMBO_INSERTION_MODE_AFTER_FRAMESET,
@@ -354,6 +354,10 @@ typedef struct GumboInternalParserState {
354
354
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#the-list-of-active-formatting-elements
355
355
  GumboVector /*GumboNode*/ _active_formatting_elements;
356
356
 
357
+ // The stack of template insertion modes.
358
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-insertion-mode
359
+ GumboVector /*InsertionMode*/ _template_insertion_modes;
360
+
357
361
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#the-element-pointers
358
362
  GumboNode* _head_element;
359
363
  GumboNode* _form_element;
@@ -482,6 +486,7 @@ static void parser_state_init(GumboParser* parser) {
482
486
  gumbo_string_buffer_init(parser, &parser_state->_text_node._buffer);
483
487
  gumbo_vector_init(parser, 10, &parser_state->_open_elements);
484
488
  gumbo_vector_init(parser, 5, &parser_state->_active_formatting_elements);
489
+ gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
485
490
  parser_state->_head_element = NULL;
486
491
  parser_state->_form_element = NULL;
487
492
  parser_state->_current_token = NULL;
@@ -494,6 +499,7 @@ static void parser_state_destroy(GumboParser* parser) {
494
499
  GumboParserState* state = parser->_parser_state;
495
500
  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
496
501
  gumbo_vector_destroy(parser, &state->_open_elements);
502
+ gumbo_vector_destroy(parser, &state->_template_insertion_modes);
497
503
  gumbo_string_buffer_destroy(parser, &state->_text_node._buffer);
498
504
  gumbo_parser_deallocate(parser, state);
499
505
  }
@@ -531,6 +537,25 @@ static bool is_in_static_list(
531
537
  return false;
532
538
  }
533
539
 
540
+ static void push_template_insertion_mode(
541
+ GumboParser* parser, GumboInsertionMode mode) {
542
+ gumbo_vector_add(
543
+ parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
544
+ }
545
+
546
+ static void pop_template_insertion_mode(GumboParser* parser) {
547
+ gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
548
+ }
549
+
550
+ static GumboInsertionMode get_current_template_insertion_mode(
551
+ GumboParser* parser) {
552
+ GumboVector* template_insertion_modes =
553
+ &parser->_parser_state->_template_insertion_modes;
554
+ assert(template_insertion_modes->length > 0);
555
+ return (GumboInsertionMode) template_insertion_modes->data[
556
+ template_insertion_modes->length - 1];
557
+ }
558
+
534
559
  static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
535
560
  parser->_parser_state->_insertion_mode = mode;
536
561
  }
@@ -718,7 +743,7 @@ static bool is_html_integration_point(const GumboNode* node) {
718
743
  static void append_node(
719
744
  GumboParser* parser, GumboNode* parent, GumboNode* node) {
720
745
  assert(node->parent == NULL);
721
- assert(node->index_within_parent = -1);
746
+ assert(node->index_within_parent == -1);
722
747
  GumboVector* children;
723
748
  if (parent->type == GUMBO_NODE_ELEMENT) {
724
749
  children = &parent->v.element.children;
@@ -737,7 +762,7 @@ static void append_node(
737
762
  static void insert_node(
738
763
  GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
739
764
  assert(node->parent == NULL);
740
- assert(node->index_within_parent = -1);
765
+ assert(node->index_within_parent == -1);
741
766
  assert(parent->type == GUMBO_NODE_ELEMENT);
742
767
  GumboVector* children = &parent->v.element.children;
743
768
  assert(index >= 0);
@@ -1520,7 +1545,7 @@ static bool is_special_node(const GumboNode* node) {
1520
1545
  GUMBO_TAG_BASEFONT, GUMBO_TAG_BGSOUND, GUMBO_TAG_BLOCKQUOTE,
1521
1546
  GUMBO_TAG_BODY, GUMBO_TAG_BR, GUMBO_TAG_BUTTON, GUMBO_TAG_CAPTION,
1522
1547
  GUMBO_TAG_CENTER, GUMBO_TAG_COL, GUMBO_TAG_COLGROUP,
1523
- GUMBO_TAG_COMMAND, GUMBO_TAG_DD, GUMBO_TAG_DETAILS, GUMBO_TAG_DIR,
1548
+ GUMBO_TAG_MENUITEM, GUMBO_TAG_DD, GUMBO_TAG_DETAILS, GUMBO_TAG_DIR,
1524
1549
  GUMBO_TAG_DIV, GUMBO_TAG_DL, GUMBO_TAG_DT, GUMBO_TAG_EMBED,
1525
1550
  GUMBO_TAG_FIELDSET, GUMBO_TAG_FIGCAPTION, GUMBO_TAG_FIGURE,
1526
1551
  GUMBO_TAG_FOOTER, GUMBO_TAG_FORM, GUMBO_TAG_FRAME,
@@ -2105,7 +2130,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2105
2130
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2106
2131
  return handle_in_body(parser, token);
2107
2132
  } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2108
- GUMBO_TAG_BGSOUND, GUMBO_TAG_COMMAND, GUMBO_TAG_LINK,
2133
+ GUMBO_TAG_BGSOUND, GUMBO_TAG_MENUITEM, GUMBO_TAG_LINK,
2109
2134
  GUMBO_TAG_LAST)) {
2110
2135
  insert_element_from_token(parser, token);
2111
2136
  pop_current_node(parser);
@@ -2316,7 +2341,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2316
2341
  merge_attributes(parser, token, parser->_output->root);
2317
2342
  return false;
2318
2343
  } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2319
- GUMBO_TAG_BGSOUND, GUMBO_TAG_COMMAND, GUMBO_TAG_LINK,
2344
+ GUMBO_TAG_BGSOUND, GUMBO_TAG_MENUITEM, GUMBO_TAG_LINK,
2320
2345
  GUMBO_TAG_META, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SCRIPT,
2321
2346
  GUMBO_TAG_STYLE, GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
2322
2347
  return handle_in_head(parser, token);
@@ -3415,6 +3440,12 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3415
3440
  }
3416
3441
  }
3417
3442
 
3443
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
3444
+ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3445
+ // TODO(jdtang): Implement this.
3446
+ return true;
3447
+ }
3448
+
3418
3449
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
3419
3450
  static bool handle_after_body(GumboParser* parser, GumboToken* token) {
3420
3451
  if (token->type == GUMBO_TOKEN_WHITESPACE ||
@@ -3586,6 +3617,7 @@ static const TokenHandler kTokenHandlers[] = {
3586
3617
  handle_in_cell,
3587
3618
  handle_in_select,
3588
3619
  handle_in_select_in_table,
3620
+ handle_in_template,
3589
3621
  handle_after_body,
3590
3622
  handle_in_frameset,
3591
3623
  handle_after_frameset,
@@ -3594,7 +3626,7 @@ static const TokenHandler kTokenHandlers[] = {
3594
3626
  };
3595
3627
 
3596
3628
  static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3597
- return kTokenHandlers[parser->_parser_state->_insertion_mode](
3629
+ return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
3598
3630
  parser, token);
3599
3631
  }
3600
3632
 
@@ -34,10 +34,11 @@ const char* kGumboTagNames[] = {
34
34
  "style",
35
35
  "script",
36
36
  "noscript",
37
+ "template",
37
38
  "body",
39
+ "article",
38
40
  "section",
39
41
  "nav",
40
- "article",
41
42
  "aside",
42
43
  "h1",
43
44
  "h2",
@@ -61,6 +62,7 @@ const char* kGumboTagNames[] = {
61
62
  "dd",
62
63
  "figure",
63
64
  "figcaption",
65
+ "main",
64
66
  "div",
65
67
  "a",
66
68
  "em",
@@ -71,6 +73,7 @@ const char* kGumboTagNames[] = {
71
73
  "q",
72
74
  "dfn",
73
75
  "abbr",
76
+ "data",
74
77
  "time",
75
78
  "code",
76
79
  "var",
@@ -80,6 +83,7 @@ const char* kGumboTagNames[] = {
80
83
  "sup",
81
84
  "i",
82
85
  "b",
86
+ "u",
83
87
  "mark",
84
88
  "ruby",
85
89
  "rt",
@@ -143,8 +147,8 @@ const char* kGumboTagNames[] = {
143
147
  "meter",
144
148
  "details",
145
149
  "summary",
146
- "command",
147
150
  "menu",
151
+ "menuitem",
148
152
  "applet",
149
153
  "acronym",
150
154
  "bgsound",
@@ -170,7 +174,6 @@ const char* kGumboTagNames[] = {
170
174
  "nobr",
171
175
  "spacer",
172
176
  "tt",
173
- "u",
174
177
  "", // TAG_UNKNOWN
175
178
  "", // TAG_LAST
176
179
  };
data/lib/nokogumbo.rb CHANGED
@@ -67,7 +67,8 @@ module Nokogiri
67
67
  doc
68
68
  when Net::HTTPRedirection
69
69
  response.value if limit <= 1
70
- get(response['location'], options.merge(:follow_limit => limit-1))
70
+ location = URI.join(uri, response['location'])
71
+ get(location, options.merge(:follow_limit => limit-1))
71
72
  else
72
73
  response.value
73
74
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-17 00:00:00.000000000 Z
11
+ date: 2013-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri