nokogumbo 1.4.13 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4a025ea0b590f29baaaba62289f947c6e97807c7
4
- data.tar.gz: e2b4b270b28c5fc6de4b24be437faf548cbf8b4e
2
+ SHA256:
3
+ metadata.gz: 96fa61565f78d5491e0b6d5b505cf936524745eb848b8b6584fc15e20c7ae35b
4
+ data.tar.gz: e5416f71bbe90323f04b8aad4dc48b28947e43a9eb46f446f8ca1444f519a07b
5
5
  SHA512:
6
- metadata.gz: 21c251c64b6d821a9c8ec1c06f3d00930d29390e52a9b5c8587e67bf71d3cd5e5663e77690019d68121d0072fe766819fa817049deb0d8eac93cd98d15fb2791
7
- data.tar.gz: b9ef569e5974fb6ef6246434295e61f87fc462b997ea79fd9d9645f76360f0056052ab5f3fc3d4c53e8c3fc989c4185a65f333e453b56eb3bfdbb5e7edfc341b
6
+ metadata.gz: 676bf3585d38cd4ad5c72b8b3afd4952e248c747683ae1072dd43f6ce1ccd279177e4d0c75a9821ed76d32806333128152231349d8d113ae5d81279580b13004
7
+ data.tar.gz: 3459078d96977399e75551c4a3ee5623091f48569984b771e540ec111125f5af91e39a8d78cbd3ce9280326b1b9395dc4a0b0d7f0a72294876682cb9fe35e3d9
data/README.md CHANGED
@@ -89,5 +89,5 @@ Installation
89
89
  Related efforts
90
90
  ============
91
91
 
92
- * [ruby-gumbo](https://github.com/galdor/ruby-gumbo#readme) - a ruby binding
92
+ * [ruby-gumbo](https://github.com/nevir/ruby-gumbo#readme) - a ruby binding
93
93
  for the Gumbo HTML5 parser.
@@ -184,11 +184,14 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {
184
184
  }
185
185
 
186
186
  // Parse a string using gumbo_parse into a Nokogiri document
187
- static VALUE parse(VALUE self, VALUE string) {
188
- const GumboOptions *options = &kGumboDefaultOptions;
187
+ static VALUE parse(VALUE self, VALUE string, VALUE max_parse_errors) {
188
+ GumboOptions options;
189
+ memcpy(&options, &kGumboDefaultOptions, sizeof options);
190
+ options.max_errors = NUM2INT(max_parse_errors);
191
+
189
192
  const char *input = RSTRING_PTR(string);
190
193
  size_t input_len = RSTRING_LEN(string);
191
- GumboOutput *output = gumbo_parse_with_options(options, input, input_len);
194
+ GumboOutput *output = gumbo_parse_with_options(&options, input, input_len);
192
195
  xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
193
196
  #ifdef NGLIB
194
197
  doc->type = XML_HTML_DOCUMENT_NODE;
@@ -219,7 +222,7 @@ static VALUE parse(VALUE self, VALUE string) {
219
222
  // Add parse errors to rdoc.
220
223
  if (output->errors.length) {
221
224
  GumboVector *errors = &output->errors;
222
- GumboParser parser = { ._options = options };
225
+ GumboParser parser = { ._options = &options };
223
226
  GumboStringBuffer msg;
224
227
  VALUE rerrors = rb_ary_new2(errors->length);
225
228
 
@@ -253,7 +256,7 @@ static VALUE parse(VALUE self, VALUE string) {
253
256
  gumbo_string_buffer_destroy(&parser, &msg);
254
257
  }
255
258
 
256
- gumbo_destroy_output(options, output);
259
+ gumbo_destroy_output(&options, output);
257
260
 
258
261
  return rdoc;
259
262
  }
@@ -288,5 +291,5 @@ void Init_nokogumboc() {
288
291
 
289
292
  // define Nokogumbo class with a singleton parse method
290
293
  VALUE Gumbo = rb_define_class("Nokogumbo", rb_cObject);
291
- rb_define_singleton_method(Gumbo, "parse", parse, 1);
294
+ rb_define_singleton_method(Gumbo, "parse", parse, 2);
292
295
  }
@@ -4,14 +4,14 @@ require 'nokogumboc'
4
4
  module Nokogiri
5
5
  # Parse an HTML document. +string+ contains the document. +string+
6
6
  # may also be an IO-like object. Returns a +Nokogiri::HTML::Document+.
7
- def self.HTML5(string)
8
- Nokogiri::HTML5.parse(string)
7
+ def self.HTML5(*args)
8
+ Nokogiri::HTML5.parse(*args)
9
9
  end
10
10
 
11
11
  module HTML5
12
12
  # Parse an HTML document. +string+ contains the document. +string+
13
13
  # may also be an IO-like object. Returns a +Nokogiri::HTML::Document+.
14
- def self.parse(string)
14
+ def self.parse(string, options={})
15
15
  if string.respond_to? :read
16
16
  string = string.read
17
17
  end
@@ -21,7 +21,7 @@ module Nokogiri
21
21
  string = reencode(string)
22
22
  end
23
23
 
24
- Nokogumbo.parse(string.to_s)
24
+ Nokogumbo.parse(string.to_s, options[:max_parse_errors] || 0)
25
25
  end
26
26
 
27
27
  # Fetch and parse a HTML document from the web, following redirects,
@@ -67,7 +67,7 @@ module Nokogiri
67
67
 
68
68
  case response
69
69
  when Net::HTTPSuccess
70
- doc = parse(reencode(response.body, response['content-type']))
70
+ doc = parse(reencode(response.body, response['content-type']), options)
71
71
  doc.instance_variable_set('@response', response)
72
72
  doc.class.send(:attr_reader, :response)
73
73
  doc
@@ -83,8 +83,8 @@ module Nokogiri
83
83
  # while fragment is on the Gumbo TODO list, simulate it by doing
84
84
  # a full document parse and ignoring the parent <html>, <head>, and <body>
85
85
  # tags, and collecting up the children of each.
86
- def self.fragment(string)
87
- doc = parse(string)
86
+ def self.fragment(*args)
87
+ doc = parse(*args)
88
88
  fragment = Nokogiri::HTML::DocumentFragment.new(doc)
89
89
 
90
90
  if doc.children.length != 1 or doc.children.first.name != 'html'
@@ -78,7 +78,7 @@ class TestNokogumbo < Minitest::Test
78
78
  end
79
79
 
80
80
  def test_html5_doctype
81
- doc = Nokogumbo.parse("<!DOCTYPE html><html></html>")
81
+ doc = Nokogiri::HTML5.parse("<!DOCTYPE html><html></html>")
82
82
  assert_match /<!DOCTYPE html>/, doc.to_html
83
83
  end
84
84
 
@@ -126,17 +126,43 @@ class TestNokogumbo < Minitest::Test
126
126
  end
127
127
 
128
128
  def test_parse_errors
129
- doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>")
129
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_parse_errors: 10)
130
130
  assert_equal doc.errors.length, 2
131
- doc = Nokogiri::HTML5("<!DOCTYPE html><html>")
131
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_parse_errors: 10)
132
132
  assert_empty doc.errors
133
133
  end
134
134
 
135
+ def test_max_parse_errors
136
+ # This document contains 2 parse errors, but we force limit to 1.
137
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_parse_errors: 1)
138
+ assert_equal 1, doc.errors.length
139
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_parse_errors: 1)
140
+ assert_empty doc.errors
141
+ end
142
+
143
+ def test_default_max_parse_errors
144
+ # This document contains 200 parse errors, but default limit is 0.
145
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html>" + "</p>" * 200)
146
+ assert_equal 0, doc.errors.length
147
+ end
148
+
135
149
  def test_parse_fragment_errors
136
- doc = Nokogiri::HTML5.fragment("<\r\n")
150
+ doc = Nokogiri::HTML5.fragment("<\r\n", max_parse_errors: 10)
137
151
  refute_empty doc.errors
138
152
  end
139
153
 
154
+ def test_fragment_max_parse_errors
155
+ # This fragment contains 3 parse errors, but we force limit to 1.
156
+ doc = Nokogiri::HTML5.fragment("<!-- -- --></a>", max_parse_errors: 1)
157
+ assert_equal 1, doc.errors.length
158
+ end
159
+
160
+ def test_fragment_default_max_parse_errors
161
+ # This fragment contains 201 parse errors, but default limit is 0.
162
+ doc = Nokogiri::HTML5.fragment("</p>" * 200)
163
+ assert_equal 0, doc.errors.length
164
+ end
165
+
140
166
  private
141
167
 
142
168
  def buffer
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.13
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-06 00:00:00.000000000 Z
11
+ date: 2018-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -90,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
90
90
  version: '0'
91
91
  requirements: []
92
92
  rubyforge_project:
93
- rubygems_version: 2.6.11
93
+ rubygems_version: 2.7.4
94
94
  signing_key:
95
95
  specification_version: 4
96
96
  summary: Nokogiri interface to the Gumbo HTML5 parser