nokogumbo 1.4.13 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4a025ea0b590f29baaaba62289f947c6e97807c7
4
- data.tar.gz: e2b4b270b28c5fc6de4b24be437faf548cbf8b4e
2
+ SHA256:
3
+ metadata.gz: 96fa61565f78d5491e0b6d5b505cf936524745eb848b8b6584fc15e20c7ae35b
4
+ data.tar.gz: e5416f71bbe90323f04b8aad4dc48b28947e43a9eb46f446f8ca1444f519a07b
5
5
  SHA512:
6
- metadata.gz: 21c251c64b6d821a9c8ec1c06f3d00930d29390e52a9b5c8587e67bf71d3cd5e5663e77690019d68121d0072fe766819fa817049deb0d8eac93cd98d15fb2791
7
- data.tar.gz: b9ef569e5974fb6ef6246434295e61f87fc462b997ea79fd9d9645f76360f0056052ab5f3fc3d4c53e8c3fc989c4185a65f333e453b56eb3bfdbb5e7edfc341b
6
+ metadata.gz: 676bf3585d38cd4ad5c72b8b3afd4952e248c747683ae1072dd43f6ce1ccd279177e4d0c75a9821ed76d32806333128152231349d8d113ae5d81279580b13004
7
+ data.tar.gz: 3459078d96977399e75551c4a3ee5623091f48569984b771e540ec111125f5af91e39a8d78cbd3ce9280326b1b9395dc4a0b0d7f0a72294876682cb9fe35e3d9
data/README.md CHANGED
@@ -89,5 +89,5 @@ Installation
89
89
  Related efforts
90
90
  ============
91
91
 
92
- * [ruby-gumbo](https://github.com/galdor/ruby-gumbo#readme) - a ruby binding
92
+ * [ruby-gumbo](https://github.com/nevir/ruby-gumbo#readme) - a ruby binding
93
93
  for the Gumbo HTML5 parser.
@@ -184,11 +184,14 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {
184
184
  }
185
185
 
186
186
  // Parse a string using gumbo_parse into a Nokogiri document
187
- static VALUE parse(VALUE self, VALUE string) {
188
- const GumboOptions *options = &kGumboDefaultOptions;
187
+ static VALUE parse(VALUE self, VALUE string, VALUE max_parse_errors) {
188
+ GumboOptions options;
189
+ memcpy(&options, &kGumboDefaultOptions, sizeof options);
190
+ options.max_errors = NUM2INT(max_parse_errors);
191
+
189
192
  const char *input = RSTRING_PTR(string);
190
193
  size_t input_len = RSTRING_LEN(string);
191
- GumboOutput *output = gumbo_parse_with_options(options, input, input_len);
194
+ GumboOutput *output = gumbo_parse_with_options(&options, input, input_len);
192
195
  xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
193
196
  #ifdef NGLIB
194
197
  doc->type = XML_HTML_DOCUMENT_NODE;
@@ -219,7 +222,7 @@ static VALUE parse(VALUE self, VALUE string) {
219
222
  // Add parse errors to rdoc.
220
223
  if (output->errors.length) {
221
224
  GumboVector *errors = &output->errors;
222
- GumboParser parser = { ._options = options };
225
+ GumboParser parser = { ._options = &options };
223
226
  GumboStringBuffer msg;
224
227
  VALUE rerrors = rb_ary_new2(errors->length);
225
228
 
@@ -253,7 +256,7 @@ static VALUE parse(VALUE self, VALUE string) {
253
256
  gumbo_string_buffer_destroy(&parser, &msg);
254
257
  }
255
258
 
256
- gumbo_destroy_output(options, output);
259
+ gumbo_destroy_output(&options, output);
257
260
 
258
261
  return rdoc;
259
262
  }
@@ -288,5 +291,5 @@ void Init_nokogumboc() {
288
291
 
289
292
  // define Nokogumbo class with a singleton parse method
290
293
  VALUE Gumbo = rb_define_class("Nokogumbo", rb_cObject);
291
- rb_define_singleton_method(Gumbo, "parse", parse, 1);
294
+ rb_define_singleton_method(Gumbo, "parse", parse, 2);
292
295
  }
@@ -4,14 +4,14 @@ require 'nokogumboc'
4
4
  module Nokogiri
5
5
  # Parse an HTML document. +string+ contains the document. +string+
6
6
  # may also be an IO-like object. Returns a +Nokogiri::HTML::Document+.
7
- def self.HTML5(string)
8
- Nokogiri::HTML5.parse(string)
7
+ def self.HTML5(*args)
8
+ Nokogiri::HTML5.parse(*args)
9
9
  end
10
10
 
11
11
  module HTML5
12
12
  # Parse an HTML document. +string+ contains the document. +string+
13
13
  # may also be an IO-like object. Returns a +Nokogiri::HTML::Document+.
14
- def self.parse(string)
14
+ def self.parse(string, options={})
15
15
  if string.respond_to? :read
16
16
  string = string.read
17
17
  end
@@ -21,7 +21,7 @@ module Nokogiri
21
21
  string = reencode(string)
22
22
  end
23
23
 
24
- Nokogumbo.parse(string.to_s)
24
+ Nokogumbo.parse(string.to_s, options[:max_parse_errors] || 0)
25
25
  end
26
26
 
27
27
  # Fetch and parse a HTML document from the web, following redirects,
@@ -67,7 +67,7 @@ module Nokogiri
67
67
 
68
68
  case response
69
69
  when Net::HTTPSuccess
70
- doc = parse(reencode(response.body, response['content-type']))
70
+ doc = parse(reencode(response.body, response['content-type']), options)
71
71
  doc.instance_variable_set('@response', response)
72
72
  doc.class.send(:attr_reader, :response)
73
73
  doc
@@ -83,8 +83,8 @@ module Nokogiri
83
83
  # while fragment is on the Gumbo TODO list, simulate it by doing
84
84
  # a full document parse and ignoring the parent <html>, <head>, and <body>
85
85
  # tags, and collecting up the children of each.
86
- def self.fragment(string)
87
- doc = parse(string)
86
+ def self.fragment(*args)
87
+ doc = parse(*args)
88
88
  fragment = Nokogiri::HTML::DocumentFragment.new(doc)
89
89
 
90
90
  if doc.children.length != 1 or doc.children.first.name != 'html'
@@ -78,7 +78,7 @@ class TestNokogumbo < Minitest::Test
78
78
  end
79
79
 
80
80
  def test_html5_doctype
81
- doc = Nokogumbo.parse("<!DOCTYPE html><html></html>")
81
+ doc = Nokogiri::HTML5.parse("<!DOCTYPE html><html></html>")
82
82
  assert_match /<!DOCTYPE html>/, doc.to_html
83
83
  end
84
84
 
@@ -126,17 +126,43 @@ class TestNokogumbo < Minitest::Test
126
126
  end
127
127
 
128
128
  def test_parse_errors
129
- doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>")
129
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_parse_errors: 10)
130
130
  assert_equal doc.errors.length, 2
131
- doc = Nokogiri::HTML5("<!DOCTYPE html><html>")
131
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_parse_errors: 10)
132
132
  assert_empty doc.errors
133
133
  end
134
134
 
135
+ def test_max_parse_errors
136
+ # This document contains 2 parse errors, but we force limit to 1.
137
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_parse_errors: 1)
138
+ assert_equal 1, doc.errors.length
139
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_parse_errors: 1)
140
+ assert_empty doc.errors
141
+ end
142
+
143
+ def test_default_max_parse_errors
144
+ # This document contains 200 parse errors, but default limit is 0.
145
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html>" + "</p>" * 200)
146
+ assert_equal 0, doc.errors.length
147
+ end
148
+
135
149
  def test_parse_fragment_errors
136
- doc = Nokogiri::HTML5.fragment("<\r\n")
150
+ doc = Nokogiri::HTML5.fragment("<\r\n", max_parse_errors: 10)
137
151
  refute_empty doc.errors
138
152
  end
139
153
 
154
+ def test_fragment_max_parse_errors
155
+ # This fragment contains 3 parse errors, but we force limit to 1.
156
+ doc = Nokogiri::HTML5.fragment("<!-- -- --></a>", max_parse_errors: 1)
157
+ assert_equal 1, doc.errors.length
158
+ end
159
+
160
+ def test_fragment_default_max_parse_errors
161
+ # This fragment contains 201 parse errors, but default limit is 0.
162
+ doc = Nokogiri::HTML5.fragment("</p>" * 200)
163
+ assert_equal 0, doc.errors.length
164
+ end
165
+
140
166
  private
141
167
 
142
168
  def buffer
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.13
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-06 00:00:00.000000000 Z
11
+ date: 2018-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -90,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
90
90
  version: '0'
91
91
  requirements: []
92
92
  rubyforge_project:
93
- rubygems_version: 2.6.11
93
+ rubygems_version: 2.7.4
94
94
  signing_key:
95
95
  specification_version: 4
96
96
  summary: Nokogiri interface to the Gumbo HTML5 parser