nokogumbo 1.4.9 → 1.4.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ecb515ec9c492a01dce23eed6eb7336695f166d6
4
- data.tar.gz: c1ac4272ee113e56ecd72302847886042165a1fb
3
+ metadata.gz: c97bf9e43a0be85f20cf07ec15901052a1f9dbe1
4
+ data.tar.gz: 4c195ae6b3fc161648b7fc0c7eea06d14e8bd451
5
5
  SHA512:
6
- metadata.gz: b979502577d944afa23cf0db4307494c6e53c320c3efc87c8066174d8ae3eee8902f60164375869dd6bd154abb1f6f69c42db9fbb0bc7fbd86d1b2f3e33bafd3
7
- data.tar.gz: abd153cd24e504dc82c80d38ad91c8dd3dae25e913d650d0c92a8823cb1638dcf99baa43eb40a6a36573f9de7d14d78e2d77921062e341f0e82d49290a2f7ad7
6
+ metadata.gz: f49c427bdb96bce746e541c7864c755b0ece284f0f7fff8d0f3913403b8c9e9941b1d9c06ddec25142be4ee7ead2c97e96b9eac26eeee119cbb6cfc1879d5f7b
7
+ data.tar.gz: bf2b358e4f496f5f8e1c3b308e8c7befc308b9dda1af7399d4baa73cc59a395f630d315a5791565a088d7dfdf9c9a0eaaea5638798da6a2dfc6cea559df1fa25
@@ -19,10 +19,13 @@
19
19
  //
20
20
 
21
21
  #include <ruby.h>
22
- #include <gumbo.h>
22
+ #include "gumbo.h"
23
+ #include "error.h"
24
+ #include "parser.h"
23
25
 
24
26
  // class constants
25
27
  static VALUE Document;
28
+ static VALUE XMLSyntaxError;
26
29
 
27
30
  #ifdef NGLIB
28
31
  #include <nokogiri.h>
@@ -182,10 +185,10 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {
182
185
 
183
186
  // Parse a string using gumbo_parse into a Nokogiri document
184
187
  static VALUE parse(VALUE self, VALUE string) {
185
- GumboOutput *output = gumbo_parse_with_options(
186
- &kGumboDefaultOptions, RSTRING_PTR(string),
187
- (size_t) RSTRING_LEN(string)
188
- );
188
+ const GumboOptions *options = &kGumboDefaultOptions;
189
+ const char *input = RSTRING_PTR(string);
190
+ size_t input_len = RSTRING_LEN(string);
191
+ GumboOutput *output = gumbo_parse_with_options(options, input, input_len);
189
192
  xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
190
193
  #ifdef NGLIB
191
194
  doc->type = XML_HTML_DOCUMENT_NODE;
@@ -210,9 +213,42 @@ static VALUE parse(VALUE self, VALUE string) {
210
213
  xmlAddChild((xmlNodePtr)doc, node);
211
214
  }
212
215
  }
213
- gumbo_destroy_output(&kGumboDefaultOptions, output);
214
216
 
215
- return Nokogiri_wrap_xml_document(Document, doc);
217
+ VALUE rdoc = Nokogiri_wrap_xml_document(Document, doc);
218
+
219
+ // Add parse errors to rdoc.
220
+ if (output->errors.length) {
221
+ GumboVector *errors = &output->errors;
222
+ GumboParser parser = { ._options = options };
223
+ GumboStringBuffer msg;
224
+ VALUE rerrors = rb_ary_new2(errors->length);
225
+
226
+ gumbo_string_buffer_init(&parser, &msg);
227
+ for (int i=0; i < errors->length; i++) {
228
+ GumboError *err = errors->data[i];
229
+ gumbo_string_buffer_clear(&parser, &msg);
230
+ gumbo_caret_diagnostic_to_string(&parser, err, input, &msg);
231
+ VALUE err_str = rb_str_new(msg.data, msg.length);
232
+ VALUE syntax_error = rb_class_new_instance(1, &err_str, XMLSyntaxError);
233
+ rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
234
+ rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR
235
+ rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR
236
+ rb_iv_set(syntax_error, "@file", Qnil);
237
+ rb_iv_set(syntax_error, "@line", INT2NUM(err->position.line));
238
+ rb_iv_set(syntax_error, "@str1", Qnil);
239
+ rb_iv_set(syntax_error, "@str2", Qnil);
240
+ rb_iv_set(syntax_error, "@str3", Qnil);
241
+ rb_iv_set(syntax_error, "@int1", INT2NUM(err->type));
242
+ rb_iv_set(syntax_error, "@column", INT2NUM(err->position.column));
243
+ rb_ary_push(rerrors, syntax_error);
244
+ }
245
+ rb_iv_set(rdoc, "@errors", rerrors);
246
+ gumbo_string_buffer_destroy(&parser, &msg);
247
+ }
248
+
249
+ gumbo_destroy_output(options, output);
250
+
251
+ return rdoc;
216
252
  }
217
253
 
218
254
  // Initialize the Nokogumbo class and fetch constants we will use later
@@ -224,10 +260,11 @@ void Init_nokogumboc() {
224
260
  VALUE Nokogiri = rb_const_get(rb_cObject, rb_intern("Nokogiri"));
225
261
  VALUE HTML = rb_const_get(Nokogiri, rb_intern("HTML"));
226
262
  Document = rb_const_get(HTML, rb_intern("Document"));
263
+ VALUE XML = rb_const_get(Nokogiri, rb_intern("XML"));
264
+ XMLSyntaxError = rb_const_get(XML, rb_intern("SyntaxError"));
227
265
 
228
266
  #ifndef NGLIB
229
267
  // more class constants
230
- VALUE XML = rb_const_get(Nokogiri, rb_intern("XML"));
231
268
  Element = rb_const_get(XML, rb_intern("Element"));
232
269
  Text = rb_const_get(XML, rb_intern("Text"));
233
270
  CDATA = rb_const_get(XML, rb_intern("CDATA"));
@@ -125,6 +125,13 @@ class TestNokogumbo < Minitest::Test
125
125
  assert_equal ["html", "comment", "html", "comment"], doc.children.map(&:name)
126
126
  end
127
127
 
128
+ def test_parse_errors
129
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>")
130
+ assert_equal doc.errors.length, 2
131
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html>")
132
+ assert_empty doc.errors
133
+ end
134
+
128
135
  private
129
136
 
130
137
  def buffer
metadata CHANGED
@@ -1,27 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.9
4
+ version: 1.4.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-01 00:00:00.000000000 Z
11
+ date: 2016-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
@@ -32,11 +32,10 @@ extensions:
32
32
  - ext/nokogumboc/extconf.rb
33
33
  extra_rdoc_files: []
34
34
  files:
35
- - ext/nokogumboc/extconf.rb
36
- - ext/nokogumboc/nokogumbo.c
37
- - lib/nokogumbo.rb
38
35
  - LICENSE.txt
39
36
  - README.md
37
+ - ext/nokogumboc/extconf.rb
38
+ - ext/nokogumboc/nokogumbo.c
40
39
  - gumbo-parser/src/attribute.c
41
40
  - gumbo-parser/src/attribute.h
42
41
  - gumbo-parser/src/char_ref.c
@@ -69,10 +68,11 @@ files:
69
68
  - gumbo-parser/src/vector.c
70
69
  - gumbo-parser/src/vector.h
71
70
  - gumbo-parser/visualc/include/strings.h
71
+ - lib/nokogumbo.rb
72
72
  - test-nokogumbo.rb
73
73
  homepage: https://github.com/rubys/nokogumbo/#readme
74
74
  licenses:
75
- - Apache 2.0
75
+ - Apache-2.0
76
76
  metadata: {}
77
77
  post_install_message:
78
78
  rdoc_options: []
@@ -80,17 +80,17 @@ require_paths:
80
80
  - lib
81
81
  required_ruby_version: !ruby/object:Gem::Requirement
82
82
  requirements:
83
- - - '>='
83
+ - - ">="
84
84
  - !ruby/object:Gem::Version
85
85
  version: '0'
86
86
  required_rubygems_version: !ruby/object:Gem::Requirement
87
87
  requirements:
88
- - - '>='
88
+ - - ">="
89
89
  - !ruby/object:Gem::Version
90
90
  version: '0'
91
91
  requirements: []
92
92
  rubyforge_project:
93
- rubygems_version: 2.0.14.1
93
+ rubygems_version: 2.5.1
94
94
  signing_key:
95
95
  specification_version: 4
96
96
  summary: Nokogiri interface to the Gumbo HTML5 parser