nokogumbo 1.4.9 → 1.4.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ecb515ec9c492a01dce23eed6eb7336695f166d6
4
- data.tar.gz: c1ac4272ee113e56ecd72302847886042165a1fb
3
+ metadata.gz: c97bf9e43a0be85f20cf07ec15901052a1f9dbe1
4
+ data.tar.gz: 4c195ae6b3fc161648b7fc0c7eea06d14e8bd451
5
5
  SHA512:
6
- metadata.gz: b979502577d944afa23cf0db4307494c6e53c320c3efc87c8066174d8ae3eee8902f60164375869dd6bd154abb1f6f69c42db9fbb0bc7fbd86d1b2f3e33bafd3
7
- data.tar.gz: abd153cd24e504dc82c80d38ad91c8dd3dae25e913d650d0c92a8823cb1638dcf99baa43eb40a6a36573f9de7d14d78e2d77921062e341f0e82d49290a2f7ad7
6
+ metadata.gz: f49c427bdb96bce746e541c7864c755b0ece284f0f7fff8d0f3913403b8c9e9941b1d9c06ddec25142be4ee7ead2c97e96b9eac26eeee119cbb6cfc1879d5f7b
7
+ data.tar.gz: bf2b358e4f496f5f8e1c3b308e8c7befc308b9dda1af7399d4baa73cc59a395f630d315a5791565a088d7dfdf9c9a0eaaea5638798da6a2dfc6cea559df1fa25
@@ -19,10 +19,13 @@
19
19
  //
20
20
 
21
21
  #include <ruby.h>
22
- #include <gumbo.h>
22
+ #include "gumbo.h"
23
+ #include "error.h"
24
+ #include "parser.h"
23
25
 
24
26
  // class constants
25
27
  static VALUE Document;
28
+ static VALUE XMLSyntaxError;
26
29
 
27
30
  #ifdef NGLIB
28
31
  #include <nokogiri.h>
@@ -182,10 +185,10 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {
182
185
 
183
186
  // Parse a string using gumbo_parse into a Nokogiri document
184
187
  static VALUE parse(VALUE self, VALUE string) {
185
- GumboOutput *output = gumbo_parse_with_options(
186
- &kGumboDefaultOptions, RSTRING_PTR(string),
187
- (size_t) RSTRING_LEN(string)
188
- );
188
+ const GumboOptions *options = &kGumboDefaultOptions;
189
+ const char *input = RSTRING_PTR(string);
190
+ size_t input_len = RSTRING_LEN(string);
191
+ GumboOutput *output = gumbo_parse_with_options(options, input, input_len);
189
192
  xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
190
193
  #ifdef NGLIB
191
194
  doc->type = XML_HTML_DOCUMENT_NODE;
@@ -210,9 +213,42 @@ static VALUE parse(VALUE self, VALUE string) {
210
213
  xmlAddChild((xmlNodePtr)doc, node);
211
214
  }
212
215
  }
213
- gumbo_destroy_output(&kGumboDefaultOptions, output);
214
216
 
215
- return Nokogiri_wrap_xml_document(Document, doc);
217
+ VALUE rdoc = Nokogiri_wrap_xml_document(Document, doc);
218
+
219
+ // Add parse errors to rdoc.
220
+ if (output->errors.length) {
221
+ GumboVector *errors = &output->errors;
222
+ GumboParser parser = { ._options = options };
223
+ GumboStringBuffer msg;
224
+ VALUE rerrors = rb_ary_new2(errors->length);
225
+
226
+ gumbo_string_buffer_init(&parser, &msg);
227
+ for (int i=0; i < errors->length; i++) {
228
+ GumboError *err = errors->data[i];
229
+ gumbo_string_buffer_clear(&parser, &msg);
230
+ gumbo_caret_diagnostic_to_string(&parser, err, input, &msg);
231
+ VALUE err_str = rb_str_new(msg.data, msg.length);
232
+ VALUE syntax_error = rb_class_new_instance(1, &err_str, XMLSyntaxError);
233
+ rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
234
+ rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR
235
+ rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR
236
+ rb_iv_set(syntax_error, "@file", Qnil);
237
+ rb_iv_set(syntax_error, "@line", INT2NUM(err->position.line));
238
+ rb_iv_set(syntax_error, "@str1", Qnil);
239
+ rb_iv_set(syntax_error, "@str2", Qnil);
240
+ rb_iv_set(syntax_error, "@str3", Qnil);
241
+ rb_iv_set(syntax_error, "@int1", INT2NUM(err->type));
242
+ rb_iv_set(syntax_error, "@column", INT2NUM(err->position.column));
243
+ rb_ary_push(rerrors, syntax_error);
244
+ }
245
+ rb_iv_set(rdoc, "@errors", rerrors);
246
+ gumbo_string_buffer_destroy(&parser, &msg);
247
+ }
248
+
249
+ gumbo_destroy_output(options, output);
250
+
251
+ return rdoc;
216
252
  }
217
253
 
218
254
  // Initialize the Nokogumbo class and fetch constants we will use later
@@ -224,10 +260,11 @@ void Init_nokogumboc() {
224
260
  VALUE Nokogiri = rb_const_get(rb_cObject, rb_intern("Nokogiri"));
225
261
  VALUE HTML = rb_const_get(Nokogiri, rb_intern("HTML"));
226
262
  Document = rb_const_get(HTML, rb_intern("Document"));
263
+ VALUE XML = rb_const_get(Nokogiri, rb_intern("XML"));
264
+ XMLSyntaxError = rb_const_get(XML, rb_intern("SyntaxError"));
227
265
 
228
266
  #ifndef NGLIB
229
267
  // more class constants
230
- VALUE XML = rb_const_get(Nokogiri, rb_intern("XML"));
231
268
  Element = rb_const_get(XML, rb_intern("Element"));
232
269
  Text = rb_const_get(XML, rb_intern("Text"));
233
270
  CDATA = rb_const_get(XML, rb_intern("CDATA"));
@@ -125,6 +125,13 @@ class TestNokogumbo < Minitest::Test
125
125
  assert_equal ["html", "comment", "html", "comment"], doc.children.map(&:name)
126
126
  end
127
127
 
128
+ def test_parse_errors
129
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>")
130
+ assert_equal doc.errors.length, 2
131
+ doc = Nokogiri::HTML5("<!DOCTYPE html><html>")
132
+ assert_empty doc.errors
133
+ end
134
+
128
135
  private
129
136
 
130
137
  def buffer
metadata CHANGED
@@ -1,27 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.9
4
+ version: 1.4.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-01 00:00:00.000000000 Z
11
+ date: 2016-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
@@ -32,11 +32,10 @@ extensions:
32
32
  - ext/nokogumboc/extconf.rb
33
33
  extra_rdoc_files: []
34
34
  files:
35
- - ext/nokogumboc/extconf.rb
36
- - ext/nokogumboc/nokogumbo.c
37
- - lib/nokogumbo.rb
38
35
  - LICENSE.txt
39
36
  - README.md
37
+ - ext/nokogumboc/extconf.rb
38
+ - ext/nokogumboc/nokogumbo.c
40
39
  - gumbo-parser/src/attribute.c
41
40
  - gumbo-parser/src/attribute.h
42
41
  - gumbo-parser/src/char_ref.c
@@ -69,10 +68,11 @@ files:
69
68
  - gumbo-parser/src/vector.c
70
69
  - gumbo-parser/src/vector.h
71
70
  - gumbo-parser/visualc/include/strings.h
71
+ - lib/nokogumbo.rb
72
72
  - test-nokogumbo.rb
73
73
  homepage: https://github.com/rubys/nokogumbo/#readme
74
74
  licenses:
75
- - Apache 2.0
75
+ - Apache-2.0
76
76
  metadata: {}
77
77
  post_install_message:
78
78
  rdoc_options: []
@@ -80,17 +80,17 @@ require_paths:
80
80
  - lib
81
81
  required_ruby_version: !ruby/object:Gem::Requirement
82
82
  requirements:
83
- - - '>='
83
+ - - ">="
84
84
  - !ruby/object:Gem::Version
85
85
  version: '0'
86
86
  required_rubygems_version: !ruby/object:Gem::Requirement
87
87
  requirements:
88
- - - '>='
88
+ - - ">="
89
89
  - !ruby/object:Gem::Version
90
90
  version: '0'
91
91
  requirements: []
92
92
  rubyforge_project:
93
- rubygems_version: 2.0.14.1
93
+ rubygems_version: 2.5.1
94
94
  signing_key:
95
95
  specification_version: 4
96
96
  summary: Nokogiri interface to the Gumbo HTML5 parser