nokogumbo 1.4.9 → 1.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokogumboc/nokogumbo.c +45 -8
- data/test-nokogumbo.rb +7 -0
- metadata +11 -11
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c97bf9e43a0be85f20cf07ec15901052a1f9dbe1
|
|
4
|
+
data.tar.gz: 4c195ae6b3fc161648b7fc0c7eea06d14e8bd451
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f49c427bdb96bce746e541c7864c755b0ece284f0f7fff8d0f3913403b8c9e9941b1d9c06ddec25142be4ee7ead2c97e96b9eac26eeee119cbb6cfc1879d5f7b
|
|
7
|
+
data.tar.gz: bf2b358e4f496f5f8e1c3b308e8c7befc308b9dda1af7399d4baa73cc59a395f630d315a5791565a088d7dfdf9c9a0eaaea5638798da6a2dfc6cea559df1fa25
|
data/ext/nokogumboc/nokogumbo.c
CHANGED
|
@@ -19,10 +19,13 @@
|
|
|
19
19
|
//
|
|
20
20
|
|
|
21
21
|
#include <ruby.h>
|
|
22
|
-
#include
|
|
22
|
+
#include "gumbo.h"
|
|
23
|
+
#include "error.h"
|
|
24
|
+
#include "parser.h"
|
|
23
25
|
|
|
24
26
|
// class constants
|
|
25
27
|
static VALUE Document;
|
|
28
|
+
static VALUE XMLSyntaxError;
|
|
26
29
|
|
|
27
30
|
#ifdef NGLIB
|
|
28
31
|
#include <nokogiri.h>
|
|
@@ -182,10 +185,10 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {
|
|
|
182
185
|
|
|
183
186
|
// Parse a string using gumbo_parse into a Nokogiri document
|
|
184
187
|
static VALUE parse(VALUE self, VALUE string) {
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
);
|
|
188
|
+
const GumboOptions *options = &kGumboDefaultOptions;
|
|
189
|
+
const char *input = RSTRING_PTR(string);
|
|
190
|
+
size_t input_len = RSTRING_LEN(string);
|
|
191
|
+
GumboOutput *output = gumbo_parse_with_options(options, input, input_len);
|
|
189
192
|
xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
|
|
190
193
|
#ifdef NGLIB
|
|
191
194
|
doc->type = XML_HTML_DOCUMENT_NODE;
|
|
@@ -210,9 +213,42 @@ static VALUE parse(VALUE self, VALUE string) {
|
|
|
210
213
|
xmlAddChild((xmlNodePtr)doc, node);
|
|
211
214
|
}
|
|
212
215
|
}
|
|
213
|
-
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
|
214
216
|
|
|
215
|
-
|
|
217
|
+
VALUE rdoc = Nokogiri_wrap_xml_document(Document, doc);
|
|
218
|
+
|
|
219
|
+
// Add parse errors to rdoc.
|
|
220
|
+
if (output->errors.length) {
|
|
221
|
+
GumboVector *errors = &output->errors;
|
|
222
|
+
GumboParser parser = { ._options = options };
|
|
223
|
+
GumboStringBuffer msg;
|
|
224
|
+
VALUE rerrors = rb_ary_new2(errors->length);
|
|
225
|
+
|
|
226
|
+
gumbo_string_buffer_init(&parser, &msg);
|
|
227
|
+
for (int i=0; i < errors->length; i++) {
|
|
228
|
+
GumboError *err = errors->data[i];
|
|
229
|
+
gumbo_string_buffer_clear(&parser, &msg);
|
|
230
|
+
gumbo_caret_diagnostic_to_string(&parser, err, input, &msg);
|
|
231
|
+
VALUE err_str = rb_str_new(msg.data, msg.length);
|
|
232
|
+
VALUE syntax_error = rb_class_new_instance(1, &err_str, XMLSyntaxError);
|
|
233
|
+
rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
|
|
234
|
+
rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR
|
|
235
|
+
rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR
|
|
236
|
+
rb_iv_set(syntax_error, "@file", Qnil);
|
|
237
|
+
rb_iv_set(syntax_error, "@line", INT2NUM(err->position.line));
|
|
238
|
+
rb_iv_set(syntax_error, "@str1", Qnil);
|
|
239
|
+
rb_iv_set(syntax_error, "@str2", Qnil);
|
|
240
|
+
rb_iv_set(syntax_error, "@str3", Qnil);
|
|
241
|
+
rb_iv_set(syntax_error, "@int1", INT2NUM(err->type));
|
|
242
|
+
rb_iv_set(syntax_error, "@column", INT2NUM(err->position.column));
|
|
243
|
+
rb_ary_push(rerrors, syntax_error);
|
|
244
|
+
}
|
|
245
|
+
rb_iv_set(rdoc, "@errors", rerrors);
|
|
246
|
+
gumbo_string_buffer_destroy(&parser, &msg);
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
gumbo_destroy_output(options, output);
|
|
250
|
+
|
|
251
|
+
return rdoc;
|
|
216
252
|
}
|
|
217
253
|
|
|
218
254
|
// Initialize the Nokogumbo class and fetch constants we will use later
|
|
@@ -224,10 +260,11 @@ void Init_nokogumboc() {
|
|
|
224
260
|
VALUE Nokogiri = rb_const_get(rb_cObject, rb_intern("Nokogiri"));
|
|
225
261
|
VALUE HTML = rb_const_get(Nokogiri, rb_intern("HTML"));
|
|
226
262
|
Document = rb_const_get(HTML, rb_intern("Document"));
|
|
263
|
+
VALUE XML = rb_const_get(Nokogiri, rb_intern("XML"));
|
|
264
|
+
XMLSyntaxError = rb_const_get(XML, rb_intern("SyntaxError"));
|
|
227
265
|
|
|
228
266
|
#ifndef NGLIB
|
|
229
267
|
// more class constants
|
|
230
|
-
VALUE XML = rb_const_get(Nokogiri, rb_intern("XML"));
|
|
231
268
|
Element = rb_const_get(XML, rb_intern("Element"));
|
|
232
269
|
Text = rb_const_get(XML, rb_intern("Text"));
|
|
233
270
|
CDATA = rb_const_get(XML, rb_intern("CDATA"));
|
data/test-nokogumbo.rb
CHANGED
|
@@ -125,6 +125,13 @@ class TestNokogumbo < Minitest::Test
|
|
|
125
125
|
assert_equal ["html", "comment", "html", "comment"], doc.children.map(&:name)
|
|
126
126
|
end
|
|
127
127
|
|
|
128
|
+
def test_parse_errors
|
|
129
|
+
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>")
|
|
130
|
+
assert_equal doc.errors.length, 2
|
|
131
|
+
doc = Nokogiri::HTML5("<!DOCTYPE html><html>")
|
|
132
|
+
assert_empty doc.errors
|
|
133
|
+
end
|
|
134
|
+
|
|
128
135
|
private
|
|
129
136
|
|
|
130
137
|
def buffer
|
metadata
CHANGED
|
@@ -1,27 +1,27 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: nokogumbo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.4.9
|
|
4
|
+
version: 1.4.10
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sam Ruby
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-
|
|
11
|
+
date: 2016-11-01 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- -
|
|
17
|
+
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
19
|
version: '0'
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- -
|
|
24
|
+
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '0'
|
|
27
27
|
description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
|
|
@@ -32,11 +32,10 @@ extensions:
|
|
|
32
32
|
- ext/nokogumboc/extconf.rb
|
|
33
33
|
extra_rdoc_files: []
|
|
34
34
|
files:
|
|
35
|
-
- ext/nokogumboc/extconf.rb
|
|
36
|
-
- ext/nokogumboc/nokogumbo.c
|
|
37
|
-
- lib/nokogumbo.rb
|
|
38
35
|
- LICENSE.txt
|
|
39
36
|
- README.md
|
|
37
|
+
- ext/nokogumboc/extconf.rb
|
|
38
|
+
- ext/nokogumboc/nokogumbo.c
|
|
40
39
|
- gumbo-parser/src/attribute.c
|
|
41
40
|
- gumbo-parser/src/attribute.h
|
|
42
41
|
- gumbo-parser/src/char_ref.c
|
|
@@ -69,10 +68,11 @@ files:
|
|
|
69
68
|
- gumbo-parser/src/vector.c
|
|
70
69
|
- gumbo-parser/src/vector.h
|
|
71
70
|
- gumbo-parser/visualc/include/strings.h
|
|
71
|
+
- lib/nokogumbo.rb
|
|
72
72
|
- test-nokogumbo.rb
|
|
73
73
|
homepage: https://github.com/rubys/nokogumbo/#readme
|
|
74
74
|
licenses:
|
|
75
|
-
- Apache
|
|
75
|
+
- Apache-2.0
|
|
76
76
|
metadata: {}
|
|
77
77
|
post_install_message:
|
|
78
78
|
rdoc_options: []
|
|
@@ -80,17 +80,17 @@ require_paths:
|
|
|
80
80
|
- lib
|
|
81
81
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
82
82
|
requirements:
|
|
83
|
-
- -
|
|
83
|
+
- - ">="
|
|
84
84
|
- !ruby/object:Gem::Version
|
|
85
85
|
version: '0'
|
|
86
86
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
87
|
requirements:
|
|
88
|
-
- -
|
|
88
|
+
- - ">="
|
|
89
89
|
- !ruby/object:Gem::Version
|
|
90
90
|
version: '0'
|
|
91
91
|
requirements: []
|
|
92
92
|
rubyforge_project:
|
|
93
|
-
rubygems_version: 2.
|
|
93
|
+
rubygems_version: 2.5.1
|
|
94
94
|
signing_key:
|
|
95
95
|
specification_version: 4
|
|
96
96
|
summary: Nokogiri interface to the Gumbo HTML5 parser
|