nokogiri 1.11.0.rc3 → 1.11.0.rc4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (37)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +1015 -947
  3. data/README.md +1 -1
  4. data/ext/nokogiri/depend +476 -357
  5. data/ext/nokogiri/extconf.rb +441 -321
  6. data/ext/nokogiri/html_document.c +79 -78
  7. data/ext/nokogiri/html_sax_parser_context.c +2 -2
  8. data/ext/nokogiri/nokogiri.c +34 -46
  9. data/ext/nokogiri/nokogiri.h +22 -26
  10. data/ext/nokogiri/xml_document.c +2 -2
  11. data/ext/nokogiri/xml_node.c +1 -1
  12. data/ext/nokogiri/xml_node_set.c +1 -1
  13. data/ext/nokogiri/xml_relax_ng.c +29 -11
  14. data/ext/nokogiri/xml_sax_parser.c +2 -7
  15. data/ext/nokogiri/xml_sax_parser_context.c +2 -2
  16. data/ext/nokogiri/xml_schema.c +55 -13
  17. data/ext/nokogiri/xml_xpath_context.c +80 -4
  18. data/ext/nokogiri/xslt_stylesheet.c +1 -4
  19. data/lib/nokogiri.rb +1 -1
  20. data/lib/nokogiri/css/parser.rb +3 -3
  21. data/lib/nokogiri/css/parser.y +2 -2
  22. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  23. data/lib/nokogiri/html/document.rb +12 -26
  24. data/lib/nokogiri/version.rb +2 -149
  25. data/lib/nokogiri/version/constant.rb +5 -0
  26. data/lib/nokogiri/version/info.rb +182 -0
  27. data/lib/nokogiri/xml/document.rb +17 -7
  28. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  29. data/lib/nokogiri/xml/node.rb +50 -27
  30. data/lib/nokogiri/xml/parse_options.rb +6 -0
  31. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  32. data/lib/nokogiri/xml/schema.rb +12 -4
  33. data/lib/nokogiri/xml/searchable.rb +3 -1
  34. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
  35. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
  36. data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
  37. metadata +34 -22
@@ -506,7 +506,7 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
506
506
  * The block must return a non-nil, non-false value if the +obj+ passed in
507
507
  * should be included in the canonicalized document.
508
508
  */
509
- static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
509
+ static VALUE nokogiri_xml_document_canonicalize(int argc, VALUE* argv, VALUE self)
510
510
  {
511
511
  VALUE mode;
512
512
  VALUE incl_ns;
@@ -587,7 +587,7 @@ void init_xml_document()
587
587
  rb_define_method(klass, "encoding", encoding, 0);
588
588
  rb_define_method(klass, "encoding=", set_encoding, 1);
589
589
  rb_define_method(klass, "version", version, 0);
590
- rb_define_method(klass, "canonicalize", canonicalize, -1);
590
+ rb_define_method(klass, "canonicalize", nokogiri_xml_document_canonicalize, -1);
591
591
  rb_define_method(klass, "dup", duplicate_document, -1);
592
592
  rb_define_method(klass, "url", url, 0);
593
593
  rb_define_method(klass, "create_entity", create_entity, -1);
@@ -301,7 +301,7 @@ ok:
301
301
  * issue #391, where new node's prefix may become the string "default"
302
302
  * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior.
303
303
  */
304
- xmlFree(reparentee->ns->prefix);
304
+ xmlFree((xmlChar*)reparentee->ns->prefix);
305
305
  reparentee->ns->prefix = NULL;
306
306
  }
307
307
  }
@@ -319,7 +319,7 @@ static VALUE slice(int argc, VALUE *argv, VALUE self)
319
319
  *
320
320
  * Return this list as an Array
321
321
  */
322
- static VALUE to_array(VALUE self, VALUE rb_node)
322
+ static VALUE to_array(VALUE self)
323
323
  {
324
324
  xmlNodeSetPtr node_set ;
325
325
  VALUE list;
@@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document)
53
53
  *
54
54
  * Create a new RelaxNG from the contents of +string+
55
55
  */
56
- static VALUE read_memory(VALUE klass, VALUE content)
56
+ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
57
57
  {
58
- xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
59
- (const char *)StringValuePtr(content),
60
- (int)RSTRING_LEN(content)
61
- );
58
+ VALUE content;
59
+ VALUE parse_options;
60
+ xmlRelaxNGParserCtxtPtr ctx;
62
61
  xmlRelaxNGPtr schema;
63
- VALUE errors = rb_ary_new();
62
+ VALUE errors;
64
63
  VALUE rb_schema;
64
+ int scanned_args = 0;
65
+
66
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
67
+ if (scanned_args == 1) {
68
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
69
+ }
65
70
 
71
+ ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
72
+
73
+ errors = rb_ary_new();
66
74
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
67
75
 
68
76
  #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
@@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
90
98
 
91
99
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
92
100
  rb_iv_set(rb_schema, "@errors", errors);
101
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
93
102
 
94
103
  return rb_schema;
95
104
  }
@@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content)
100
109
  *
101
110
  * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
102
111
  */
103
- static VALUE from_document(VALUE klass, VALUE document)
112
+ static VALUE from_document(int argc, VALUE *argv, VALUE klass)
104
113
  {
114
+ VALUE document;
115
+ VALUE parse_options;
105
116
  xmlDocPtr doc;
106
117
  xmlRelaxNGParserCtxtPtr ctx;
107
118
  xmlRelaxNGPtr schema;
108
119
  VALUE errors;
109
120
  VALUE rb_schema;
121
+ int scanned_args = 0;
122
+
123
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
110
124
 
111
125
  Data_Get_Struct(document, xmlDoc, doc);
126
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
112
127
 
113
- /* In case someone passes us a node. ugh. */
114
- doc = doc->doc;
128
+ if (scanned_args == 1) {
129
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
130
+ }
115
131
 
116
132
  ctx = xmlRelaxNGNewDocParserCtxt(doc);
117
133
 
@@ -129,6 +145,7 @@ static VALUE from_document(VALUE klass, VALUE document)
129
145
  schema = xmlRelaxNGParse(ctx);
130
146
 
131
147
  xmlSetStructuredErrorFunc(NULL, NULL);
148
+ xmlRelaxNGFreeParserCtxt(ctx);
132
149
 
133
150
  if(NULL == schema) {
134
151
  xmlErrorPtr error = xmlGetLastError();
@@ -142,6 +159,7 @@ static VALUE from_document(VALUE klass, VALUE document)
142
159
 
143
160
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
144
161
  rb_iv_set(rb_schema, "@errors", errors);
162
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
145
163
 
146
164
  return rb_schema;
147
165
  }
@@ -155,7 +173,7 @@ void init_xml_relax_ng()
155
173
 
156
174
  cNokogiriXmlRelaxNG = klass;
157
175
 
158
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
159
- rb_define_singleton_method(klass, "from_document", from_document, 1);
176
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
177
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
160
178
  rb_define_private_method(klass, "validate_document", validate_document, 1);
161
179
  }
@@ -1,8 +1,5 @@
1
1
  #include <xml_sax_parser.h>
2
2
 
3
- int vasprintf (char **strp, const char *fmt, va_list ap);
4
- void vasprintf_free (void *p);
5
-
6
3
  static ID id_start_document, id_end_document, id_start_element, id_end_element;
7
4
  static ID id_start_element_namespace, id_end_element_namespace;
8
5
  static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
@@ -206,7 +203,7 @@ static void warning_func(void * ctx, const char *msg, ...)
206
203
  va_end(args);
207
204
 
208
205
  ruby_message = NOKOGIRI_STR_NEW2(message);
209
- vasprintf_free(message);
206
+ free(message);
210
207
  rb_funcall(doc, id_warning, 1, ruby_message);
211
208
  }
212
209
 
@@ -223,7 +220,7 @@ static void error_func(void * ctx, const char *msg, ...)
223
220
  va_end(args);
224
221
 
225
222
  ruby_message = NOKOGIRI_STR_NEW2(message);
226
- vasprintf_free(message);
223
+ free(message);
227
224
  rb_funcall(doc, id_error, 1, ruby_message);
228
225
  }
229
226
 
@@ -262,8 +259,6 @@ static VALUE allocate(VALUE klass)
262
259
  {
263
260
  xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
264
261
 
265
- xmlSetStructuredErrorFunc(NULL, NULL);
266
-
267
262
  handler->startDocument = start_document;
268
263
  handler->endDocument = end_document;
269
264
  handler->startElement = start_element;
@@ -4,13 +4,13 @@ VALUE cNokogiriXmlSaxParserContext ;
4
4
 
5
5
  static void deallocate(xmlParserCtxtPtr ctxt)
6
6
  {
7
- NOKOGIRI_DEBUG_START(handler);
7
+ NOKOGIRI_DEBUG_START(ctxt);
8
8
 
9
9
  ctxt->sax = NULL;
10
10
 
11
11
  xmlFreeParserCtxt(ctxt);
12
12
 
13
- NOKOGIRI_DEBUG_END(handler);
13
+ NOKOGIRI_DEBUG_END(ctxt);
14
14
  }
15
15
 
16
16
  /*
@@ -93,15 +93,27 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
93
93
  *
94
94
  * Create a new Schema from the contents of +string+
95
95
  */
96
- static VALUE read_memory(VALUE klass, VALUE content)
96
+ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
97
97
  {
98
+ VALUE content;
99
+ VALUE parse_options;
100
+ int parse_options_int;
101
+ xmlSchemaParserCtxtPtr ctx;
98
102
  xmlSchemaPtr schema;
99
- xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(
100
- (const char *)StringValuePtr(content),
101
- (int)RSTRING_LEN(content)
102
- );
103
+ VALUE errors;
103
104
  VALUE rb_schema;
104
- VALUE errors = rb_ary_new();
105
+ int scanned_args = 0;
106
+ xmlExternalEntityLoader old_loader = 0;
107
+
108
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
109
+ if (scanned_args == 1) {
110
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
111
+ }
112
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
113
+
114
+ ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
115
+
116
+ errors = rb_ary_new();
105
117
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
106
118
 
107
119
  #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
@@ -109,10 +121,19 @@ static VALUE read_memory(VALUE klass, VALUE content)
109
121
  ctx,
110
122
  Nokogiri_error_array_pusher,
111
123
  (void *)errors
112
- );
124
+ );
113
125
  #endif
114
126
 
115
- schema = xmlSchemaParse(ctx);
127
+ if (parse_options_int & XML_PARSE_NONET) {
128
+ old_loader = xmlGetExternalEntityLoader();
129
+ xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
130
+ }
131
+
132
+ schema = xmlSchemaParse(ctx);
133
+
134
+ if (old_loader) {
135
+ xmlSetExternalEntityLoader(old_loader);
136
+ }
116
137
 
117
138
  xmlSetStructuredErrorFunc(NULL, NULL);
118
139
  xmlSchemaFreeParserCtxt(ctx);
@@ -129,6 +150,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
129
150
 
130
151
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
131
152
  rb_iv_set(rb_schema, "@errors", errors);
153
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
132
154
 
133
155
  return rb_schema;
134
156
  }
@@ -164,18 +186,28 @@ static int has_blank_nodes_p(VALUE cache)
164
186
  *
165
187
  * Create a new Schema from the Nokogiri::XML::Document +doc+
166
188
  */
167
- static VALUE from_document(VALUE klass, VALUE document)
189
+ static VALUE from_document(int argc, VALUE *argv, VALUE klass)
168
190
  {
191
+ VALUE document;
192
+ VALUE parse_options;
193
+ int parse_options_int;
169
194
  xmlDocPtr doc;
170
195
  xmlSchemaParserCtxtPtr ctx;
171
196
  xmlSchemaPtr schema;
172
197
  VALUE errors;
173
198
  VALUE rb_schema;
199
+ int scanned_args = 0;
200
+ xmlExternalEntityLoader old_loader = 0;
201
+
202
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
174
203
 
175
204
  Data_Get_Struct(document, xmlDoc, doc);
205
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
176
206
 
177
- /* In case someone passes us a node. ugh. */
178
- doc = doc->doc;
207
+ if (scanned_args == 1) {
208
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
209
+ }
210
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
179
211
 
180
212
  if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
181
213
  rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
@@ -194,8 +226,17 @@ static VALUE from_document(VALUE klass, VALUE document)
194
226
  );
195
227
  #endif
196
228
 
229
+ if (parse_options_int & XML_PARSE_NONET) {
230
+ old_loader = xmlGetExternalEntityLoader();
231
+ xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
232
+ }
233
+
197
234
  schema = xmlSchemaParse(ctx);
198
235
 
236
+ if (old_loader) {
237
+ xmlSetExternalEntityLoader(old_loader);
238
+ }
239
+
199
240
  xmlSetStructuredErrorFunc(NULL, NULL);
200
241
  xmlSchemaFreeParserCtxt(ctx);
201
242
 
@@ -211,6 +252,7 @@ static VALUE from_document(VALUE klass, VALUE document)
211
252
 
212
253
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
213
254
  rb_iv_set(rb_schema, "@errors", errors);
255
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
214
256
 
215
257
  return rb_schema;
216
258
 
@@ -226,8 +268,8 @@ void init_xml_schema()
226
268
 
227
269
  cNokogiriXmlSchema = klass;
228
270
 
229
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
230
- rb_define_singleton_method(klass, "from_document", from_document, 1);
271
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
272
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
231
273
 
232
274
  rb_define_private_method(klass, "validate_document", validate_document, 1);
233
275
  rb_define_private_method(klass, "validate_file", validate_file, 1);
@@ -1,6 +1,11 @@
1
1
  #include <xml_xpath_context.h>
2
2
 
3
- int vasprintf (char **strp, const char *fmt, va_list ap);
3
+ /*
4
+ * these constants have matching declarations in
5
+ * ext/java/nokogiri/internals/NokogiriNamespaceContext.java
6
+ */
7
+ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
8
+ static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
4
9
 
5
10
  static void deallocate(xmlXPathContextPtr ctx)
6
11
  {
@@ -9,6 +14,73 @@ static void deallocate(xmlXPathContextPtr ctx)
9
14
  NOKOGIRI_DEBUG_END(ctx);
10
15
  }
11
16
 
17
+ /* find a CSS class in an HTML element's `class` attribute */
18
+ const xmlChar* builtin_css_class(const xmlChar* str, const xmlChar *val)
19
+ {
20
+ int val_len;
21
+
22
+ if (str == NULL) { return(NULL); }
23
+ if (val == NULL) { return(NULL); }
24
+
25
+ val_len = xmlStrlen(val);
26
+ if (val_len == 0) { return(str); }
27
+
28
+ while (*str != 0) {
29
+ if ((*str == *val) && !xmlStrncmp(str, val, val_len)) {
30
+ const xmlChar* next_byte = str + val_len;
31
+
32
+ /* only match if the next byte is whitespace or end of string */
33
+ if ((*next_byte == 0) || (IS_BLANK_CH(*next_byte))) {
34
+ return((const xmlChar*)str);
35
+ }
36
+ }
37
+
38
+ /* advance str to whitespace */
39
+ while ((*str != 0) && !IS_BLANK_CH(*str)) {
40
+ str++;
41
+ }
42
+
43
+ /* advance str to start of next word or end of string */
44
+ while ((*str != 0) && IS_BLANK_CH(*str)) {
45
+ str++;
46
+ }
47
+ }
48
+
49
+ return(NULL);
50
+ }
51
+
52
+ /* xmlXPathFunction to wrap builtin_css_class() */
53
+ static void xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
54
+ {
55
+ xmlXPathObjectPtr hay, needle;
56
+
57
+ CHECK_ARITY(2);
58
+
59
+ CAST_TO_STRING;
60
+ needle = valuePop(ctxt);
61
+ if ((needle == NULL) || (needle->type != XPATH_STRING)) {
62
+ xmlXPathFreeObject(needle);
63
+ XP_ERROR(XPATH_INVALID_TYPE);
64
+ }
65
+
66
+ CAST_TO_STRING;
67
+ hay = valuePop(ctxt);
68
+ if ((hay == NULL) || (hay->type != XPATH_STRING)) {
69
+ xmlXPathFreeObject(hay);
70
+ xmlXPathFreeObject(needle);
71
+ XP_ERROR(XPATH_INVALID_TYPE);
72
+ }
73
+
74
+ if (builtin_css_class(hay->stringval, needle->stringval)) {
75
+ valuePush(ctxt, xmlXPathNewBoolean(1));
76
+ } else {
77
+ valuePush(ctxt, xmlXPathNewBoolean(0));
78
+ }
79
+
80
+ xmlXPathFreeObject(hay);
81
+ xmlXPathFreeObject(needle);
82
+ }
83
+
12
84
  /*
13
85
  * call-seq:
14
86
  * register_ns(prefix, uri)
@@ -263,14 +335,18 @@ static VALUE new(VALUE klass, VALUE nodeobj)
263
335
  xmlXPathContextPtr ctx;
264
336
  VALUE self;
265
337
 
266
- xmlXPathInit();
267
-
268
338
  Data_Get_Struct(nodeobj, xmlNode, node);
269
339
 
340
+ xmlXPathInit();
341
+
270
342
  ctx = xmlXPathNewContext(node->doc);
271
343
  ctx->node = node;
344
+
345
+ xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
346
+ xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
347
+ xpath_builtin_css_class);
348
+
272
349
  self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
273
- /*rb_iv_set(self, "@xpath_handler", Qnil); */
274
350
  return self;
275
351
  }
276
352
 
@@ -7,9 +7,6 @@
7
7
 
8
8
  VALUE xslt;
9
9
 
10
- int vasprintf (char **strp, const char *fmt, va_list ap);
11
- void vasprintf_free (void *p);
12
-
13
10
  static void mark(nokogiriXsltStylesheetTuple *wrapper)
14
11
  {
15
12
  rb_gc_mark(wrapper->func_instances);
@@ -37,7 +34,7 @@ static void xslt_generic_error_handler(void * ctx, const char *msg, ...)
37
34
 
38
35
  rb_str_cat2((VALUE)ctx, message);
39
36
 
40
- vasprintf_free(message);
37
+ free(message);
41
38
  }
42
39
 
43
40
  VALUE Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
@@ -113,7 +113,7 @@ module Nokogiri
113
113
  end
114
114
 
115
115
  ###
116
- # Parser a document contained in +args+. Nokogiri will try to guess what
116
+ # Parse a document contained in +args+. Nokogiri will try to guess what
117
117
  # type of document you are attempting to parse. For more information, see
118
118
  # Nokogiri.parse
119
119
  #