nokogiri 1.10.6-java → 1.11.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (162)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/README.md +165 -91
  5. data/ext/java/nokogiri/HtmlDocument.java +34 -46
  6. data/ext/java/nokogiri/HtmlSaxParserContext.java +88 -58
  7. data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
  8. data/ext/java/nokogiri/NokogiriService.java +1 -1
  9. data/ext/java/nokogiri/XmlAttr.java +13 -20
  10. data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
  11. data/ext/java/nokogiri/XmlCdata.java +3 -4
  12. data/ext/java/nokogiri/XmlComment.java +1 -1
  13. data/ext/java/nokogiri/XmlDocument.java +148 -175
  14. data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
  15. data/ext/java/nokogiri/XmlDtd.java +5 -8
  16. data/ext/java/nokogiri/XmlElement.java +1 -20
  17. data/ext/java/nokogiri/XmlElementDecl.java +23 -28
  18. data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
  19. data/ext/java/nokogiri/XmlEntityReference.java +2 -2
  20. data/ext/java/nokogiri/XmlNamespace.java +72 -89
  21. data/ext/java/nokogiri/XmlNode.java +303 -406
  22. data/ext/java/nokogiri/XmlNodeSet.java +72 -77
  23. data/ext/java/nokogiri/XmlReader.java +12 -13
  24. data/ext/java/nokogiri/XmlRelaxng.java +10 -3
  25. data/ext/java/nokogiri/XmlSaxParserContext.java +15 -10
  26. data/ext/java/nokogiri/XmlSchema.java +87 -27
  27. data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
  28. data/ext/java/nokogiri/XmlText.java +12 -9
  29. data/ext/java/nokogiri/XmlXpathContext.java +55 -25
  30. data/ext/java/nokogiri/XsltStylesheet.java +7 -15
  31. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +52 -46
  32. data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
  33. data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
  34. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
  35. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
  36. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +67 -10
  37. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
  38. data/ext/java/nokogiri/internals/ParserContext.java +27 -73
  39. data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
  40. data/ext/java/nokogiri/internals/XmlDomParserContext.java +18 -33
  41. data/ext/nokogiri/depend +476 -357
  42. data/ext/nokogiri/extconf.rb +507 -357
  43. data/ext/nokogiri/html_document.c +79 -78
  44. data/ext/nokogiri/html_sax_parser_context.c +2 -2
  45. data/ext/nokogiri/nokogiri.c +34 -40
  46. data/ext/nokogiri/xml_document.c +18 -4
  47. data/ext/nokogiri/xml_io.c +8 -6
  48. data/ext/nokogiri/xml_node.c +21 -1
  49. data/ext/nokogiri/xml_node_set.c +1 -1
  50. data/ext/nokogiri/xml_reader.c +6 -17
  51. data/ext/nokogiri/xml_relax_ng.c +29 -11
  52. data/ext/nokogiri/xml_sax_parser.c +2 -7
  53. data/ext/nokogiri/xml_sax_parser_context.c +2 -2
  54. data/ext/nokogiri/xml_schema.c +84 -13
  55. data/ext/nokogiri/xml_xpath_context.c +80 -4
  56. data/ext/nokogiri/xslt_stylesheet.c +1 -8
  57. data/lib/nokogiri.rb +22 -22
  58. data/lib/nokogiri/css.rb +1 -0
  59. data/lib/nokogiri/css/node.rb +1 -0
  60. data/lib/nokogiri/css/parser.rb +63 -62
  61. data/lib/nokogiri/css/parser.y +2 -2
  62. data/lib/nokogiri/css/parser_extras.rb +39 -36
  63. data/lib/nokogiri/css/syntax_error.rb +1 -0
  64. data/lib/nokogiri/css/tokenizer.rb +1 -0
  65. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  66. data/lib/nokogiri/decorators/slop.rb +1 -0
  67. data/lib/nokogiri/html.rb +1 -0
  68. data/lib/nokogiri/html/builder.rb +1 -0
  69. data/lib/nokogiri/html/document.rb +13 -26
  70. data/lib/nokogiri/html/document_fragment.rb +1 -0
  71. data/lib/nokogiri/html/element_description.rb +1 -0
  72. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  73. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  74. data/lib/nokogiri/html/sax/parser.rb +1 -0
  75. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  76. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  77. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  78. data/lib/nokogiri/nokogiri.jar +0 -0
  79. data/lib/nokogiri/syntax_error.rb +1 -0
  80. data/lib/nokogiri/version.rb +3 -109
  81. data/lib/nokogiri/version/constant.rb +5 -0
  82. data/lib/nokogiri/version/info.rb +182 -0
  83. data/lib/nokogiri/xml.rb +1 -0
  84. data/lib/nokogiri/xml/attr.rb +1 -0
  85. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  86. data/lib/nokogiri/xml/builder.rb +3 -2
  87. data/lib/nokogiri/xml/cdata.rb +1 -0
  88. data/lib/nokogiri/xml/character_data.rb +1 -0
  89. data/lib/nokogiri/xml/document.rb +20 -15
  90. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  91. data/lib/nokogiri/xml/dtd.rb +1 -0
  92. data/lib/nokogiri/xml/element_content.rb +1 -0
  93. data/lib/nokogiri/xml/element_decl.rb +1 -0
  94. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  95. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  96. data/lib/nokogiri/xml/namespace.rb +1 -0
  97. data/lib/nokogiri/xml/node.rb +587 -249
  98. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  99. data/lib/nokogiri/xml/node_set.rb +1 -0
  100. data/lib/nokogiri/xml/notation.rb +1 -0
  101. data/lib/nokogiri/xml/parse_options.rb +10 -3
  102. data/lib/nokogiri/xml/pp.rb +1 -0
  103. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  104. data/lib/nokogiri/xml/pp/node.rb +1 -0
  105. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  106. data/lib/nokogiri/xml/reader.rb +7 -3
  107. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  108. data/lib/nokogiri/xml/sax.rb +1 -0
  109. data/lib/nokogiri/xml/sax/document.rb +1 -0
  110. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  111. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  112. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  113. data/lib/nokogiri/xml/schema.rb +13 -4
  114. data/lib/nokogiri/xml/searchable.rb +25 -16
  115. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  116. data/lib/nokogiri/xml/text.rb +1 -0
  117. data/lib/nokogiri/xml/xpath.rb +1 -0
  118. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
  119. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  120. data/lib/nokogiri/xslt.rb +1 -0
  121. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  122. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  123. metadata +92 -157
  124. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
  125. data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
  126. data/ext/nokogiri/html_document.h +0 -10
  127. data/ext/nokogiri/html_element_description.h +0 -10
  128. data/ext/nokogiri/html_entity_lookup.h +0 -8
  129. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  130. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  131. data/ext/nokogiri/nokogiri.h +0 -121
  132. data/ext/nokogiri/xml_attr.h +0 -9
  133. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  134. data/ext/nokogiri/xml_cdata.h +0 -9
  135. data/ext/nokogiri/xml_comment.h +0 -9
  136. data/ext/nokogiri/xml_document.h +0 -23
  137. data/ext/nokogiri/xml_document_fragment.h +0 -10
  138. data/ext/nokogiri/xml_dtd.h +0 -10
  139. data/ext/nokogiri/xml_element_content.h +0 -10
  140. data/ext/nokogiri/xml_element_decl.h +0 -9
  141. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  142. data/ext/nokogiri/xml_entity_decl.h +0 -10
  143. data/ext/nokogiri/xml_entity_reference.h +0 -9
  144. data/ext/nokogiri/xml_io.h +0 -11
  145. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  146. data/ext/nokogiri/xml_namespace.h +0 -14
  147. data/ext/nokogiri/xml_node.h +0 -13
  148. data/ext/nokogiri/xml_node_set.h +0 -12
  149. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  150. data/ext/nokogiri/xml_reader.h +0 -10
  151. data/ext/nokogiri/xml_relax_ng.h +0 -9
  152. data/ext/nokogiri/xml_sax_parser.h +0 -39
  153. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  154. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  155. data/ext/nokogiri/xml_schema.h +0 -9
  156. data/ext/nokogiri/xml_syntax_error.h +0 -13
  157. data/ext/nokogiri/xml_text.h +0 -9
  158. data/ext/nokogiri/xml_xpath_context.h +0 -10
  159. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  160. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  161. data/patches/libxml2/0002-Remove-script-macro-support.patch +0 -40
  162. data/patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch +0 -44
@@ -301,7 +301,7 @@ ok:
301
301
  * issue #391, where new node's prefix may become the string "default"
302
302
  * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior.
303
303
  */
304
- xmlFree(reparentee->ns->prefix);
304
+ xmlFree((xmlChar*)reparentee->ns->prefix);
305
305
  reparentee->ns->prefix = NULL;
306
306
  }
307
307
  }
@@ -1332,6 +1332,25 @@ static VALUE line(VALUE self)
1332
1332
  return INT2NUM(xmlGetLineNo(node));
1333
1333
  }
1334
1334
 
1335
+ /*
1336
+ * call-seq:
1337
+ * line=(num)
1338
+ *
1339
+ * Sets the line for this Node. num must be less than 65535.
1340
+ */
1341
+ static VALUE set_line(VALUE self, VALUE num)
1342
+ {
1343
+ xmlNodePtr node;
1344
+ Data_Get_Struct(self, xmlNode, node);
1345
+
1346
+ int value = NUM2INT(num);
1347
+ if (value < 65535) {
1348
+ node->line = value;
1349
+ }
1350
+
1351
+ return num;
1352
+ }
1353
+
1335
1354
  /*
1336
1355
  * call-seq:
1337
1356
  * add_namespace_definition(prefix, href)
@@ -1728,6 +1747,7 @@ void init_xml_node()
1728
1747
  rb_define_method(klass, "create_external_subset", create_external_subset, 3);
1729
1748
  rb_define_method(klass, "pointer_id", pointer_id, 0);
1730
1749
  rb_define_method(klass, "line", line, 0);
1750
+ rb_define_method(klass, "line=", set_line, 1);
1731
1751
  rb_define_method(klass, "content", get_native_content, 0);
1732
1752
  rb_define_method(klass, "native_content=", set_native_content, 1);
1733
1753
  rb_define_method(klass, "lang", get_lang, 0);
@@ -319,7 +319,7 @@ static VALUE slice(int argc, VALUE *argv, VALUE self)
319
319
  *
320
320
  * Return this list as an Array
321
321
  */
322
- static VALUE to_array(VALUE self, VALUE rb_node)
322
+ static VALUE to_array(VALUE self)
323
323
  {
324
324
  xmlNodeSetPtr node_set ;
325
325
  VALUE list;
@@ -28,35 +28,24 @@ static int has_attributes(xmlTextReaderPtr reader)
28
28
  static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
29
29
  {
30
30
  xmlNsPtr ns;
31
- static char buffer[XMLNS_BUFFER_LEN] ;
32
- char *key ;
33
- size_t keylen ;
31
+ VALUE key;
34
32
 
35
33
  if (node->type != XML_ELEMENT_NODE) return ;
36
34
 
37
35
  ns = node->nsDef;
38
36
  while (ns != NULL) {
39
37
 
40
- keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
41
- if (keylen > XMLNS_BUFFER_LEN) {
42
- key = (char*)malloc(keylen) ;
43
- } else {
44
- key = buffer ;
45
- }
46
-
38
+ key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding());
47
39
  if (ns->prefix) {
48
- sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
49
- } else {
50
- sprintf(key, "%s", XMLNS_PREFIX);
40
+ rb_str_cat_cstr(key, ":");
41
+ rb_str_cat_cstr(key, (const char*)ns->prefix);
51
42
  }
52
43
 
44
+ key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding());
53
45
  rb_hash_aset(attr_hash,
54
- NOKOGIRI_STR_NEW2(key),
46
+ key,
55
47
  (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
56
48
  );
57
- if (key != buffer) {
58
- free(key);
59
- }
60
49
  ns = ns->next ;
61
50
  }
62
51
  }
@@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document)
53
53
  *
54
54
  * Create a new RelaxNG from the contents of +string+
55
55
  */
56
- static VALUE read_memory(VALUE klass, VALUE content)
56
+ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
57
57
  {
58
- xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
59
- (const char *)StringValuePtr(content),
60
- (int)RSTRING_LEN(content)
61
- );
58
+ VALUE content;
59
+ VALUE parse_options;
60
+ xmlRelaxNGParserCtxtPtr ctx;
62
61
  xmlRelaxNGPtr schema;
63
- VALUE errors = rb_ary_new();
62
+ VALUE errors;
64
63
  VALUE rb_schema;
64
+ int scanned_args = 0;
65
+
66
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
67
+ if (scanned_args == 1) {
68
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
69
+ }
65
70
 
71
+ ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
72
+
73
+ errors = rb_ary_new();
66
74
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
67
75
 
68
76
  #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
@@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
90
98
 
91
99
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
92
100
  rb_iv_set(rb_schema, "@errors", errors);
101
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
93
102
 
94
103
  return rb_schema;
95
104
  }
@@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content)
100
109
  *
101
110
  * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
102
111
  */
103
- static VALUE from_document(VALUE klass, VALUE document)
112
+ static VALUE from_document(int argc, VALUE *argv, VALUE klass)
104
113
  {
114
+ VALUE document;
115
+ VALUE parse_options;
105
116
  xmlDocPtr doc;
106
117
  xmlRelaxNGParserCtxtPtr ctx;
107
118
  xmlRelaxNGPtr schema;
108
119
  VALUE errors;
109
120
  VALUE rb_schema;
121
+ int scanned_args = 0;
122
+
123
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
110
124
 
111
125
  Data_Get_Struct(document, xmlDoc, doc);
126
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
112
127
 
113
- /* In case someone passes us a node. ugh. */
114
- doc = doc->doc;
128
+ if (scanned_args == 1) {
129
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
130
+ }
115
131
 
116
132
  ctx = xmlRelaxNGNewDocParserCtxt(doc);
117
133
 
@@ -129,6 +145,7 @@ static VALUE from_document(VALUE klass, VALUE document)
129
145
  schema = xmlRelaxNGParse(ctx);
130
146
 
131
147
  xmlSetStructuredErrorFunc(NULL, NULL);
148
+ xmlRelaxNGFreeParserCtxt(ctx);
132
149
 
133
150
  if(NULL == schema) {
134
151
  xmlErrorPtr error = xmlGetLastError();
@@ -142,6 +159,7 @@ static VALUE from_document(VALUE klass, VALUE document)
142
159
 
143
160
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
144
161
  rb_iv_set(rb_schema, "@errors", errors);
162
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
145
163
 
146
164
  return rb_schema;
147
165
  }
@@ -155,7 +173,7 @@ void init_xml_relax_ng()
155
173
 
156
174
  cNokogiriXmlRelaxNG = klass;
157
175
 
158
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
159
- rb_define_singleton_method(klass, "from_document", from_document, 1);
176
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
177
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
160
178
  rb_define_private_method(klass, "validate_document", validate_document, 1);
161
179
  }
@@ -1,8 +1,5 @@
1
1
  #include <xml_sax_parser.h>
2
2
 
3
- int vasprintf (char **strp, const char *fmt, va_list ap);
4
- void vasprintf_free (void *p);
5
-
6
3
  static ID id_start_document, id_end_document, id_start_element, id_end_element;
7
4
  static ID id_start_element_namespace, id_end_element_namespace;
8
5
  static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
@@ -206,7 +203,7 @@ static void warning_func(void * ctx, const char *msg, ...)
206
203
  va_end(args);
207
204
 
208
205
  ruby_message = NOKOGIRI_STR_NEW2(message);
209
- vasprintf_free(message);
206
+ free(message);
210
207
  rb_funcall(doc, id_warning, 1, ruby_message);
211
208
  }
212
209
 
@@ -223,7 +220,7 @@ static void error_func(void * ctx, const char *msg, ...)
223
220
  va_end(args);
224
221
 
225
222
  ruby_message = NOKOGIRI_STR_NEW2(message);
226
- vasprintf_free(message);
223
+ free(message);
227
224
  rb_funcall(doc, id_error, 1, ruby_message);
228
225
  }
229
226
 
@@ -262,8 +259,6 @@ static VALUE allocate(VALUE klass)
262
259
  {
263
260
  xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
264
261
 
265
- xmlSetStructuredErrorFunc(NULL, NULL);
266
-
267
262
  handler->startDocument = start_document;
268
263
  handler->endDocument = end_document;
269
264
  handler->startElement = start_element;
@@ -4,13 +4,13 @@ VALUE cNokogiriXmlSaxParserContext ;
4
4
 
5
5
  static void deallocate(xmlParserCtxtPtr ctxt)
6
6
  {
7
- NOKOGIRI_DEBUG_START(handler);
7
+ NOKOGIRI_DEBUG_START(ctxt);
8
8
 
9
9
  ctxt->sax = NULL;
10
10
 
11
11
  xmlFreeParserCtxt(ctxt);
12
12
 
13
- NOKOGIRI_DEBUG_END(handler);
13
+ NOKOGIRI_DEBUG_END(ctxt);
14
14
  }
15
15
 
16
16
  /*
@@ -93,15 +93,27 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
93
93
  *
94
94
  * Create a new Schema from the contents of +string+
95
95
  */
96
- static VALUE read_memory(VALUE klass, VALUE content)
96
+ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
97
97
  {
98
+ VALUE content;
99
+ VALUE parse_options;
100
+ int parse_options_int;
101
+ xmlSchemaParserCtxtPtr ctx;
98
102
  xmlSchemaPtr schema;
99
- xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(
100
- (const char *)StringValuePtr(content),
101
- (int)RSTRING_LEN(content)
102
- );
103
+ VALUE errors;
103
104
  VALUE rb_schema;
104
- VALUE errors = rb_ary_new();
105
+ int scanned_args = 0;
106
+ xmlExternalEntityLoader old_loader = 0;
107
+
108
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
109
+ if (scanned_args == 1) {
110
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
111
+ }
112
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
113
+
114
+ ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
115
+
116
+ errors = rb_ary_new();
105
117
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
106
118
 
107
119
  #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
@@ -109,10 +121,19 @@ static VALUE read_memory(VALUE klass, VALUE content)
109
121
  ctx,
110
122
  Nokogiri_error_array_pusher,
111
123
  (void *)errors
112
- );
124
+ );
113
125
  #endif
114
126
 
115
- schema = xmlSchemaParse(ctx);
127
+ if (parse_options_int & XML_PARSE_NONET) {
128
+ old_loader = xmlGetExternalEntityLoader();
129
+ xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
130
+ }
131
+
132
+ schema = xmlSchemaParse(ctx);
133
+
134
+ if (old_loader) {
135
+ xmlSetExternalEntityLoader(old_loader);
136
+ }
116
137
 
117
138
  xmlSetStructuredErrorFunc(NULL, NULL);
118
139
  xmlSchemaFreeParserCtxt(ctx);
@@ -129,28 +150,68 @@ static VALUE read_memory(VALUE klass, VALUE content)
129
150
 
130
151
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
131
152
  rb_iv_set(rb_schema, "@errors", errors);
153
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
132
154
 
133
155
  return rb_schema;
134
156
  }
135
157
 
158
+ /* Schema creation will remove and deallocate "blank" nodes.
159
+ * If those blank nodes have been exposed to Ruby, they could get freed
160
+ * out from under the VALUE pointer. This function checks to see if any of
161
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
162
+ */
163
+ static int has_blank_nodes_p(VALUE cache)
164
+ {
165
+ long i;
166
+
167
+ if (NIL_P(cache)) {
168
+ return 0;
169
+ }
170
+
171
+ for (i = 0; i < RARRAY_LEN(cache); i++) {
172
+ xmlNodePtr node;
173
+ VALUE element = rb_ary_entry(cache, i);
174
+ Data_Get_Struct(element, xmlNode, node);
175
+ if (xmlIsBlankNode(node)) {
176
+ return 1;
177
+ }
178
+ }
179
+
180
+ return 0;
181
+ }
182
+
136
183
  /*
137
184
  * call-seq:
138
185
  * from_document(doc)
139
186
  *
140
187
  * Create a new Schema from the Nokogiri::XML::Document +doc+
141
188
  */
142
- static VALUE from_document(VALUE klass, VALUE document)
189
+ static VALUE from_document(int argc, VALUE *argv, VALUE klass)
143
190
  {
191
+ VALUE document;
192
+ VALUE parse_options;
193
+ int parse_options_int;
144
194
  xmlDocPtr doc;
145
195
  xmlSchemaParserCtxtPtr ctx;
146
196
  xmlSchemaPtr schema;
147
197
  VALUE errors;
148
198
  VALUE rb_schema;
199
+ int scanned_args = 0;
200
+ xmlExternalEntityLoader old_loader = 0;
201
+
202
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
149
203
 
150
204
  Data_Get_Struct(document, xmlDoc, doc);
205
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
151
206
 
152
- /* In case someone passes us a node. ugh. */
153
- doc = doc->doc;
207
+ if (scanned_args == 1) {
208
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
209
+ }
210
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
211
+
212
+ if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
213
+ rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
214
+ }
154
215
 
155
216
  ctx = xmlSchemaNewDocParserCtxt(doc);
156
217
 
@@ -165,8 +226,17 @@ static VALUE from_document(VALUE klass, VALUE document)
165
226
  );
166
227
  #endif
167
228
 
229
+ if (parse_options_int & XML_PARSE_NONET) {
230
+ old_loader = xmlGetExternalEntityLoader();
231
+ xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
232
+ }
233
+
168
234
  schema = xmlSchemaParse(ctx);
169
235
 
236
+ if (old_loader) {
237
+ xmlSetExternalEntityLoader(old_loader);
238
+ }
239
+
170
240
  xmlSetStructuredErrorFunc(NULL, NULL);
171
241
  xmlSchemaFreeParserCtxt(ctx);
172
242
 
@@ -182,6 +252,7 @@ static VALUE from_document(VALUE klass, VALUE document)
182
252
 
183
253
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
184
254
  rb_iv_set(rb_schema, "@errors", errors);
255
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
185
256
 
186
257
  return rb_schema;
187
258
 
@@ -197,8 +268,8 @@ void init_xml_schema()
197
268
 
198
269
  cNokogiriXmlSchema = klass;
199
270
 
200
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
201
- rb_define_singleton_method(klass, "from_document", from_document, 1);
271
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
272
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
202
273
 
203
274
  rb_define_private_method(klass, "validate_document", validate_document, 1);
204
275
  rb_define_private_method(klass, "validate_file", validate_file, 1);
@@ -1,6 +1,11 @@
1
1
  #include <xml_xpath_context.h>
2
2
 
3
- int vasprintf (char **strp, const char *fmt, va_list ap);
3
+ /*
4
+ * these constants have matching declarations in
5
+ * ext/java/nokogiri/internals/NokogiriNamespaceContext.java
6
+ */
7
+ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
8
+ static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
4
9
 
5
10
  static void deallocate(xmlXPathContextPtr ctx)
6
11
  {
@@ -9,6 +14,73 @@ static void deallocate(xmlXPathContextPtr ctx)
9
14
  NOKOGIRI_DEBUG_END(ctx);
10
15
  }
11
16
 
17
+ /* find a CSS class in an HTML element's `class` attribute */
18
+ const xmlChar* builtin_css_class(const xmlChar* str, const xmlChar *val)
19
+ {
20
+ int val_len;
21
+
22
+ if (str == NULL) { return(NULL); }
23
+ if (val == NULL) { return(NULL); }
24
+
25
+ val_len = xmlStrlen(val);
26
+ if (val_len == 0) { return(str); }
27
+
28
+ while (*str != 0) {
29
+ if ((*str == *val) && !xmlStrncmp(str, val, val_len)) {
30
+ const xmlChar* next_byte = str + val_len;
31
+
32
+ /* only match if the next byte is whitespace or end of string */
33
+ if ((*next_byte == 0) || (IS_BLANK_CH(*next_byte))) {
34
+ return((const xmlChar*)str);
35
+ }
36
+ }
37
+
38
+ /* advance str to whitespace */
39
+ while ((*str != 0) && !IS_BLANK_CH(*str)) {
40
+ str++;
41
+ }
42
+
43
+ /* advance str to start of next word or end of string */
44
+ while ((*str != 0) && IS_BLANK_CH(*str)) {
45
+ str++;
46
+ }
47
+ }
48
+
49
+ return(NULL);
50
+ }
51
+
52
+ /* xmlXPathFunction to wrap builtin_css_class() */
53
+ static void xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
54
+ {
55
+ xmlXPathObjectPtr hay, needle;
56
+
57
+ CHECK_ARITY(2);
58
+
59
+ CAST_TO_STRING;
60
+ needle = valuePop(ctxt);
61
+ if ((needle == NULL) || (needle->type != XPATH_STRING)) {
62
+ xmlXPathFreeObject(needle);
63
+ XP_ERROR(XPATH_INVALID_TYPE);
64
+ }
65
+
66
+ CAST_TO_STRING;
67
+ hay = valuePop(ctxt);
68
+ if ((hay == NULL) || (hay->type != XPATH_STRING)) {
69
+ xmlXPathFreeObject(hay);
70
+ xmlXPathFreeObject(needle);
71
+ XP_ERROR(XPATH_INVALID_TYPE);
72
+ }
73
+
74
+ if (builtin_css_class(hay->stringval, needle->stringval)) {
75
+ valuePush(ctxt, xmlXPathNewBoolean(1));
76
+ } else {
77
+ valuePush(ctxt, xmlXPathNewBoolean(0));
78
+ }
79
+
80
+ xmlXPathFreeObject(hay);
81
+ xmlXPathFreeObject(needle);
82
+ }
83
+
12
84
  /*
13
85
  * call-seq:
14
86
  * register_ns(prefix, uri)
@@ -263,14 +335,18 @@ static VALUE new(VALUE klass, VALUE nodeobj)
263
335
  xmlXPathContextPtr ctx;
264
336
  VALUE self;
265
337
 
266
- xmlXPathInit();
267
-
268
338
  Data_Get_Struct(nodeobj, xmlNode, node);
269
339
 
340
+ xmlXPathInit();
341
+
270
342
  ctx = xmlXPathNewContext(node->doc);
271
343
  ctx->node = node;
344
+
345
+ xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
346
+ xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
347
+ xpath_builtin_css_class);
348
+
272
349
  self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
273
- /*rb_iv_set(self, "@xpath_handler", Qnil); */
274
350
  return self;
275
351
  }
276
352