nokogiri 1.16.8-java → 1.17.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/java/nokogiri/Html4Document.java +3 -3
  6. data/ext/java/nokogiri/Html4SaxParserContext.java +47 -175
  7. data/ext/java/nokogiri/NokogiriService.java +2 -2
  8. data/ext/java/nokogiri/XmlCdata.java +3 -0
  9. data/ext/java/nokogiri/XmlDocument.java +7 -14
  10. data/ext/java/nokogiri/XmlDocumentFragment.java +4 -92
  11. data/ext/java/nokogiri/XmlDtd.java +2 -2
  12. data/ext/java/nokogiri/XmlEntityReference.java +16 -12
  13. data/ext/java/nokogiri/XmlNode.java +26 -47
  14. data/ext/java/nokogiri/XmlNodeSet.java +10 -1
  15. data/ext/java/nokogiri/XmlSaxParserContext.java +73 -36
  16. data/ext/java/nokogiri/XmlSchema.java +15 -16
  17. data/ext/java/nokogiri/XsltStylesheet.java +1 -1
  18. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +1 -1
  19. data/ext/java/nokogiri/internals/NokogiriDomParser.java +3 -3
  20. data/ext/java/nokogiri/internals/NokogiriHandler.java +59 -15
  21. data/ext/java/nokogiri/internals/NokogiriHelpers.java +1 -1
  22. data/ext/java/nokogiri/internals/ParserContext.java +51 -21
  23. data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
  24. data/ext/java/nokogiri/internals/XmlDomParserContext.java +8 -19
  25. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +1 -1
  26. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +1 -1
  27. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -11
  28. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +5 -5
  29. data/ext/java/nokogiri/internals/c14n/{UtfHelpper.java → UtfHelper.java} +2 -2
  30. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +8 -8
  31. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +2 -2
  32. data/ext/nokogiri/extconf.rb +191 -137
  33. data/ext/nokogiri/gumbo.c +69 -53
  34. data/ext/nokogiri/html4_document.c +10 -4
  35. data/ext/nokogiri/html4_element_description.c +18 -18
  36. data/ext/nokogiri/html4_sax_parser.c +40 -0
  37. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  38. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  39. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  40. data/ext/nokogiri/nokogiri.c +9 -2
  41. data/ext/nokogiri/xml_attr.c +1 -1
  42. data/ext/nokogiri/xml_cdata.c +2 -10
  43. data/ext/nokogiri/xml_comment.c +3 -8
  44. data/ext/nokogiri/xml_document.c +163 -156
  45. data/ext/nokogiri/xml_document_fragment.c +10 -25
  46. data/ext/nokogiri/xml_dtd.c +1 -1
  47. data/ext/nokogiri/xml_element_content.c +9 -9
  48. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  49. data/ext/nokogiri/xml_namespace.c +6 -6
  50. data/ext/nokogiri/xml_node.c +130 -104
  51. data/ext/nokogiri/xml_node_set.c +46 -44
  52. data/ext/nokogiri/xml_reader.c +54 -58
  53. data/ext/nokogiri/xml_relax_ng.c +35 -56
  54. data/ext/nokogiri/xml_sax_parser.c +156 -88
  55. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  56. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  57. data/ext/nokogiri/xml_schema.c +50 -85
  58. data/ext/nokogiri/xml_syntax_error.c +19 -11
  59. data/ext/nokogiri/xml_text.c +2 -4
  60. data/ext/nokogiri/xml_xpath_context.c +2 -2
  61. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  62. data/lib/nokogiri/class_resolver.rb +1 -1
  63. data/lib/nokogiri/css/node.rb +6 -2
  64. data/lib/nokogiri/css/parser.rb +6 -4
  65. data/lib/nokogiri/css/parser.y +2 -2
  66. data/lib/nokogiri/css/parser_extras.rb +6 -66
  67. data/lib/nokogiri/css/selector_cache.rb +38 -0
  68. data/lib/nokogiri/css/tokenizer.rb +4 -4
  69. data/lib/nokogiri/css/tokenizer.rex +9 -8
  70. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  71. data/lib/nokogiri/css.rb +86 -20
  72. data/lib/nokogiri/decorators/slop.rb +3 -5
  73. data/lib/nokogiri/encoding_handler.rb +2 -2
  74. data/lib/nokogiri/html4/document.rb +44 -23
  75. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  76. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  77. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  78. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  79. data/lib/nokogiri/html4.rb +9 -14
  80. data/lib/nokogiri/html5/builder.rb +40 -0
  81. data/lib/nokogiri/html5/document.rb +61 -30
  82. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  83. data/lib/nokogiri/html5/node.rb +4 -4
  84. data/lib/nokogiri/html5.rb +114 -72
  85. data/lib/nokogiri/nokogiri.jar +0 -0
  86. data/lib/nokogiri/version/constant.rb +1 -1
  87. data/lib/nokogiri/xml/builder.rb +8 -1
  88. data/lib/nokogiri/xml/document.rb +70 -26
  89. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  90. data/lib/nokogiri/xml/node.rb +82 -11
  91. data/lib/nokogiri/xml/node_set.rb +9 -7
  92. data/lib/nokogiri/xml/parse_options.rb +1 -1
  93. data/lib/nokogiri/xml/pp/node.rb +6 -1
  94. data/lib/nokogiri/xml/reader.rb +46 -13
  95. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  96. data/lib/nokogiri/xml/sax/document.rb +174 -83
  97. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  98. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  99. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  100. data/lib/nokogiri/xml/sax.rb +48 -0
  101. data/lib/nokogiri/xml/schema.rb +112 -45
  102. data/lib/nokogiri/xml/searchable.rb +6 -8
  103. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  104. data/lib/nokogiri/xml.rb +13 -24
  105. data/lib/nokogiri/xslt.rb +3 -9
  106. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  107. metadata +9 -5
  108. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -3,13 +3,10 @@
3
3
  VALUE cNokogiriHtml4SaxPushParser;
4
4
 
5
5
  /*
6
- * call-seq:
7
- * native_write(chunk, last_chunk)
8
- *
9
6
  * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
10
7
  */
11
8
  static VALUE
12
- native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
9
+ noko_html4_sax_push_parser__native_write(VALUE self, VALUE rb_chunk, VALUE rb_last_chunk)
13
10
  {
14
11
  xmlParserCtxtPtr ctx;
15
12
  const char *chunk = NULL;
@@ -19,44 +16,45 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
19
16
 
20
17
  ctx = noko_xml_sax_push_parser_unwrap(self);
21
18
 
22
- if (Qnil != _chunk) {
23
- chunk = StringValuePtr(_chunk);
24
- size = (int)RSTRING_LEN(_chunk);
19
+ if (Qnil != rb_chunk) {
20
+ chunk = StringValuePtr(rb_chunk);
21
+ size = (int)RSTRING_LEN(rb_chunk);
25
22
  }
26
23
 
27
- Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);
24
+ noko__structured_error_func_save_and_set(&handler_state, NULL, NULL);
28
25
 
29
- status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);
26
+ status = htmlParseChunk(ctx, chunk, size, Qtrue == rb_last_chunk ? 1 : 0);
30
27
 
31
- Nokogiri_structured_error_func_restore(&handler_state);
28
+ noko__structured_error_func_restore(&handler_state);
32
29
 
33
- if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
30
+ if ((status != 0) && !(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
34
31
  // TODO: there appear to be no tests for this block
35
32
  xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
36
- Nokogiri_error_raise(NULL, e);
33
+ noko__error_raise(NULL, e);
37
34
  }
38
35
 
39
36
  return self;
40
37
  }
41
38
 
42
39
  /*
43
- * call-seq:
44
- * initialize_native(xml_sax, filename)
45
- *
46
40
  * Initialize the push parser with +xml_sax+ using +filename+
47
41
  */
48
42
  static VALUE
49
- initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
50
- VALUE encoding)
43
+ noko_html4_sax_push_parser__initialize_native(
44
+ VALUE self,
45
+ VALUE rb_xml_sax,
46
+ VALUE rb_filename,
47
+ VALUE encoding
48
+ )
51
49
  {
52
50
  htmlSAXHandlerPtr sax;
53
51
  const char *filename = NULL;
54
52
  htmlParserCtxtPtr ctx;
55
53
  xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
56
54
 
57
- sax = noko_sax_handler_unwrap(_xml_sax);
55
+ sax = noko_xml_sax_parser_unwrap(rb_xml_sax);
58
56
 
59
- if (_filename != Qnil) { filename = StringValueCStr(_filename); }
57
+ if (rb_filename != Qnil) { filename = StringValueCStr(rb_filename); }
60
58
 
61
59
  if (!NIL_P(encoding)) {
62
60
  enc = xmlParseCharEncoding(StringValueCStr(encoding));
@@ -77,9 +75,9 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
77
75
  rb_raise(rb_eRuntimeError, "Could not create a parser context");
78
76
  }
79
77
 
80
- ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
78
+ ctx->userData = ctx;
79
+ ctx->_private = (void *)rb_xml_sax;
81
80
 
82
- ctx->sax2 = 1;
83
81
  DATA_PTR(self) = ctx;
84
82
  return self;
85
83
  }
@@ -88,8 +86,11 @@ void
88
86
  noko_init_html_sax_push_parser(void)
89
87
  {
90
88
  assert(cNokogiriXmlSaxPushParser);
91
- cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
89
+ cNokogiriHtml4SaxPushParser =
90
+ rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
92
91
 
93
- rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3);
94
- rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2);
92
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native",
93
+ noko_html4_sax_push_parser__initialize_native, 3);
94
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write",
95
+ noko_html4_sax_push_parser__native_write, 2);
95
96
  }
@@ -0,0 +1,114 @@
1
+ #include <nokogiri.h>
2
+
3
+ #ifndef HAVE_XMLCTXTSETOPTIONS
4
+ /* based on libxml2-2.14.0-dev (1d8bd126) parser.c xmlCtxtSetInternalOptions */
5
+ int
6
+ xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
7
+ {
8
+ int keepMask = 0;
9
+ int allMask;
10
+
11
+ if (ctxt == NULL) {
12
+ return (-1);
13
+ }
14
+
15
+ /*
16
+ * XInclude options aren't handled by the parser.
17
+ *
18
+ * XML_PARSE_XINCLUDE
19
+ * XML_PARSE_NOXINCNODE
20
+ * XML_PARSE_NOBASEFIX
21
+ */
22
+ allMask = XML_PARSE_RECOVER |
23
+ XML_PARSE_NOENT |
24
+ XML_PARSE_DTDLOAD |
25
+ XML_PARSE_DTDATTR |
26
+ XML_PARSE_DTDVALID |
27
+ XML_PARSE_NOERROR |
28
+ XML_PARSE_NOWARNING |
29
+ XML_PARSE_PEDANTIC |
30
+ XML_PARSE_NOBLANKS |
31
+ #ifdef LIBXML_SAX1_ENABLED
32
+ XML_PARSE_SAX1 |
33
+ #endif
34
+ XML_PARSE_NONET |
35
+ XML_PARSE_NODICT |
36
+ XML_PARSE_NSCLEAN |
37
+ XML_PARSE_NOCDATA |
38
+ XML_PARSE_COMPACT |
39
+ XML_PARSE_OLD10 |
40
+ XML_PARSE_HUGE |
41
+ XML_PARSE_OLDSAX |
42
+ XML_PARSE_IGNORE_ENC |
43
+ XML_PARSE_BIG_LINES;
44
+
45
+ ctxt->options = (ctxt->options & keepMask) | (options & allMask);
46
+
47
+ /*
48
+ * For some options, struct members are historically the source
49
+ * of truth. The values are initialized from global variables and
50
+ * old code could also modify them directly. Several older API
51
+ * functions that don't take an options argument rely on these
52
+ * deprecated mechanisms.
53
+ *
54
+ * Once public access to struct members and the globals are
55
+ * disabled, we can use the options bitmask as source of
56
+ * truth, making all these struct members obsolete.
57
+ *
58
+ * The XML_DETECT_IDS flag is misnamed. It simply enables
59
+ * loading of the external subset.
60
+ */
61
+ ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
62
+ ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
63
+ ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
64
+ ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
65
+ ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
66
+ ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
67
+ ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
68
+ ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
69
+
70
+ /*
71
+ * Changing SAX callbacks is a bad idea. This should be fixed.
72
+ */
73
+ if (options & XML_PARSE_NOBLANKS) {
74
+ ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
75
+ }
76
+ if (options & XML_PARSE_NOCDATA) {
77
+ ctxt->sax->cdataBlock = NULL;
78
+ }
79
+ if (options & XML_PARSE_HUGE) {
80
+ if (ctxt->dict != NULL) {
81
+ xmlDictSetLimit(ctxt->dict, 0);
82
+ }
83
+ }
84
+
85
+ ctxt->linenumbers = 1;
86
+
87
+ return (options & ~allMask);
88
+ }
89
+ #endif
90
+
91
+ #ifndef HAVE_XMLCTXTGETOPTIONS
92
+ int
93
+ xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
94
+ {
95
+ return (ctxt->options);
96
+ }
97
+ #endif
98
+
99
+ #ifndef HAVE_XMLSWITCHENCODINGNAME
100
+ int
101
+ xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding)
102
+ {
103
+ if (ctxt == NULL) {
104
+ return (-1);
105
+ }
106
+
107
+ xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
108
+ if (handler == NULL) {
109
+ return (-1);
110
+ }
111
+
112
+ return (xmlSwitchToEncoding(ctxt, handler));
113
+ }
114
+ #endif
@@ -46,6 +46,7 @@ void noko_init_html_element_description(void);
46
46
  void noko_init_html_entity_lookup(void);
47
47
  void noko_init_html_sax_parser_context(void);
48
48
  void noko_init_html_sax_push_parser(void);
49
+ void noko_init_html4_sax_parser(void);
49
50
  void noko_init_gumbo(void);
50
51
  void noko_init_test_global_handlers(void);
51
52
 
@@ -184,8 +185,8 @@ Init_nokogiri(void)
184
185
  {
185
186
  mNokogiri = rb_define_module("Nokogiri");
186
187
  mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
187
- mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
188
- mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
188
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
189
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
189
190
  mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
190
191
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
191
192
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
@@ -202,6 +203,9 @@ Init_nokogiri(void)
202
203
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
203
204
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
204
205
 
206
+ rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
207
+ xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
208
+
205
209
  #ifdef NOKOGIRI_PACKAGED_LIBRARIES
206
210
  rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
207
211
  # ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
@@ -244,7 +248,10 @@ Init_nokogiri(void)
244
248
  noko_init_xml_namespace();
245
249
  noko_init_xml_node_set();
246
250
  noko_init_xml_reader();
251
+
247
252
  noko_init_xml_sax_parser();
253
+ noko_init_html4_sax_parser();
254
+
248
255
  noko_init_xml_xpath_context();
249
256
  noko_init_xslt_stylesheet();
250
257
  noko_init_html_element_description();
@@ -6,7 +6,7 @@ VALUE cNokogiriXmlAttr;
6
6
  * call-seq:
7
7
  * value=(content)
8
8
  *
9
- * Set the value for this Attr to +content+. Use `nil` to remove the value
9
+ * Set the value for this Attr to +content+. Use +nil+ to remove the value
10
10
  * (e.g., a HTML boolean attribute).
11
11
  */
12
12
  static VALUE
@@ -20,11 +20,10 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
20
20
  VALUE rb_content;
21
21
  VALUE rb_rest;
22
22
  VALUE rb_node;
23
- xmlChar *c_content = NULL;
24
- int c_content_len = 0;
25
23
 
26
24
  rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
27
25
 
26
+ Check_Type(rb_content, T_STRING);
28
27
  if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
29
28
  rb_raise(rb_eTypeError,
30
29
  "expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
@@ -40,15 +39,8 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
40
39
  c_document = noko_xml_document_unwrap(rb_document);
41
40
  }
42
41
 
43
- if (!NIL_P(rb_content)) {
44
- c_content = (xmlChar *)StringValuePtr(rb_content);
45
- c_content_len = RSTRING_LENINT(rb_content);
46
- }
47
-
48
- c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
49
-
42
+ c_node = xmlNewCDataBlock(c_document, (xmlChar *)StringValueCStr(rb_content), RSTRING_LENINT(rb_content));
50
43
  noko_xml_document_pin_node(c_node);
51
-
52
44
  rb_node = noko_xml_node_wrap(klass, c_node);
53
45
  rb_obj_call_init(rb_node, argc, argv);
54
46
 
@@ -23,25 +23,20 @@ new (int argc, VALUE *argv, VALUE klass)
23
23
 
24
24
  rb_scan_args(argc, argv, "2*", &document, &content, &rest);
25
25
 
26
+ Check_Type(content, T_STRING);
26
27
  if (rb_obj_is_kind_of(document, cNokogiriXmlNode)) {
27
28
  document = rb_funcall(document, document_id, 0);
28
29
  } else if (!rb_obj_is_kind_of(document, cNokogiriXmlDocument)
29
30
  && !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment)) {
30
31
  rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
32
  }
32
-
33
33
  xml_doc = noko_xml_document_unwrap(document);
34
34
 
35
- node = xmlNewDocComment(
36
- xml_doc,
37
- (const xmlChar *)StringValueCStr(content)
38
- );
39
-
35
+ node = xmlNewDocComment(xml_doc, (const xmlChar *)StringValueCStr(content));
36
+ noko_xml_document_pin_node(node);
40
37
  rb_node = noko_xml_node_wrap(klass, node);
41
38
  rb_obj_call_init(rb_node, argc, argv);
42
39
 
43
- noko_xml_document_pin_node(node);
44
-
45
40
  if (rb_block_given_p()) { rb_yield(rb_node); }
46
41
 
47
42
  return rb_node;