nokogiri 1.16.8-java → 1.17.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/java/nokogiri/Html4Document.java +3 -3
  6. data/ext/java/nokogiri/Html4SaxParserContext.java +47 -175
  7. data/ext/java/nokogiri/NokogiriService.java +2 -2
  8. data/ext/java/nokogiri/XmlCdata.java +3 -0
  9. data/ext/java/nokogiri/XmlDocument.java +7 -14
  10. data/ext/java/nokogiri/XmlDocumentFragment.java +4 -92
  11. data/ext/java/nokogiri/XmlDtd.java +2 -2
  12. data/ext/java/nokogiri/XmlEntityReference.java +16 -12
  13. data/ext/java/nokogiri/XmlNode.java +26 -47
  14. data/ext/java/nokogiri/XmlNodeSet.java +10 -1
  15. data/ext/java/nokogiri/XmlSaxParserContext.java +73 -36
  16. data/ext/java/nokogiri/XmlSchema.java +15 -16
  17. data/ext/java/nokogiri/XsltStylesheet.java +1 -1
  18. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +1 -1
  19. data/ext/java/nokogiri/internals/NokogiriDomParser.java +3 -3
  20. data/ext/java/nokogiri/internals/NokogiriHandler.java +59 -15
  21. data/ext/java/nokogiri/internals/NokogiriHelpers.java +1 -1
  22. data/ext/java/nokogiri/internals/ParserContext.java +51 -21
  23. data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
  24. data/ext/java/nokogiri/internals/XmlDomParserContext.java +8 -19
  25. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +1 -1
  26. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +1 -1
  27. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -11
  28. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +5 -5
  29. data/ext/java/nokogiri/internals/c14n/{UtfHelpper.java → UtfHelper.java} +2 -2
  30. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +8 -8
  31. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +2 -2
  32. data/ext/nokogiri/extconf.rb +191 -137
  33. data/ext/nokogiri/gumbo.c +69 -53
  34. data/ext/nokogiri/html4_document.c +10 -4
  35. data/ext/nokogiri/html4_element_description.c +18 -18
  36. data/ext/nokogiri/html4_sax_parser.c +40 -0
  37. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  38. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  39. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  40. data/ext/nokogiri/nokogiri.c +9 -2
  41. data/ext/nokogiri/xml_attr.c +1 -1
  42. data/ext/nokogiri/xml_cdata.c +2 -10
  43. data/ext/nokogiri/xml_comment.c +3 -8
  44. data/ext/nokogiri/xml_document.c +163 -156
  45. data/ext/nokogiri/xml_document_fragment.c +10 -25
  46. data/ext/nokogiri/xml_dtd.c +1 -1
  47. data/ext/nokogiri/xml_element_content.c +9 -9
  48. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  49. data/ext/nokogiri/xml_namespace.c +6 -6
  50. data/ext/nokogiri/xml_node.c +130 -104
  51. data/ext/nokogiri/xml_node_set.c +46 -44
  52. data/ext/nokogiri/xml_reader.c +54 -58
  53. data/ext/nokogiri/xml_relax_ng.c +35 -56
  54. data/ext/nokogiri/xml_sax_parser.c +156 -88
  55. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  56. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  57. data/ext/nokogiri/xml_schema.c +50 -85
  58. data/ext/nokogiri/xml_syntax_error.c +19 -11
  59. data/ext/nokogiri/xml_text.c +2 -4
  60. data/ext/nokogiri/xml_xpath_context.c +2 -2
  61. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  62. data/lib/nokogiri/class_resolver.rb +1 -1
  63. data/lib/nokogiri/css/node.rb +6 -2
  64. data/lib/nokogiri/css/parser.rb +6 -4
  65. data/lib/nokogiri/css/parser.y +2 -2
  66. data/lib/nokogiri/css/parser_extras.rb +6 -66
  67. data/lib/nokogiri/css/selector_cache.rb +38 -0
  68. data/lib/nokogiri/css/tokenizer.rb +4 -4
  69. data/lib/nokogiri/css/tokenizer.rex +9 -8
  70. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  71. data/lib/nokogiri/css.rb +86 -20
  72. data/lib/nokogiri/decorators/slop.rb +3 -5
  73. data/lib/nokogiri/encoding_handler.rb +2 -2
  74. data/lib/nokogiri/html4/document.rb +44 -23
  75. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  76. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  77. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  78. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  79. data/lib/nokogiri/html4.rb +9 -14
  80. data/lib/nokogiri/html5/builder.rb +40 -0
  81. data/lib/nokogiri/html5/document.rb +61 -30
  82. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  83. data/lib/nokogiri/html5/node.rb +4 -4
  84. data/lib/nokogiri/html5.rb +114 -72
  85. data/lib/nokogiri/nokogiri.jar +0 -0
  86. data/lib/nokogiri/version/constant.rb +1 -1
  87. data/lib/nokogiri/xml/builder.rb +8 -1
  88. data/lib/nokogiri/xml/document.rb +70 -26
  89. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  90. data/lib/nokogiri/xml/node.rb +82 -11
  91. data/lib/nokogiri/xml/node_set.rb +9 -7
  92. data/lib/nokogiri/xml/parse_options.rb +1 -1
  93. data/lib/nokogiri/xml/pp/node.rb +6 -1
  94. data/lib/nokogiri/xml/reader.rb +46 -13
  95. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  96. data/lib/nokogiri/xml/sax/document.rb +174 -83
  97. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  98. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  99. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  100. data/lib/nokogiri/xml/sax.rb +48 -0
  101. data/lib/nokogiri/xml/schema.rb +112 -45
  102. data/lib/nokogiri/xml/searchable.rb +6 -8
  103. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  104. data/lib/nokogiri/xml.rb +13 -24
  105. data/lib/nokogiri/xslt.rb +3 -9
  106. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  107. metadata +9 -5
  108. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -3,13 +3,10 @@
3
3
  VALUE cNokogiriHtml4SaxPushParser;
4
4
 
5
5
  /*
6
- * call-seq:
7
- * native_write(chunk, last_chunk)
8
- *
9
6
  * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
10
7
  */
11
8
  static VALUE
12
- native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
9
+ noko_html4_sax_push_parser__native_write(VALUE self, VALUE rb_chunk, VALUE rb_last_chunk)
13
10
  {
14
11
  xmlParserCtxtPtr ctx;
15
12
  const char *chunk = NULL;
@@ -19,44 +16,45 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
19
16
 
20
17
  ctx = noko_xml_sax_push_parser_unwrap(self);
21
18
 
22
- if (Qnil != _chunk) {
23
- chunk = StringValuePtr(_chunk);
24
- size = (int)RSTRING_LEN(_chunk);
19
+ if (Qnil != rb_chunk) {
20
+ chunk = StringValuePtr(rb_chunk);
21
+ size = (int)RSTRING_LEN(rb_chunk);
25
22
  }
26
23
 
27
- Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);
24
+ noko__structured_error_func_save_and_set(&handler_state, NULL, NULL);
28
25
 
29
- status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);
26
+ status = htmlParseChunk(ctx, chunk, size, Qtrue == rb_last_chunk ? 1 : 0);
30
27
 
31
- Nokogiri_structured_error_func_restore(&handler_state);
28
+ noko__structured_error_func_restore(&handler_state);
32
29
 
33
- if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
30
+ if ((status != 0) && !(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
34
31
  // TODO: there appear to be no tests for this block
35
32
  xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
36
- Nokogiri_error_raise(NULL, e);
33
+ noko__error_raise(NULL, e);
37
34
  }
38
35
 
39
36
  return self;
40
37
  }
41
38
 
42
39
  /*
43
- * call-seq:
44
- * initialize_native(xml_sax, filename)
45
- *
46
40
  * Initialize the push parser with +xml_sax+ using +filename+
47
41
  */
48
42
  static VALUE
49
- initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
50
- VALUE encoding)
43
+ noko_html4_sax_push_parser__initialize_native(
44
+ VALUE self,
45
+ VALUE rb_xml_sax,
46
+ VALUE rb_filename,
47
+ VALUE encoding
48
+ )
51
49
  {
52
50
  htmlSAXHandlerPtr sax;
53
51
  const char *filename = NULL;
54
52
  htmlParserCtxtPtr ctx;
55
53
  xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
56
54
 
57
- sax = noko_sax_handler_unwrap(_xml_sax);
55
+ sax = noko_xml_sax_parser_unwrap(rb_xml_sax);
58
56
 
59
- if (_filename != Qnil) { filename = StringValueCStr(_filename); }
57
+ if (rb_filename != Qnil) { filename = StringValueCStr(rb_filename); }
60
58
 
61
59
  if (!NIL_P(encoding)) {
62
60
  enc = xmlParseCharEncoding(StringValueCStr(encoding));
@@ -77,9 +75,9 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
77
75
  rb_raise(rb_eRuntimeError, "Could not create a parser context");
78
76
  }
79
77
 
80
- ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
78
+ ctx->userData = ctx;
79
+ ctx->_private = (void *)rb_xml_sax;
81
80
 
82
- ctx->sax2 = 1;
83
81
  DATA_PTR(self) = ctx;
84
82
  return self;
85
83
  }
@@ -88,8 +86,11 @@ void
88
86
  noko_init_html_sax_push_parser(void)
89
87
  {
90
88
  assert(cNokogiriXmlSaxPushParser);
91
- cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
89
+ cNokogiriHtml4SaxPushParser =
90
+ rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
92
91
 
93
- rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3);
94
- rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2);
92
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native",
93
+ noko_html4_sax_push_parser__initialize_native, 3);
94
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write",
95
+ noko_html4_sax_push_parser__native_write, 2);
95
96
  }
@@ -0,0 +1,114 @@
1
+ #include <nokogiri.h>
2
+
3
+ #ifndef HAVE_XMLCTXTSETOPTIONS
4
+ /* based on libxml2-2.14.0-dev (1d8bd126) parser.c xmlCtxtSetInternalOptions */
5
+ int
6
+ xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
7
+ {
8
+ int keepMask = 0;
9
+ int allMask;
10
+
11
+ if (ctxt == NULL) {
12
+ return (-1);
13
+ }
14
+
15
+ /*
16
+ * XInclude options aren't handled by the parser.
17
+ *
18
+ * XML_PARSE_XINCLUDE
19
+ * XML_PARSE_NOXINCNODE
20
+ * XML_PARSE_NOBASEFIX
21
+ */
22
+ allMask = XML_PARSE_RECOVER |
23
+ XML_PARSE_NOENT |
24
+ XML_PARSE_DTDLOAD |
25
+ XML_PARSE_DTDATTR |
26
+ XML_PARSE_DTDVALID |
27
+ XML_PARSE_NOERROR |
28
+ XML_PARSE_NOWARNING |
29
+ XML_PARSE_PEDANTIC |
30
+ XML_PARSE_NOBLANKS |
31
+ #ifdef LIBXML_SAX1_ENABLED
32
+ XML_PARSE_SAX1 |
33
+ #endif
34
+ XML_PARSE_NONET |
35
+ XML_PARSE_NODICT |
36
+ XML_PARSE_NSCLEAN |
37
+ XML_PARSE_NOCDATA |
38
+ XML_PARSE_COMPACT |
39
+ XML_PARSE_OLD10 |
40
+ XML_PARSE_HUGE |
41
+ XML_PARSE_OLDSAX |
42
+ XML_PARSE_IGNORE_ENC |
43
+ XML_PARSE_BIG_LINES;
44
+
45
+ ctxt->options = (ctxt->options & keepMask) | (options & allMask);
46
+
47
+ /*
48
+ * For some options, struct members are historically the source
49
+ * of truth. The values are initialized from global variables and
50
+ * old code could also modify them directly. Several older API
51
+ * functions that don't take an options argument rely on these
52
+ * deprecated mechanisms.
53
+ *
54
+ * Once public access to struct members and the globals are
55
+ * disabled, we can use the options bitmask as source of
56
+ * truth, making all these struct members obsolete.
57
+ *
58
+ * The XML_DETECT_IDS flag is misnamed. It simply enables
59
+ * loading of the external subset.
60
+ */
61
+ ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
62
+ ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
63
+ ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
64
+ ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
65
+ ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
66
+ ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
67
+ ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
68
+ ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
69
+
70
+ /*
71
+ * Changing SAX callbacks is a bad idea. This should be fixed.
72
+ */
73
+ if (options & XML_PARSE_NOBLANKS) {
74
+ ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
75
+ }
76
+ if (options & XML_PARSE_NOCDATA) {
77
+ ctxt->sax->cdataBlock = NULL;
78
+ }
79
+ if (options & XML_PARSE_HUGE) {
80
+ if (ctxt->dict != NULL) {
81
+ xmlDictSetLimit(ctxt->dict, 0);
82
+ }
83
+ }
84
+
85
+ ctxt->linenumbers = 1;
86
+
87
+ return (options & ~allMask);
88
+ }
89
+ #endif
90
+
91
+ #ifndef HAVE_XMLCTXTGETOPTIONS
92
+ int
93
+ xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
94
+ {
95
+ return (ctxt->options);
96
+ }
97
+ #endif
98
+
99
+ #ifndef HAVE_XMLSWITCHENCODINGNAME
100
+ int
101
+ xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding)
102
+ {
103
+ if (ctxt == NULL) {
104
+ return (-1);
105
+ }
106
+
107
+ xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
108
+ if (handler == NULL) {
109
+ return (-1);
110
+ }
111
+
112
+ return (xmlSwitchToEncoding(ctxt, handler));
113
+ }
114
+ #endif
@@ -46,6 +46,7 @@ void noko_init_html_element_description(void);
46
46
  void noko_init_html_entity_lookup(void);
47
47
  void noko_init_html_sax_parser_context(void);
48
48
  void noko_init_html_sax_push_parser(void);
49
+ void noko_init_html4_sax_parser(void);
49
50
  void noko_init_gumbo(void);
50
51
  void noko_init_test_global_handlers(void);
51
52
 
@@ -184,8 +185,8 @@ Init_nokogiri(void)
184
185
  {
185
186
  mNokogiri = rb_define_module("Nokogiri");
186
187
  mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
187
- mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
188
- mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
188
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
189
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
189
190
  mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
190
191
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
191
192
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
@@ -202,6 +203,9 @@ Init_nokogiri(void)
202
203
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
203
204
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
204
205
 
206
+ rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
207
+ xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
208
+
205
209
  #ifdef NOKOGIRI_PACKAGED_LIBRARIES
206
210
  rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
207
211
  # ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
@@ -244,7 +248,10 @@ Init_nokogiri(void)
244
248
  noko_init_xml_namespace();
245
249
  noko_init_xml_node_set();
246
250
  noko_init_xml_reader();
251
+
247
252
  noko_init_xml_sax_parser();
253
+ noko_init_html4_sax_parser();
254
+
248
255
  noko_init_xml_xpath_context();
249
256
  noko_init_xslt_stylesheet();
250
257
  noko_init_html_element_description();
@@ -6,7 +6,7 @@ VALUE cNokogiriXmlAttr;
6
6
  * call-seq:
7
7
  * value=(content)
8
8
  *
9
- * Set the value for this Attr to +content+. Use `nil` to remove the value
9
+ * Set the value for this Attr to +content+. Use +nil+ to remove the value
10
10
  * (e.g., a HTML boolean attribute).
11
11
  */
12
12
  static VALUE
@@ -20,11 +20,10 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
20
20
  VALUE rb_content;
21
21
  VALUE rb_rest;
22
22
  VALUE rb_node;
23
- xmlChar *c_content = NULL;
24
- int c_content_len = 0;
25
23
 
26
24
  rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
27
25
 
26
+ Check_Type(rb_content, T_STRING);
28
27
  if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
29
28
  rb_raise(rb_eTypeError,
30
29
  "expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
@@ -40,15 +39,8 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
40
39
  c_document = noko_xml_document_unwrap(rb_document);
41
40
  }
42
41
 
43
- if (!NIL_P(rb_content)) {
44
- c_content = (xmlChar *)StringValuePtr(rb_content);
45
- c_content_len = RSTRING_LENINT(rb_content);
46
- }
47
-
48
- c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
49
-
42
+ c_node = xmlNewCDataBlock(c_document, (xmlChar *)StringValueCStr(rb_content), RSTRING_LENINT(rb_content));
50
43
  noko_xml_document_pin_node(c_node);
51
-
52
44
  rb_node = noko_xml_node_wrap(klass, c_node);
53
45
  rb_obj_call_init(rb_node, argc, argv);
54
46
 
@@ -23,25 +23,20 @@ new (int argc, VALUE *argv, VALUE klass)
23
23
 
24
24
  rb_scan_args(argc, argv, "2*", &document, &content, &rest);
25
25
 
26
+ Check_Type(content, T_STRING);
26
27
  if (rb_obj_is_kind_of(document, cNokogiriXmlNode)) {
27
28
  document = rb_funcall(document, document_id, 0);
28
29
  } else if (!rb_obj_is_kind_of(document, cNokogiriXmlDocument)
29
30
  && !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment)) {
30
31
  rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
32
  }
32
-
33
33
  xml_doc = noko_xml_document_unwrap(document);
34
34
 
35
- node = xmlNewDocComment(
36
- xml_doc,
37
- (const xmlChar *)StringValueCStr(content)
38
- );
39
-
35
+ node = xmlNewDocComment(xml_doc, (const xmlChar *)StringValueCStr(content));
36
+ noko_xml_document_pin_node(node);
40
37
  rb_node = noko_xml_node_wrap(klass, node);
41
38
  rb_obj_call_init(rb_node, argc, argv);
42
39
 
43
- noko_xml_document_pin_node(node);
44
-
45
40
  if (rb_block_given_p()) { rb_yield(rb_node); }
46
41
 
47
42
  return rb_node;