nokogiri 1.13.10-java → 1.14.0.rc1-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (117)
  1. checksums.yaml +4 -4
  2. data/Gemfile +33 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/README.md +18 -11
  5. data/dependencies.yml +25 -7
  6. data/ext/java/nokogiri/Html4Document.java +2 -0
  7. data/ext/java/nokogiri/Html4ElementDescription.java +9 -9
  8. data/ext/java/nokogiri/Html4EntityLookup.java +14 -3
  9. data/ext/java/nokogiri/Html4SaxParserContext.java +2 -2
  10. data/ext/java/nokogiri/Html4SaxPushParser.java +3 -0
  11. data/ext/java/nokogiri/NokogiriService.java +1 -24
  12. data/ext/java/nokogiri/XmlAttr.java +1 -1
  13. data/ext/java/nokogiri/XmlAttributeDecl.java +2 -1
  14. data/ext/java/nokogiri/XmlCdata.java +2 -1
  15. data/ext/java/nokogiri/XmlComment.java +2 -1
  16. data/ext/java/nokogiri/XmlDocument.java +5 -6
  17. data/ext/java/nokogiri/XmlDocumentFragment.java +2 -1
  18. data/ext/java/nokogiri/XmlDtd.java +4 -3
  19. data/ext/java/nokogiri/XmlElement.java +1 -0
  20. data/ext/java/nokogiri/XmlElementContent.java +4 -1
  21. data/ext/java/nokogiri/XmlElementDecl.java +3 -1
  22. data/ext/java/nokogiri/XmlEntityDecl.java +2 -0
  23. data/ext/java/nokogiri/XmlEntityReference.java +1 -0
  24. data/ext/java/nokogiri/XmlNamespace.java +2 -0
  25. data/ext/java/nokogiri/XmlNode.java +39 -24
  26. data/ext/java/nokogiri/XmlNodeSet.java +10 -7
  27. data/ext/java/nokogiri/XmlProcessingInstruction.java +1 -0
  28. data/ext/java/nokogiri/XmlReader.java +4 -3
  29. data/ext/java/nokogiri/XmlRelaxng.java +1 -0
  30. data/ext/java/nokogiri/XmlSaxParserContext.java +1 -0
  31. data/ext/java/nokogiri/XmlSaxPushParser.java +3 -0
  32. data/ext/java/nokogiri/XmlSchema.java +4 -2
  33. data/ext/java/nokogiri/XmlSyntaxError.java +1 -0
  34. data/ext/java/nokogiri/XmlText.java +1 -0
  35. data/ext/java/nokogiri/XmlXpathContext.java +2 -0
  36. data/ext/java/nokogiri/XsltStylesheet.java +16 -13
  37. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +3 -2
  38. data/ext/java/nokogiri/internals/NokogiriHandler.java +2 -2
  39. data/ext/java/nokogiri/internals/NokogiriHelpers.java +4 -5
  40. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +3 -3
  41. data/ext/java/nokogiri/internals/ParserContext.java +2 -0
  42. data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
  43. data/ext/java/nokogiri/internals/SaveContextVisitor.java +4 -2
  44. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +2 -2
  45. data/ext/java/nokogiri/internals/XmlDomParserContext.java +2 -1
  46. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +1 -0
  47. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +5 -4
  48. data/ext/nokogiri/extconf.rb +79 -20
  49. data/ext/nokogiri/gumbo.c +19 -9
  50. data/ext/nokogiri/html4_document.c +1 -1
  51. data/ext/nokogiri/html4_entity_lookup.c +1 -1
  52. data/ext/nokogiri/html4_sax_parser_context.c +0 -5
  53. data/ext/nokogiri/nokogiri.c +32 -51
  54. data/ext/nokogiri/xml_attribute_decl.c +1 -1
  55. data/ext/nokogiri/xml_cdata.c +1 -1
  56. data/ext/nokogiri/xml_document.c +16 -11
  57. data/ext/nokogiri/xml_element_content.c +2 -2
  58. data/ext/nokogiri/xml_element_decl.c +1 -1
  59. data/ext/nokogiri/xml_encoding_handler.c +2 -2
  60. data/ext/nokogiri/xml_namespace.c +38 -8
  61. data/ext/nokogiri/xml_node.c +286 -26
  62. data/ext/nokogiri/xml_node_set.c +0 -2
  63. data/ext/nokogiri/xml_reader.c +40 -20
  64. data/ext/nokogiri/xml_relax_ng.c +0 -2
  65. data/ext/nokogiri/xml_sax_parser.c +22 -16
  66. data/ext/nokogiri/xml_sax_parser_context.c +0 -5
  67. data/ext/nokogiri/xml_sax_push_parser.c +0 -2
  68. data/ext/nokogiri/xml_schema.c +0 -2
  69. data/ext/nokogiri/xml_xpath_context.c +87 -83
  70. data/ext/nokogiri/xslt_stylesheet.c +14 -13
  71. data/gumbo-parser/Makefile +10 -0
  72. data/lib/nokogiri/css/node.rb +2 -2
  73. data/lib/nokogiri/css/xpath_visitor.rb +3 -1
  74. data/lib/nokogiri/css.rb +6 -0
  75. data/lib/nokogiri/encoding_handler.rb +57 -0
  76. data/lib/nokogiri/extension.rb +3 -2
  77. data/lib/nokogiri/html4/document.rb +2 -121
  78. data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
  79. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  80. data/lib/nokogiri/html4.rb +1 -0
  81. data/lib/nokogiri/html5/document.rb +113 -36
  82. data/lib/nokogiri/html5/document_fragment.rb +9 -2
  83. data/lib/nokogiri/html5/node.rb +3 -5
  84. data/lib/nokogiri/html5.rb +127 -216
  85. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  86. data/lib/{isorelax.jar → nokogiri/jruby/isorelax/isorelax/20030108/isorelax-20030108.jar} +0 -0
  87. data/lib/nokogiri/jruby/net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar +0 -0
  88. data/lib/nokogiri/jruby/net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar +0 -0
  89. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  90. data/lib/nokogiri/jruby/nu/validator/jing/20200702VNU/jing-20200702VNU.jar +0 -0
  91. data/lib/nokogiri/jruby/org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar +0 -0
  92. data/lib/{serializer.jar → nokogiri/jruby/xalan/serializer/2.7.2/serializer-2.7.2.jar} +0 -0
  93. data/lib/{xalan.jar → nokogiri/jruby/xalan/xalan/2.7.2/xalan-2.7.2.jar} +0 -0
  94. data/lib/{xercesImpl.jar → nokogiri/jruby/xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar} +0 -0
  95. data/lib/{xml-apis.jar → nokogiri/jruby/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar} +0 -0
  96. data/lib/nokogiri/nokogiri.jar +0 -0
  97. data/lib/nokogiri/version/constant.rb +1 -1
  98. data/lib/nokogiri/version/info.rb +11 -10
  99. data/lib/nokogiri/xml/attr.rb +49 -0
  100. data/lib/nokogiri/xml/builder.rb +1 -1
  101. data/lib/nokogiri/xml/document.rb +102 -54
  102. data/lib/nokogiri/xml/document_fragment.rb +49 -6
  103. data/lib/nokogiri/xml/namespace.rb +42 -0
  104. data/lib/nokogiri/xml/node/save_options.rb +4 -2
  105. data/lib/nokogiri/xml/node.rb +190 -35
  106. data/lib/nokogiri/xml/node_set.rb +87 -9
  107. data/lib/nokogiri/xml/parse_options.rb +127 -48
  108. data/lib/nokogiri/xml/pp/node.rb +6 -4
  109. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  110. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  111. data/lib/nokogiri/xslt.rb +1 -1
  112. data/lib/nokogiri.rb +3 -11
  113. metadata +62 -274
  114. data/ext/java/nokogiri/EncodingHandler.java +0 -111
  115. data/lib/jing.jar +0 -0
  116. data/lib/nekodtd.jar +0 -0
  117. data/lib/nekohtml.jar +0 -0
@@ -32,7 +32,6 @@ _xml_namespace_dealloc(void *ptr)
32
32
  * node set. see noko_xml_namespace_wrap().
33
33
  */
34
34
  xmlNsPtr ns = ptr;
35
- NOKOGIRI_DEBUG_START(ns) ;
36
35
 
37
36
  if (ns->href) {
38
37
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
@@ -41,7 +40,6 @@ _xml_namespace_dealloc(void *ptr)
41
40
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix));
42
41
  }
43
42
  xmlFree(ns);
44
- NOKOGIRI_DEBUG_END(ns) ;
45
43
  }
46
44
 
47
45
  #ifdef HAVE_RB_GC_LOCATION
@@ -76,10 +74,26 @@ static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = {
76
74
  };
77
75
 
78
76
  /*
79
- * call-seq:
80
- * prefix
77
+ * :call-seq:
78
+ * prefix() → String or nil
81
79
  *
82
- * Get the prefix for this namespace. Returns +nil+ if there is no prefix.
80
+ * Return the prefix for this Namespace, or +nil+ if there is no prefix (e.g., default namespace).
81
+ *
82
+ * *Example*
83
+ *
84
+ * doc = Nokogiri::XML.parse(<<~XML)
85
+ * <?xml version="1.0"?>
86
+ * <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
87
+ * <child1 foo="abc" noko:bar="def"/>
88
+ * <noko:child2 foo="qwe" noko:bar="rty"/>
89
+ * </root>
90
+ * XML
91
+ *
92
+ * doc.root.elements.first.namespace.prefix
93
+ * # => nil
94
+ *
95
+ * doc.root.elements.last.namespace.prefix
96
+ * # => "noko"
83
97
  */
84
98
  static VALUE
85
99
  prefix(VALUE self)
@@ -93,10 +107,26 @@ prefix(VALUE self)
93
107
  }
94
108
 
95
109
  /*
96
- * call-seq:
97
- * href
110
+ * :call-seq:
111
+ * href() → String
112
+ *
113
+ * Returns the URI reference for this Namespace.
114
+ *
115
+ * *Example*
116
+ *
117
+ * doc = Nokogiri::XML.parse(<<~XML)
118
+ * <?xml version="1.0"?>
119
+ * <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
120
+ * <child1 foo="abc" noko:bar="def"/>
121
+ * <noko:child2 foo="qwe" noko:bar="rty"/>
122
+ * </root>
123
+ * XML
124
+ *
125
+ * doc.root.elements.first.namespace.href
126
+ * # => "http://nokogiri.org/ns/default"
98
127
  *
99
- * Get the href for this namespace
128
+ * doc.root.elements.last.namespace.href
129
+ * # => "http://nokogiri.org/ns/noko"
100
130
  */
101
131
  static VALUE
102
132
  href(VALUE self)
@@ -1,5 +1,7 @@
1
1
  #include <nokogiri.h>
2
2
 
3
+ #include <stdbool.h>
4
+
3
5
  // :stopdoc:
4
6
 
5
7
  VALUE cNokogiriXmlNode ;
@@ -7,20 +9,11 @@ static ID id_decorate, id_decorate_bang;
7
9
 
8
10
  typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
9
11
 
10
- #ifdef DEBUG
11
12
  static void
12
- _xml_node_dealloc(xmlNodePtr x)
13
+ _xml_node_mark(void *ptr)
13
14
  {
14
- NOKOGIRI_DEBUG_START(x)
15
- NOKOGIRI_DEBUG_END(x)
16
- }
17
- #else
18
- # define _xml_node_dealloc 0
19
- #endif
15
+ xmlNodePtr node = ptr;
20
16
 
21
- static void
22
- _xml_node_mark(xmlNodePtr node)
23
- {
24
17
  if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
25
18
  return;
26
19
  }
@@ -37,24 +30,21 @@ _xml_node_mark(xmlNodePtr node)
37
30
 
38
31
  #ifdef HAVE_RB_GC_LOCATION
39
32
  static void
40
- _xml_node_update_references(xmlNodePtr node)
33
+ _xml_node_update_references(void *ptr)
41
34
  {
35
+ xmlNodePtr node = ptr;
36
+
42
37
  if (node->_private) {
43
38
  node->_private = (void *)rb_gc_location((VALUE)node->_private);
44
39
  }
45
40
  }
41
+ #else
42
+ # define _xml_node_update_references 0
46
43
  #endif
47
44
 
48
- typedef void (*gc_callback_t)(void *);
49
-
50
45
  static const rb_data_type_t nokogiri_node_type = {
51
46
  "Nokogiri/XMLNode",
52
- {
53
- (gc_callback_t)_xml_node_mark, (gc_callback_t)_xml_node_dealloc, 0,
54
- #ifdef HAVE_RB_GC_LOCATION
55
- (gc_callback_t)_xml_node_update_references
56
- #endif
57
- },
47
+ {_xml_node_mark, 0, 0, _xml_node_update_references},
58
48
  0, 0,
59
49
  #ifdef RUBY_TYPED_FREE_IMMEDIATELY
60
50
  RUBY_TYPED_FREE_IMMEDIATELY,
@@ -809,7 +799,7 @@ rb_xml_node_pointer_id(VALUE self)
809
799
  xmlNodePtr node;
810
800
  Noko_Node_Get_Struct(self, xmlNode, node);
811
801
 
812
- return INT2NUM((long)(node));
802
+ return rb_uint2inum((uintptr_t)(node));
813
803
  }
814
804
 
815
805
  /*
@@ -1509,7 +1499,7 @@ node_type(VALUE self)
1509
1499
  {
1510
1500
  xmlNodePtr node;
1511
1501
  Noko_Node_Get_Struct(self, xmlNode, node);
1512
- return INT2NUM((long)node->type);
1502
+ return INT2NUM(node->type);
1513
1503
  }
1514
1504
 
1515
1505
  /*
@@ -1724,6 +1714,269 @@ native_write_to(
1724
1714
  return io;
1725
1715
  }
1726
1716
 
1717
+
1718
+ static inline void
1719
+ output_partial_string(VALUE out, char const *str, size_t length)
1720
+ {
1721
+ if (length) {
1722
+ rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
1723
+ }
1724
+ }
1725
+
1726
+ static inline void
1727
+ output_char(VALUE out, char ch)
1728
+ {
1729
+ output_partial_string(out, &ch, 1);
1730
+ }
1731
+
1732
+ static inline void
1733
+ output_string(VALUE out, char const *str)
1734
+ {
1735
+ output_partial_string(out, str, strlen(str));
1736
+ }
1737
+
1738
+ static inline void
1739
+ output_tagname(VALUE out, xmlNodePtr elem)
1740
+ {
1741
+ // Elements in the HTML, MathML, and SVG namespaces do not use a namespace
1742
+ // prefix in the HTML syntax.
1743
+ char const *name = (char const *)elem->name;
1744
+ xmlNsPtr ns = elem->ns;
1745
+ if (ns && ns->href && ns->prefix
1746
+ && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
1747
+ && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
1748
+ && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
1749
+ output_string(out, (char const *)elem->ns->prefix);
1750
+ output_char(out, ':');
1751
+ char const *colon = strchr(name, ':');
1752
+ if (colon) {
1753
+ name = colon + 1;
1754
+ }
1755
+ }
1756
+ output_string(out, name);
1757
+ }
1758
+
1759
+ static inline void
1760
+ output_attr_name(VALUE out, xmlAttrPtr attr)
1761
+ {
1762
+ xmlNsPtr ns = attr->ns;
1763
+ char const *name = (char const *)attr->name;
1764
+ if (ns && ns->href) {
1765
+ char const *uri = (char const *)ns->href;
1766
+ char const *localname = strchr(name, ':');
1767
+ if (localname) {
1768
+ ++localname;
1769
+ } else {
1770
+ localname = name;
1771
+ }
1772
+
1773
+ if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
1774
+ output_string(out, "xml:");
1775
+ name = localname;
1776
+ } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
1777
+ // xmlns:xmlns -> xmlns
1778
+ // xmlns:foo -> xmlns:foo
1779
+ if (strcmp(localname, "xmlns")) {
1780
+ output_string(out, "xmlns:");
1781
+ }
1782
+ name = localname;
1783
+ } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
1784
+ output_string(out, "xlink:");
1785
+ name = localname;
1786
+ } else if (ns->prefix) {
1787
+ output_string(out, (char const *)ns->prefix);
1788
+ output_char(out, ':');
1789
+ name = localname;
1790
+ }
1791
+ }
1792
+ output_string(out, name);
1793
+ }
1794
+
1795
+ static void
1796
+ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1797
+ {
1798
+ xmlChar const *next = start;
1799
+ int ch;
1800
+
1801
+ while ((ch = *next) != 0) {
1802
+ char const *replacement = NULL;
1803
+ size_t replaced_bytes = 1;
1804
+ if (ch == '&') {
1805
+ replacement = "&amp;";
1806
+ } else if (ch == 0xC2 && next[1] == 0xA0) {
1807
+ // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
1808
+ replacement = "&nbsp;";
1809
+ replaced_bytes = 2;
1810
+ } else if (attr && ch == '"') {
1811
+ replacement = "&quot;";
1812
+ } else if (!attr && ch == '<') {
1813
+ replacement = "&lt;";
1814
+ } else if (!attr && ch == '>') {
1815
+ replacement = "&gt;";
1816
+ } else {
1817
+ ++next;
1818
+ continue;
1819
+ }
1820
+ output_partial_string(out, (char const *)start, next - start);
1821
+ output_string(out, replacement);
1822
+ next += replaced_bytes;
1823
+ start = next;
1824
+ }
1825
+ output_partial_string(out, (char const *)start, next - start);
1826
+ }
1827
+
1828
+ static bool
1829
+ should_prepend_newline(xmlNodePtr node)
1830
+ {
1831
+ char const *name = (char const *)node->name;
1832
+ xmlNodePtr child = node->children;
1833
+
1834
+ if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
1835
+ return false;
1836
+ }
1837
+
1838
+ return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
1839
+ }
1840
+
1841
+ static VALUE
1842
+ rb_prepend_newline(VALUE self)
1843
+ {
1844
+ xmlNodePtr node;
1845
+ Noko_Node_Get_Struct(self, xmlNode, node);
1846
+ return should_prepend_newline(node) ? Qtrue : Qfalse;
1847
+ }
1848
+
1849
+ static bool
1850
+ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1851
+ {
1852
+ char const *name = (char const *)node->name;
1853
+ if (name == NULL) { // fragments don't have a name
1854
+ return false;
1855
+ }
1856
+ for (size_t idx = 0; idx < num_tagnames; ++idx) {
1857
+ if (!strcmp(name, tagnames[idx])) {
1858
+ return true;
1859
+ }
1860
+ }
1861
+ return false;
1862
+
1863
+ }
1864
+
1865
+ static void
1866
+ output_node(
1867
+ VALUE out,
1868
+ xmlNodePtr node,
1869
+ bool preserve_newline
1870
+ )
1871
+ {
1872
+ static char const *const VOID_ELEMENTS[] = {
1873
+ "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
1874
+ "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
1875
+ };
1876
+
1877
+ static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
1878
+ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
1879
+ };
1880
+
1881
+ switch (node->type) {
1882
+ case XML_ELEMENT_NODE:
1883
+ // Serialize the start tag.
1884
+ output_char(out, '<');
1885
+ output_tagname(out, node);
1886
+
1887
+ // Add attributes.
1888
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1889
+ output_char(out, ' ');
1890
+ output_attr_name(out, attr);
1891
+ if (attr->children) {
1892
+ output_string(out, "=\"");
1893
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1894
+ output_escaped_string(out, value, true);
1895
+ xmlFree(value);
1896
+ output_char(out, '"');
1897
+ } else {
1898
+ // Output name=""
1899
+ output_string(out, "=\"\"");
1900
+ }
1901
+ }
1902
+ output_char(out, '>');
1903
+
1904
+ // Add children and end tag if element is not void.
1905
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
1906
+ if (preserve_newline && should_prepend_newline(node)) {
1907
+ output_char(out, '\n');
1908
+ }
1909
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1910
+ output_node(out, child, preserve_newline);
1911
+ }
1912
+ output_string(out, "</");
1913
+ output_tagname(out, node);
1914
+ output_char(out, '>');
1915
+ }
1916
+ break;
1917
+
1918
+ case XML_TEXT_NODE:
1919
+ if (node->parent
1920
+ && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
1921
+ sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
1922
+ output_string(out, (char const *)node->content);
1923
+ } else {
1924
+ output_escaped_string(out, node->content, false);
1925
+ }
1926
+ break;
1927
+
1928
+ case XML_CDATA_SECTION_NODE:
1929
+ output_string(out, "<![CDATA[");
1930
+ output_string(out, (char const *)node->content);
1931
+ output_string(out, "]]>");
1932
+ break;
1933
+
1934
+ case XML_COMMENT_NODE:
1935
+ output_string(out, "<!--");
1936
+ output_string(out, (char const *)node->content);
1937
+ output_string(out, "-->");
1938
+ break;
1939
+
1940
+ case XML_PI_NODE:
1941
+ output_string(out, "<?");
1942
+ output_string(out, (char const *)node->content);
1943
+ output_char(out, '>');
1944
+ break;
1945
+
1946
+ case XML_DOCUMENT_TYPE_NODE:
1947
+ case XML_DTD_NODE:
1948
+ output_string(out, "<!DOCTYPE ");
1949
+ output_string(out, (char const *)node->name);
1950
+ output_string(out, ">");
1951
+ break;
1952
+
1953
+ case XML_DOCUMENT_NODE:
1954
+ case XML_DOCUMENT_FRAG_NODE:
1955
+ case XML_HTML_DOCUMENT_NODE:
1956
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1957
+ output_node(out, child, preserve_newline);
1958
+ }
1959
+ break;
1960
+
1961
+ default:
1962
+ rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
1963
+ break;
1964
+ }
1965
+ }
1966
+
1967
+ static VALUE
1968
+ html_standard_serialize(
1969
+ VALUE self,
1970
+ VALUE preserve_newline
1971
+ )
1972
+ {
1973
+ xmlNodePtr node;
1974
+ Noko_Node_Get_Struct(self, xmlNode, node);
1975
+ VALUE output = rb_str_buf_new(4096);
1976
+ output_node(output, node, RTEST(preserve_newline));
1977
+ return output;
1978
+ }
1979
+
1727
1980
  /*
1728
1981
  * :call-seq:
1729
1982
  * line() → Integer
@@ -1757,7 +2010,7 @@ rb_xml_node_line(VALUE rb_node)
1757
2010
  xmlNodePtr c_node;
1758
2011
  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1759
2012
 
1760
- return INT2NUM(xmlGetLineNo(c_node));
2013
+ return LONG2NUM(xmlGetLineNo(c_node));
1761
2014
  }
1762
2015
 
1763
2016
  /*
@@ -1860,7 +2113,7 @@ compare(VALUE self, VALUE _other)
1860
2113
  Noko_Node_Get_Struct(self, xmlNode, node);
1861
2114
  Noko_Node_Get_Struct(_other, xmlNode, other);
1862
2115
 
1863
- return INT2NUM((long)xmlXPathCmpNodes(other, node));
2116
+ return INT2NUM(xmlXPathCmpNodes(other, node));
1864
2117
  }
1865
2118
 
1866
2119
 
@@ -1960,12 +2213,17 @@ in_context(VALUE self, VALUE _str, VALUE _options)
1960
2213
 
1961
2214
  xmlSetStructuredErrorFunc(NULL, NULL);
1962
2215
 
1963
- /* Workaround for a libxml2 bug where a parsing error may leave a broken
2216
+ /*
2217
+ * Workaround for a libxml2 bug where a parsing error may leave a broken
1964
2218
  * node reference in node->doc->children.
2219
+ *
2220
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2221
+ *
1965
2222
  * This workaround is limited to when a parse error occurs, the document
1966
2223
  * went from having no children to having children, and the context node is
1967
2224
  * part of a document fragment.
1968
- * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2225
+ *
2226
+ * TODO: This was fixed in libxml 2.8.0 by 71a243d
1969
2227
  */
1970
2228
  if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
1971
2229
  child_iter = node;
@@ -2155,6 +2413,8 @@ noko_init_xml_node()
2155
2413
  rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
2156
2414
  rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
2157
2415
  rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2416
+ rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2417
+ rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2158
2418
  rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2159
2419
  rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2160
2420
  rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
@@ -88,13 +88,11 @@ deallocate(xmlNodeSetPtr node_set)
88
88
  * For reasons outlined in xml_namespace.c, here we reproduce xmlXPathFreeNodeSet() except for the
89
89
  * offending call to xmlXPathNodeSetFreeNs().
90
90
  */
91
- NOKOGIRI_DEBUG_START(node_set) ;
92
91
  if (node_set->nodeTab != NULL) {
93
92
  xmlFree(node_set->nodeTab);
94
93
  }
95
94
 
96
95
  xmlFree(node_set);
97
- NOKOGIRI_DEBUG_END(node_set) ;
98
96
  }
99
97
 
100
98
 
@@ -5,9 +5,7 @@ VALUE cNokogiriXmlReader;
5
5
  static void
6
6
  dealloc(xmlTextReaderPtr reader)
7
7
  {
8
- NOKOGIRI_DEBUG_START(reader);
9
8
  xmlFreeTextReader(reader);
10
- NOKOGIRI_DEBUG_END(reader);
11
9
  }
12
10
 
13
11
  static int
@@ -126,26 +124,37 @@ attributes_eh(VALUE self)
126
124
  * Get a hash of namespaces for this Node
127
125
  */
128
126
  static VALUE
129
- namespaces(VALUE self)
127
+ rb_xml_reader_namespaces(VALUE rb_reader)
130
128
  {
131
- xmlTextReaderPtr reader;
132
- xmlNodePtr ptr;
133
- VALUE attr ;
134
-
135
- Data_Get_Struct(self, xmlTextReader, reader);
129
+ VALUE rb_namespaces = rb_hash_new() ;
130
+ xmlTextReaderPtr c_reader;
131
+ xmlNodePtr c_node;
132
+ VALUE rb_errors;
136
133
 
137
- attr = rb_hash_new() ;
134
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
138
135
 
139
- if (! has_attributes(reader)) {
140
- return attr ;
136
+ if (! has_attributes(c_reader)) {
137
+ return rb_namespaces ;
141
138
  }
142
139
 
143
- ptr = xmlTextReaderExpand(reader);
144
- if (ptr == NULL) { return Qnil; }
140
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
145
141
 
146
- Nokogiri_xml_node_namespaces(ptr, attr);
142
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
143
+ c_node = xmlTextReaderExpand(c_reader);
144
+ xmlSetStructuredErrorFunc(NULL, NULL);
147
145
 
148
- return attr ;
146
+ if (c_node == NULL) {
147
+ if (RARRAY_LEN(rb_errors) > 0) {
148
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
149
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
150
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
151
+ }
152
+ return Qnil;
153
+ }
154
+
155
+ Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
156
+
157
+ return rb_namespaces ;
149
158
  }
150
159
 
151
160
  /*
@@ -204,6 +213,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
204
213
  xmlTextReaderPtr c_reader;
205
214
  xmlNodePtr c_node;
206
215
  xmlAttrPtr c_property;
216
+ VALUE rb_errors;
207
217
 
208
218
  Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
209
219
 
@@ -211,8 +221,18 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
211
221
  return rb_attributes;
212
222
  }
213
223
 
224
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
225
+
226
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
214
227
  c_node = xmlTextReaderExpand(c_reader);
228
+ xmlSetStructuredErrorFunc(NULL, NULL);
229
+
215
230
  if (c_node == NULL) {
231
+ if (RARRAY_LEN(rb_errors) > 0) {
232
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
233
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
234
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
235
+ }
216
236
  return Qnil;
217
237
  }
218
238
 
@@ -306,7 +326,7 @@ attribute_count(VALUE self)
306
326
  count = xmlTextReaderAttributeCount(reader);
307
327
  if (count == -1) { return Qnil; }
308
328
 
309
- return INT2NUM((long)count);
329
+ return INT2NUM(count);
310
330
  }
311
331
 
312
332
  /*
@@ -325,7 +345,7 @@ depth(VALUE self)
325
345
  depth = xmlTextReaderDepth(reader);
326
346
  if (depth == -1) { return Qnil; }
327
347
 
328
- return INT2NUM((long)depth);
348
+ return INT2NUM(depth);
329
349
  }
330
350
 
331
351
  /*
@@ -498,7 +518,7 @@ state(VALUE self)
498
518
  {
499
519
  xmlTextReaderPtr reader;
500
520
  Data_Get_Struct(self, xmlTextReader, reader);
501
- return INT2NUM((long)xmlTextReaderReadState(reader));
521
+ return INT2NUM(xmlTextReaderReadState(reader));
502
522
  }
503
523
 
504
524
  /*
@@ -512,7 +532,7 @@ node_type(VALUE self)
512
532
  {
513
533
  xmlTextReaderPtr reader;
514
534
  Data_Get_Struct(self, xmlTextReader, reader);
515
- return INT2NUM((long)xmlTextReaderNodeType(reader));
535
+ return INT2NUM(xmlTextReaderNodeType(reader));
516
536
  }
517
537
 
518
538
  /*
@@ -762,7 +782,7 @@ noko_init_xml_reader()
762
782
  rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
763
783
  rb_define_method(cNokogiriXmlReader, "name", name, 0);
764
784
  rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
765
- rb_define_method(cNokogiriXmlReader, "namespaces", namespaces, 0);
785
+ rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
766
786
  rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
767
787
  rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
768
788
  rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
@@ -5,9 +5,7 @@ VALUE cNokogiriXmlRelaxNG;
5
5
  static void
6
6
  dealloc(xmlRelaxNGPtr schema)
7
7
  {
8
- NOKOGIRI_DEBUG_START(schema);
9
8
  xmlRelaxNGFree(schema);
10
- NOKOGIRI_DEBUG_END(schema);
11
9
  }
12
10
 
13
11
  /*
@@ -195,40 +195,48 @@ comment_func(void *ctx, const xmlChar *value)
195
195
  rb_funcall(doc, id_comment, 1, str);
196
196
  }
197
197
 
198
+ PRINTFLIKE_DECL(2, 3)
198
199
  static void
199
200
  warning_func(void *ctx, const char *msg, ...)
200
201
  {
201
202
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
202
203
  VALUE doc = rb_iv_get(self, "@document");
203
- char *message;
204
- VALUE ruby_message;
204
+ VALUE rb_message;
205
205
 
206
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
207
+ /* It is not currently possible to pass var args from native
208
+ functions to sulong, so we work around the issue here. */
209
+ rb_message = rb_sprintf("warning_func: %s", msg);
210
+ #else
206
211
  va_list args;
207
212
  va_start(args, msg);
208
- vasprintf(&message, msg, args);
213
+ rb_message = rb_vsprintf(msg, args);
209
214
  va_end(args);
215
+ #endif
210
216
 
211
- ruby_message = NOKOGIRI_STR_NEW2(message);
212
- free(message);
213
- rb_funcall(doc, id_warning, 1, ruby_message);
217
+ rb_funcall(doc, id_warning, 1, rb_message);
214
218
  }
215
219
 
220
+ PRINTFLIKE_DECL(2, 3)
216
221
  static void
217
222
  error_func(void *ctx, const char *msg, ...)
218
223
  {
219
224
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
220
225
  VALUE doc = rb_iv_get(self, "@document");
221
- char *message;
222
- VALUE ruby_message;
226
+ VALUE rb_message;
223
227
 
228
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
229
+ /* It is not currently possible to pass var args from native
230
+ functions to sulong, so we work around the issue here. */
231
+ rb_message = rb_sprintf("error_func: %s", msg);
232
+ #else
224
233
  va_list args;
225
234
  va_start(args, msg);
226
- vasprintf(&message, msg, args);
235
+ rb_message = rb_vsprintf(msg, args);
227
236
  va_end(args);
237
+ #endif
228
238
 
229
- ruby_message = NOKOGIRI_STR_NEW2(message);
230
- free(message);
231
- rb_funcall(doc, id_error, 1, ruby_message);
239
+ rb_funcall(doc, id_error, 1, rb_message);
232
240
  }
233
241
 
234
242
  static void
@@ -260,15 +268,13 @@ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
260
268
  static void
261
269
  deallocate(xmlSAXHandlerPtr handler)
262
270
  {
263
- NOKOGIRI_DEBUG_START(handler);
264
- free(handler);
265
- NOKOGIRI_DEBUG_END(handler);
271
+ ruby_xfree(handler);
266
272
  }
267
273
 
268
274
  static VALUE
269
275
  allocate(VALUE klass)
270
276
  {
271
- xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
277
+ xmlSAXHandlerPtr handler = ruby_xcalloc((size_t)1, sizeof(xmlSAXHandler));
272
278
 
273
279
  handler->startDocument = start_document;
274
280
  handler->endDocument = end_document;