nokogiri 1.9.1 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +45 -0
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -89
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +864 -418
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -240
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +135 -61
  33. data/ext/nokogiri/xml_node.c +1346 -677
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1088 -418
  142. data/lib/nokogiri/xml/node_set.rb +173 -63
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +128 -265
  178. data/ext/nokogiri/html_document.c +0 -170
  179. data/ext/nokogiri/html_document.h +0 -10
  180. data/ext/nokogiri/html_element_description.c +0 -279
  181. data/ext/nokogiri/html_element_description.h +0 -10
  182. data/ext/nokogiri/html_entity_lookup.c +0 -32
  183. data/ext/nokogiri/html_entity_lookup.h +0 -8
  184. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  185. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  186. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  187. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  188. data/ext/nokogiri/xml_attr.h +0 -9
  189. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  190. data/ext/nokogiri/xml_cdata.h +0 -9
  191. data/ext/nokogiri/xml_comment.h +0 -9
  192. data/ext/nokogiri/xml_document.h +0 -23
  193. data/ext/nokogiri/xml_document_fragment.h +0 -10
  194. data/ext/nokogiri/xml_dtd.h +0 -10
  195. data/ext/nokogiri/xml_element_content.h +0 -10
  196. data/ext/nokogiri/xml_element_decl.h +0 -9
  197. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  198. data/ext/nokogiri/xml_entity_decl.h +0 -10
  199. data/ext/nokogiri/xml_entity_reference.h +0 -9
  200. data/ext/nokogiri/xml_io.c +0 -61
  201. data/ext/nokogiri/xml_io.h +0 -11
  202. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  203. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  204. data/ext/nokogiri/xml_namespace.h +0 -14
  205. data/ext/nokogiri/xml_node.h +0 -13
  206. data/ext/nokogiri/xml_node_set.h +0 -12
  207. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  208. data/ext/nokogiri/xml_reader.h +0 -10
  209. data/ext/nokogiri/xml_relax_ng.h +0 -9
  210. data/ext/nokogiri/xml_sax_parser.h +0 -39
  211. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  212. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  213. data/ext/nokogiri/xml_schema.h +0 -9
  214. data/ext/nokogiri/xml_syntax_error.h +0 -13
  215. data/ext/nokogiri/xml_text.h +0 -9
  216. data/ext/nokogiri/xml_xpath_context.h +0 -10
  217. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  218. data/lib/nokogiri/html/document.rb +0 -335
  219. data/lib/nokogiri/html/document_fragment.rb +0 -49
  220. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  221. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  222. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  223. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  224. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  225. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  226. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,4 +1,5 @@
1
- require 'nokogiri/xslt/stylesheet'
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
4
5
  class << self
@@ -9,7 +10,7 @@ module Nokogiri
9
10
  #
10
11
  # xslt = Nokogiri::XSLT(File.read(ARGV[0]))
11
12
  #
12
- def XSLT stylesheet, modules = {}
13
+ def XSLT(stylesheet, modules = {})
13
14
  XSLT.parse(stylesheet, modules)
14
15
  end
15
16
  end
@@ -19,38 +20,116 @@ module Nokogiri
19
20
  # Stylesheet object.
20
21
  module XSLT
21
22
  class << self
22
- ###
23
- # Parse the stylesheet in +string+, register any +modules+
24
- def parse string, modules = {}
23
+ # :call-seq:
24
+ # parse(xsl) → Nokogiri::XSLT::Stylesheet
25
+ # parse(xsl, modules) → Nokogiri::XSLT::Stylesheet
26
+ #
27
+ # Parse the stylesheet in +xsl+, registering optional +modules+ as custom class handlers.
28
+ #
29
+ # [Parameters]
30
+ # - +xsl+ (String) XSL content to be parsed into a stylesheet
31
+ # - +modules+ (Hash<String ⇒ Class>) A hash of URI-to-handler relations for linking a
32
+ # namespace to a custom function handler.
33
+ #
34
+ # ⚠ The XSLT handler classes are registered *globally*.
35
+ #
36
+ # Also see Nokogiri::XSLT.register
37
+ #
38
+ # *Example*
39
+ #
40
+ # xml = Nokogiri.XML(<<~XML)
41
+ # <nodes>
42
+ # <node>Foo</node>
43
+ # <node>Bar</node>
44
+ # </nodes>
45
+ # XML
46
+ #
47
+ # handler = Class.new do
48
+ # def reverse(node)
49
+ # node.text.reverse
50
+ # end
51
+ # end
52
+ #
53
+ # xsl = <<~XSL
54
+ # <xsl:stylesheet version="1.0"
55
+ # xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
56
+ # xmlns:myfuncs="http://nokogiri.org/xslt/myfuncs"
57
+ # extension-element-prefixes="myfuncs">
58
+ # <xsl:template match="/">
59
+ # <reversed>
60
+ # <xsl:for-each select="nodes/node">
61
+ # <reverse><xsl:copy-of select="myfuncs:reverse(.)"/></reverse>
62
+ # </xsl:for-each>
63
+ # </reversed>
64
+ # </xsl:template>
65
+ # </xsl:stylesheet>
66
+ # XSL
67
+ #
68
+ # xsl = Nokogiri.XSLT(xsl, "http://nokogiri.org/xslt/myfuncs" => handler)
69
+ # xsl.transform(xml).to_xml
70
+ # # => "<?xml version=\"1.0\"?>\n" +
71
+ # # "<reversed>\n" +
72
+ # # " <reverse>ooF</reverse>\n" +
73
+ # # " <reverse>raB</reverse>\n" +
74
+ # # "</reversed>\n"
75
+ #
76
+ def parse(string, modules = {})
25
77
  modules.each do |url, klass|
26
- XSLT.register url, klass
78
+ XSLT.register(url, klass)
27
79
  end
28
80
 
81
+ doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
29
82
  if Nokogiri.jruby?
30
- Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
83
+ Stylesheet.parse_stylesheet_doc(doc, string)
31
84
  else
32
- Stylesheet.parse_stylesheet_doc(XML.parse(string))
85
+ Stylesheet.parse_stylesheet_doc(doc)
33
86
  end
34
87
  end
35
88
 
36
- ###
37
- # Quote parameters in +params+ for stylesheet safety
38
- def quote_params params
39
- parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
40
- parray.each_with_index do |v,i|
41
- if i % 2 > 0
42
- parray[i]=
43
- if v =~ /'/
44
- "concat('#{ v.gsub(/'/, %q{', "'", '}) }')"
45
- else
46
- "'#{v}'";
47
- end
89
+ # :call-seq:
90
+ # quote_params(params) → Array
91
+ #
92
+ # Quote parameters in +params+ for stylesheet safety.
93
+ # See Nokogiri::XSLT::Stylesheet.transform for example usage.
94
+ #
95
+ # [Parameters]
96
+ # - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
97
+ #
98
+ # [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
99
+ #
100
+ def quote_params(params)
101
+ params.flatten.each_slice(2).with_object([]) do |kv, quoted_params|
102
+ key, value = kv.map(&:to_s)
103
+ value = if value.include?("'")
104
+ "concat('#{value.gsub(/'/, %q{', "'", '})}')"
48
105
  else
49
- parray[i] = v.to_s
106
+ "'#{value}'"
50
107
  end
108
+ quoted_params << key
109
+ quoted_params << value
51
110
  end
52
- parray.flatten
53
111
  end
112
+
113
+ # call-seq:
114
+ # register(uri, custom_handler_class)
115
+ #
116
+ # Register a class that implements custom XSLT transformation functions.
117
+ #
118
+ # ⚠ The XSLT handler classes are registered *globally*.
119
+ #
120
+ # [Parameters]
121
+ # - +uri+ (String) The namespace for the custom handlers
122
+ # - +custom_handler_class+ (Class) A class with ruby methods that can be called during
123
+ # transformation
124
+ #
125
+ # See Nokogiri::XSLT.parse for usage.
126
+ #
127
+ def register(uri, custom_handler_class)
128
+ # NOTE: this is implemented in the C extension, see ext/nokogiri/xslt_stylesheet.c
129
+ raise NotImplementedError, "Nokogiri::XSLT.register is not implemented on JRuby"
130
+ end if Nokogiri.jruby?
54
131
  end
55
132
  end
56
133
  end
134
+
135
+ require_relative "xslt/stylesheet"
data/lib/nokogiri.rb CHANGED
@@ -1,96 +1,73 @@
1
- # -*- coding: utf-8 -*-
2
- # Modify the PATH on windows so that the external DLLs will get loaded.
3
-
4
- require 'rbconfig'
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
5
3
 
6
4
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
7
- # The line below caused a problem on non-GAE rack environment.
8
- # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
9
- #
10
- # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
11
- # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
12
- # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
13
- # should skip loading xml jars. This is because those are in WEB-INF/lib and
14
- # already set in the classpath.
15
- unless $LOAD_PATH.to_s.include?("appengine-rack")
16
- require 'stringio'
17
- require 'isorelax.jar'
18
- require 'jing.jar'
19
- require 'nekohtml.jar'
20
- require 'nekodtd.jar'
21
- require 'xercesImpl.jar'
22
- require 'serializer.jar'
23
- require 'xalan.jar'
24
- require 'xml-apis.jar'
25
- end
5
+ require_relative "nokogiri/jruby/dependencies"
26
6
  end
27
7
 
28
- begin
29
- RUBY_VERSION =~ /(\d+\.\d+)/
30
- require "nokogiri/#{$1}/nokogiri"
31
- rescue LoadError
32
- require 'nokogiri/nokogiri'
33
- end
34
- require 'nokogiri/version'
35
- require 'nokogiri/syntax_error'
36
- require 'nokogiri/xml'
37
- require 'nokogiri/xslt'
38
- require 'nokogiri/html'
39
- require 'nokogiri/decorators/slop'
40
- require 'nokogiri/css'
41
- require 'nokogiri/html/builder'
8
+ require_relative "nokogiri/extension"
42
9
 
43
10
  # Nokogiri parses and searches XML/HTML very quickly, and also has
44
11
  # correctly implemented CSS3 selector support as well as XPath 1.0
45
12
  # support.
46
13
  #
47
14
  # Parsing a document returns either a Nokogiri::XML::Document, or a
48
- # Nokogiri::HTML::Document depending on the kind of document you parse.
15
+ # Nokogiri::HTML4::Document depending on the kind of document you parse.
49
16
  #
50
17
  # Here is an example:
51
18
  #
52
- # require 'nokogiri'
53
- # require 'open-uri'
19
+ # require 'nokogiri'
20
+ # require 'open-uri'
21
+ #
22
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
54
23
  #
55
- # # Get a Nokogiri::HTML:Document for the page we’re interested in...
24
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
56
25
  #
57
- # doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
26
+ # # Do funky things with it using Nokogiri::XML::Node methods...
58
27
  #
59
- # # Do funky things with it using Nokogiri::XML::Node methods...
28
+ # ####
29
+ # # Search for nodes by css
30
+ # doc.css('h3.r a.l').each do |link|
31
+ # puts link.content
32
+ # end
60
33
  #
61
- # ####
62
- # # Search for nodes by css
63
- # doc.css('h3.r a.l').each do |link|
64
- # puts link.content
65
- # end
34
+ # See also:
66
35
  #
67
- # See Nokogiri::XML::Searchable#css for more information about CSS searching.
68
- # See Nokogiri::XML::Searchable#xpath for more information about XPath searching.
36
+ # - Nokogiri::XML::Searchable#css for more information about CSS searching
37
+ # - Nokogiri::XML::Searchable#xpath for more information about XPath searching
69
38
  module Nokogiri
70
39
  class << self
71
40
  ###
72
41
  # Parse an HTML or XML document. +string+ contains the document.
73
- def parse string, url = nil, encoding = nil, options = nil
42
+ def parse(string, url = nil, encoding = nil, options = nil)
74
43
  if string.respond_to?(:read) ||
75
- /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
44
+ /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])
76
45
  # Expect an HTML indicator to appear within the first 512
77
46
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
78
47
  # shouldn't be that long)
79
- Nokogiri.HTML(string, url, encoding,
80
- options || XML::ParseOptions::DEFAULT_HTML)
48
+ Nokogiri.HTML4(
49
+ string,
50
+ url,
51
+ encoding,
52
+ options || XML::ParseOptions::DEFAULT_HTML,
53
+ )
81
54
  else
82
- Nokogiri.XML(string, url, encoding,
83
- options || XML::ParseOptions::DEFAULT_XML)
84
- end.tap { |doc|
55
+ Nokogiri.XML(
56
+ string,
57
+ url,
58
+ encoding,
59
+ options || XML::ParseOptions::DEFAULT_XML,
60
+ )
61
+ end.tap do |doc|
85
62
  yield doc if block_given?
86
- }
63
+ end
87
64
  end
88
65
 
89
66
  ###
90
67
  # Create a new Nokogiri::XML::DocumentFragment
91
- def make input = nil, opts = {}, &blk
68
+ def make(input = nil, opts = {}, &blk)
92
69
  if input
93
- Nokogiri::HTML.fragment(input).children.first
70
+ Nokogiri::HTML4.fragment(input).children.first
94
71
  else
95
72
  Nokogiri(&blk)
96
73
  end
@@ -115,30 +92,37 @@ module Nokogiri
115
92
  Nokogiri(*args, &block).slop!
116
93
  end
117
94
 
95
+ # :nodoc:
118
96
  def install_default_aliases
119
- # Make sure to support some popular encoding aliases not known by
120
- # all iconv implementations.
121
- {
122
- 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
123
- }.each { |alias_name, name|
124
- EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
125
- }
97
+ warn("Nokogiri.install_default_aliases is deprecated and will be removed in a future version of Nokogiri. Please call Nokogiri::EncodingHandler.install_default_aliases instead.")
98
+ Nokogiri::EncodingHandler.install_default_aliases
126
99
  end
127
100
  end
128
-
129
- Nokogiri.install_default_aliases
130
101
  end
131
102
 
132
103
  ###
133
- # Parser a document contained in +args+. Nokogiri will try to guess what
134
- # type of document you are attempting to parse. For more information, see
135
- # Nokogiri.parse
104
+ # Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
105
+ # attempting to parse. For more information, see Nokogiri.parse
136
106
  #
137
- # To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
107
+ # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
138
108
  def Nokogiri(*args, &block)
139
- if block_given?
140
- Nokogiri::HTML::Builder.new(&block).doc.root
109
+ if block
110
+ Nokogiri::HTML4::Builder.new(&block).doc.root
141
111
  else
142
112
  Nokogiri.parse(*args)
143
113
  end
144
114
  end
115
+
116
+ require_relative "nokogiri/version"
117
+ require_relative "nokogiri/class_resolver"
118
+ require_relative "nokogiri/syntax_error"
119
+ require_relative "nokogiri/xml"
120
+ require_relative "nokogiri/xslt"
121
+ require_relative "nokogiri/html4"
122
+ require_relative "nokogiri/html"
123
+ require_relative "nokogiri/decorators/slop"
124
+ require_relative "nokogiri/css"
125
+ require_relative "nokogiri/html4/builder"
126
+ require_relative "nokogiri/encoding_handler"
127
+
128
+ require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
data/lib/xsd/xmlparser/nokogiri.rb CHANGED
@@ -1,7 +1,9 @@
1
- require 'nokogiri'
1
+ # frozen_string_literal: true
2
2
 
3
- module XSD # :nodoc:
4
- module XMLParser # :nodoc:
3
+ require "nokogiri"
4
+
5
+ module XSD
6
+ module XMLParser
5
7
  ###
6
8
  # Nokogiri XML parser for soap4r.
7
9
  #
@@ -26,40 +28,40 @@ module XSD # :nodoc:
26
28
  class Nokogiri < XSD::XMLParser::Parser
27
29
  ###
28
30
  # Create a new XSD parser with +host+ and +opt+
29
- def initialize host, opt = {}
31
+ def initialize(host, opt = {})
30
32
  super
31
- @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || 'UTF-8')
33
+ @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || "UTF-8")
32
34
  end
33
35
 
34
36
  ###
35
37
  # Start parsing +string_or_readable+
36
- def do_parse string_or_readable
38
+ def do_parse(string_or_readable)
37
39
  @parser.parse(string_or_readable)
38
40
  end
39
41
 
40
42
  ###
41
43
  # Handle the start_element event with +name+ and +attrs+
42
- def start_element name, attrs = []
44
+ def start_element(name, attrs = [])
43
45
  super(name, Hash[*attrs.flatten])
44
46
  end
45
47
 
46
48
  ###
47
49
  # Handle the end_element event with +name+
48
- def end_element name
50
+ def end_element(name)
49
51
  super
50
52
  end
51
53
 
52
54
  ###
53
55
  # Handle errors with message +msg+
54
- def error msg
55
- raise ParseError.new(msg)
56
+ def error(msg)
57
+ raise ParseError, msg
56
58
  end
57
- alias :warning :error
59
+ alias_method :warning, :error
58
60
 
59
61
  ###
60
62
  # Handle cdata_blocks containing +string+
61
- def cdata_block string
62
- characters string
63
+ def cdata_block(string)
64
+ characters(string)
63
65
  end
64
66
 
65
67
  ###
@@ -69,16 +71,16 @@ module XSD # :nodoc:
69
71
  # +prefix+ is the namespace prefix for the element
70
72
  # +uri+ is the associated namespace URI
71
73
  # +ns+ is a hash of namespace prefix:urls associated with the element
72
- def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
74
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
73
75
  ###
74
76
  # Deal with SAX v1 interface
75
- name = [prefix, name].compact.join(':')
76
- attributes = ns.map { |ns_prefix,ns_uri|
77
- [['xmlns', ns_prefix].compact.join(':'), ns_uri]
78
- } + attrs.map { |attr|
79
- [[attr.prefix, attr.localname].compact.join(':'), attr.value]
80
- }.flatten
81
- start_element name, attributes
77
+ name = [prefix, name].compact.join(":")
78
+ attributes = ns.map do |ns_prefix, ns_uri|
79
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
80
+ end + attrs.map do |attr|
81
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
82
+ end.flatten
83
+ start_element(name, attributes)
82
84
  end
83
85
 
84
86
  ###
@@ -86,14 +88,16 @@ module XSD # :nodoc:
86
88
  # +name+ is the element's name
87
89
  # +prefix+ is the namespace prefix associated with the element
88
90
  # +uri+ is the associated namespace URI
89
- def end_element_namespace name, prefix = nil, uri = nil
91
+ def end_element_namespace(name, prefix = nil, uri = nil)
90
92
  ###
91
93
  # Deal with SAX v1 interface
92
- end_element [prefix, name].compact.join(':')
94
+ end_element([prefix, name].compact.join(":"))
93
95
  end
94
96
 
95
- %w{ xmldecl start_document end_document comment }.each do |name|
96
- class_eval %{ def #{name}(*args); end }
97
+ ["xmldecl", "start_document", "end_document", "comment"].each do |name|
98
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
99
+ def #{name}(*args); end
100
+ RUBY
97
101
  end
98
102
 
99
103
  add_factory(self)
data/patches/libxml2/0001-Remove-script-macro-support.patch ADDED
@@ -0,0 +1,40 @@
1
+ From 27e4aa8d885e47a296ea78d114dbbe8fc7aa3508 Mon Sep 17 00:00:00 2001
2
+ From: Kevin Solorio <soloriok@gmail.com>
3
+ Date: Fri, 1 Feb 2019 14:32:42 -0800
4
+ Subject: [PATCH] Revert-support-html-h-b-7-1
5
+
6
+ ---
7
+ entities.c | 17 -----------------
8
+ 1 file changed, 17 deletions(-)
9
+
10
+ diff --git a/entities.c b/entities.c
11
+ index 43549bc5..82652f6d 100644
12
+ --- a/entities.c
13
+ +++ b/entities.c
14
+ @@ -623,23 +623,6 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
15
+ *out++ = 't';
16
+ *out++ = ';';
17
+ } else if (*cur == '&') {
18
+ - /*
19
+ - * Special handling of &{...} construct from HTML 4, see
20
+ - * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
21
+ - */
22
+ - if (html && attr && (cur[1] == '{') &&
23
+ - (strchr((const char *) cur, '}'))) {
24
+ - while (*cur != '}') {
25
+ - *out++ = *cur++;
26
+ - indx = out - buffer;
27
+ - if (indx + 100 > buffer_size) {
28
+ - growBufferReentrant();
29
+ - out = &buffer[indx];
30
+ - }
31
+ - }
32
+ - *out++ = *cur++;
33
+ - continue;
34
+ - }
35
+ *out++ = '&';
36
+ *out++ = 'a';
37
+ *out++ = 'm';
38
+ --
39
+ 2.16.2
40
+
data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch ADDED
@@ -0,0 +1,44 @@
1
+ From ffc08467744bd2305d41ca882c37fa30adf3a067 Mon Sep 17 00:00:00 2001
2
+ From: Kevin Solorio <soloriok@gmail.com>
3
+ Date: Wed, 27 Feb 2019 14:34:17 -0800
4
+ Subject: [PATCH 2/2] update entities.c to remove handling of ssi
5
+
6
+ ---
7
+ entities.c | 21 ---------------------
8
+ 1 file changed, 21 deletions(-)
9
+
10
+ diff --git a/entities.c b/entities.c
11
+ index 43549bc5..5c4a2a60 100644
12
+ --- a/entities.c
13
+ +++ b/entities.c
14
+ @@ -592,27 +592,6 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
15
+ * By default one have to encode at least '<', '>', '"' and '&' !
16
+ */
17
+ if (*cur == '<') {
18
+ - const xmlChar *end;
19
+ -
20
+ - /*
21
+ - * Special handling of server side include in HTML attributes
22
+ - */
23
+ - if (html && attr &&
24
+ - (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
25
+ - ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
26
+ - while (cur != end) {
27
+ - *out++ = *cur++;
28
+ - indx = out - buffer;
29
+ - if (indx + 100 > buffer_size) {
30
+ - growBufferReentrant();
31
+ - out = &buffer[indx];
32
+ - }
33
+ - }
34
+ - *out++ = *cur++;
35
+ - *out++ = *cur++;
36
+ - *out++ = *cur++;
37
+ - continue;
38
+ - }
39
+ *out++ = '&';
40
+ *out++ = 'l';
41
+ *out++ = 't';
42
+ --
43
+ 2.16.2
44
+
data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch ADDED
@@ -0,0 +1,25 @@
1
+ From 0b6ae484761fa01242fe8b67b54e3eb2d282d83d Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Wed, 4 Dec 2019 08:43:51 -0500
4
+ Subject: [PATCH] fix libxml2.la's path
5
+
6
+ ---
7
+ Makefile.in | 2 +-
8
+ 1 file changed, 1 insertion(+), 1 deletion(-)
9
+
10
+ diff --git a/Makefile.in b/Makefile.in
11
+ index cf96d41..1372d8b 100644
12
+ --- a/Makefile.in
13
+ +++ b/Makefile.in
14
+ @@ -1057,7 +1057,7 @@ clean-noinstLTLIBRARIES:
15
+ rm -f $${locs}; \
16
+ }
17
+
18
+ -libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
19
+ +$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
20
+ $(AM_V_CCLD)$(libxml2_la_LINK) -rpath $(libdir) $(libxml2_la_OBJECTS) $(libxml2_la_LIBADD) $(LIBS)
21
+
22
+ testdso.la: $(testdso_la_OBJECTS) $(testdso_la_DEPENDENCIES) $(EXTRA_testdso_la_DEPENDENCIES)
23
+ --
24
+ 2.17.1
25
+
data/patches/libxml2/0009-allow-wildcard-namespaces.patch ADDED
@@ -0,0 +1,77 @@
1
+ From 74c95ec5932c737d4fcb06b8646b0017364ada14 Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Fri, 24 Dec 2021 19:08:01 -0500
4
+ Subject: [PATCH] attempt to hack in wildcard namespaces to xpath
5
+
6
+ I'm not confident this is a bulletproof patch.
7
+ ---
8
+ xpath.c | 24 ++++++++++++++++++------
9
+ 1 file changed, 18 insertions(+), 6 deletions(-)
10
+
11
+ diff --git a/xpath.c b/xpath.c
12
+ index 1aa2f1a..c7f0885 100644
13
+ --- a/xpath.c
14
+ +++ b/xpath.c
15
+ @@ -146,6 +146,9 @@
16
+ #define XPATH_MAX_RECURSION_DEPTH 5000
17
+ #endif
18
+
19
+ +#define WILDCARD_PREFIX "*"
20
+ +#define IS_WILDCARD_PREFIX(p) xmlStrEqual((xmlChar*)WILDCARD_PREFIX, p)
21
+ +
22
+ /*
23
+ * TODO:
24
+ * There are a few spots where some tests are done which depend upon ascii
25
+ @@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test,
26
+ SKIP_BLANKS;
27
+
28
+ if ((name == NULL) && (CUR == '*')) {
29
+ - /*
30
+ - * All elements
31
+ - */
32
+ NEXT;
33
+ - *test = NODE_TEST_ALL;
34
+ - return(NULL);
35
+ + if (CUR != ':') {
36
+ + /*
37
+ + * All elements
38
+ + */
39
+ + *test = NODE_TEST_ALL;
40
+ + return(NULL);
41
+ + }
42
+ + name = xmlCharStrdup(WILDCARD_PREFIX);
43
+ }
44
+
45
+ if (name == NULL)
46
+ @@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) {
47
+ }
48
+ #endif
49
+ if (CUR == '*') {
50
+ + if (NXT(1) == ':') {
51
+ + NEXT;
52
+ + name = xmlCharStrdup(WILDCARD_PREFIX);
53
+ + }
54
+ axis = AXIS_CHILD;
55
+ } else {
56
+ if (name == NULL)
57
+ @@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
58
+ /*
59
+ * Setup namespaces.
60
+ */
61
+ - if (prefix != NULL) {
62
+ + if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) {
63
+ URI = xmlXPathNsLookup(xpctxt, prefix);
64
+ if (URI == NULL) {
65
+ xmlXPathReleaseObject(xpctxt, obj);
66
+ @@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
67
+ {
68
+ XP_TEST_HIT
69
+ }
70
+ + } else if (IS_WILDCARD_PREFIX(prefix)) {
71
+ + XP_TEST_HIT
72
+ } else {
73
+ if ((cur->ns != NULL) &&
74
+ (xmlStrEqual(URI, cur->ns->href)))
75
+ --
76
+ 2.31.0
77
+