nokogiri 1.18.0.rc1-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +502 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +297 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +49 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +324 -0
@@ -0,0 +1,48 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "html4"
5
+
6
module Nokogiri
  # Alias for Nokogiri::HTML4
  HTML = Nokogiri::HTML4

  # :singleton-method: HTML
  # :call-seq: HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
  #
  # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse

  # :nodoc:
  # Nokogiri.HTML is bound to the very same Method object as Nokogiri.HTML4.
  define_singleton_method(:HTML, method(:HTML4))

  # 💡 As of v1.12.0 this module/namespace is an alias for Nokogiri::HTML4. Before v1.12.0 there
  # was no Nokogiri::HTML4, and every HTML-related class lived directly in this namespace.
  #
  # Because the HTML constant assigned above is the HTML4 module itself, the statements below
  # reopen the existing HTML4 classes; every body is empty, so no behavior is added here.
  module HTML
    # 💡 As of v1.12.0 this class is an alias for Nokogiri::HTML4::Document.
    class Document < Nokogiri::XML::Document; end

    # 💡 As of v1.12.0 this class is an alias for Nokogiri::HTML4::DocumentFragment.
    class DocumentFragment < Nokogiri::XML::DocumentFragment; end

    # 💡 As of v1.12.0 this class is an alias for Nokogiri::HTML4::Builder.
    class Builder < Nokogiri::XML::Builder; end

    module SAX
      # 💡 As of v1.12.0 this class is an alias for Nokogiri::HTML4::SAX::Parser.
      class Parser < Nokogiri::XML::SAX::Parser; end

      # 💡 As of v1.12.0 this class is an alias for Nokogiri::HTML4::SAX::ParserContext.
      class ParserContext < Nokogiri::XML::SAX::ParserContext; end

      # 💡 As of v1.12.0 this class is an alias for Nokogiri::HTML4::SAX::PushParser.
      class PushParser; end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Nokogiri
  module HTML4
    ###
    # Builder for HTML documents. It is a thin specialization of
    # Nokogiri::XML::Builder, so read that class's documentation first; all of
    # the building DSL comes from there.
    #
    # == Synopsis:
    #
    # Build an HTML document whose body carries an onload attribute and
    # contains a span with class "bold" and the text "Hello world".
    #
    #   builder = Nokogiri::HTML4::Builder.new do |doc|
    #     doc.html {
    #       doc.body(:onload => 'some_func();') {
    #         doc.span.bold {
    #           doc.text "Hello world"
    #         }
    #       }
    #     }
    #   end
    #   puts builder.to_html
    #
    # The only addition made here on top of the XML builder is #to_html.
    class Builder < Nokogiri::XML::Builder
      ###
      # Serialize the document being built as HTML by delegating to the
      # builder's document (+@doc+).
      def to_html
        @doc.to_html
      end
    end
  end
end
@@ -0,0 +1,235 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "pathname"
5
+
6
+ module Nokogiri
7
+ module HTML4
8
+ class Document < Nokogiri::XML::Document
9
###
# Get the encoding declared by this document's meta tags.
#
# A <meta charset="..."> element is consulted first. Failing that, the
# +charset=+ parameter is extracted from the +content+ attribute of the
# Content-Type meta tag. Returns nil when neither tag is present (or when the
# Content-Type tag carries no charset parameter).
def meta_encoding
  charset_meta = at_xpath("//meta[@charset]")
  return charset_meta[:charset] if charset_meta

  content_type_meta = meta_content_type
  return unless content_type_meta

  content_type_meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
end
19
+
20
###
# Set the meta tag encoding for this document.
#
# If a meta encoding tag is already present, its value is replaced with the
# given encoding. Both kinds of declaration are rewritten when they exist:
# the +content+ of the Content-Type meta tag and the +charset+ attribute of a
# <meta charset> tag. Updating both keeps #meta_encoding, which reads the
# charset attribute first, in agreement with the value just assigned.
#
# Otherwise, this method tries to create one at an appropriate
# place supplying head and/or html elements as necessary, which
# is inside a head element if any, and before any text node or
# content element (typically <body>) if any. A <meta charset> tag is created
# when the document's internal subset is an HTML5 DTD, and a
# <meta http-equiv="Content-Type"> tag in every other case.
#
# The result when trying to set an encoding that is different
# from the document encoding is undefined.
#
# Beware in CRuby, that libxml2 automatically inserts a meta tag
# into a head element.
#
# Returns +encoding+.
def meta_encoding=(encoding)
  content_type_meta = meta_content_type
  charset_meta = at_xpath("//meta[@charset]")

  if content_type_meta || charset_meta
    # Rewrite every existing declaration so the getter cannot observe a stale one.
    content_type_meta["content"] = format("text/html; charset=%s", encoding) if content_type_meta
    charset_meta["charset"] = encoding if charset_meta
    return encoding
  end

  meta = XML::Node.new("meta", self)
  if (dtd = internal_subset) && dtd.html5_dtd?
    meta["charset"] = encoding
  else
    meta["http-equiv"] = "Content-Type"
    meta["content"] = format("text/html; charset=%s", encoding)
  end

  if (head = at_xpath("//head"))
    head.prepend_child(meta)
  else
    set_metadata_element(meta)
  end
  encoding
end
59
+
60
# Return the first <meta> element that has both an +http-equiv+ and a
# +content+ attribute and whose +http-equiv+ value is "Content-Type"
# (compared case-insensitively), or nil when no such element exists.
def meta_content_type
  candidates = xpath("//meta[@http-equiv and boolean(@content)]")
  candidates.find { |node| /\AContent-Type\z/i.match?(node["http-equiv"]) }
end
private :meta_content_type
66
+
67
###
# Get the title string of this document, i.e. the inner text of the first
# <title> element. Returns nil if there is no title tag.
def title
  at_xpath("//title")&.inner_text
end
73
+
74
###
# Set the title string of this document.
#
# When a title element already exists, its children are replaced by a single
# text node holding +text+.
#
# Otherwise a new title element is created and placed, in order of
# preference: appended to the head element if there is one; right after a
# meta encoding/charset tag if there is one; or wherever
# set_metadata_element decides, which supplies head and/or html elements as
# necessary and keeps the element before any text node or content element
# (typically <body>).
def title=(text)
  text_node = XML::Text.new(text, self)

  existing_title = at_xpath("//title")
  if existing_title
    existing_title.children = text_node
    return text
  end

  new_title = XML::Node.new("title", self) << text_node

  head = at_xpath("//head")
  return head << new_title if head

  # better put after charset declaration
  encoding_meta = at_xpath("//meta[@charset]") || meta_content_type
  return encoding_meta.add_next_sibling(new_title) if encoding_meta

  set_metadata_element(new_title)
end
102
+
103
# Insert +element+ (a metadata element such as <meta> or <title>) at a
# suitable place in the document, trying in order:
#
# 1. append it to an existing head element;
# 2. create a head as the first child of an existing html element and put
#    +element+ inside it;
# 3. insert it right before the first top-level Element or Text node;
# 4. create html and head elements and put +element+ inside the new head.
def set_metadata_element(element) # rubocop:disable Naming/AccessorMethodName
  head = at_xpath("//head")
  return head << element if head

  if (html = at_xpath("//html"))
    new_head = html.prepend_child(XML::Node.new("head", self))
    return new_head.prepend_child(element)
  end

  # We reach here only if the underlying document model
  # allows <html>/<head> elements to be omitted and does not
  # automatically supply them.
  first_content = children.find do |node|
    node.is_a?(XML::Element) || node.is_a?(XML::Text)
  end
  return first_content.add_previous_sibling(element) if first_content

  new_html = add_child(XML::Node.new("html", self))
  new_head = new_html.add_child(XML::Node.new("head", self))
  new_head.prepend_child(element)
end
private :set_metadata_element
126
+
127
####
# Serialize Node using +options+. Save options can also be set using a block.
#
# When +options+ carries no truthy +:save_with+ entry,
# XML::Node::SaveOptions::DEFAULT_HTML is supplied. The default is merged
# into a copy, so the hash passed by the caller is never written to by this
# method (and a frozen hash is accepted).
#
# See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
#
# These two statements are equivalent:
#
#   node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
#   node.serialize(:encoding => 'UTF-8') do |config|
#     config.format.as_xml
#   end
#
def serialize(options = {})
  unless options[:save_with]
    options = options.merge(save_with: XML::Node::SaveOptions::DEFAULT_HTML)
  end
  # Bare `super` forwards the current value of +options+ (and any block).
  super
end
146
+
147
####
# Create a DocumentFragment from +tags+ that belongs to this document. The
# document's root is passed along as the third argument of
# DocumentFragment.new.
def fragment(tags = nil)
  context_node = root
  DocumentFragment.new(self, tags, context_node)
end
152
+
153
# :call-seq:
#   xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
#
# [Returns] The document type which determines CSS-to-XPath translation; for
# this class it is always DoctypeConfig::HTML4.
#
# See XPathVisitor for more information.
def xpath_doctype
  doctype_config = Nokogiri::CSS::XPathVisitor::DoctypeConfig
  doctype_config::HTML4
end
162
+
163
class << self
  # :call-seq:
  #   parse(input) { |options| ... } => Nokogiri::HTML4::Document
  #   parse(input, url:, encoding:, options:) => Nokogiri::HTML4::Document
  #
  # Parse \HTML4 input from a String or IO object, and return a new HTML4::Document.
  #
  # [Required Parameters]
  # - +input+ (String | IO) The content to be parsed. A Pathname is also accepted: it is
  #   expanded and opened for reading, and the file handle opened here is closed again before
  #   this method returns. IO objects supplied by the caller are never closed.
  #
  # [Optional Keyword Arguments]
  # - +url:+ (String) The base URI for this document. Defaults to +input.path+ when +input+
  #   responds to +path+.
  #
  # - +encoding:+ (String) The name of the encoding that should be used when processing the
  #   document. When not provided, the encoding will be determined based on the document
  #   content.
  #
  # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
  #   behaviors during parsing. See ParseOptions for more information. The default value is
  #   +ParseOptions::DEFAULT_HTML+. An Integer is wrapped in a ParseOptions object.
  #
  # Each keyword argument may also be given positionally, in the order +url+, +encoding+,
  # +options+; a keyword takes precedence over its positional counterpart.
  #
  # [Yields]
  #   If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
  #   can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
  #
  # [Returns] Nokogiri::HTML4::Document
  def parse(
    input,
    url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
    url: url_, encoding: encoding_, options: options_
  )
    options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
    yield options if block_given?

    url ||= input.respond_to?(:path) ? input.path : nil

    # A String's own encoding is trusted unless it is binary.
    if input.respond_to?(:encoding)
      unless input.encoding == Encoding::ASCII_8BIT
        encoding ||= input.encoding.name
      end
    end

    if input.respond_to?(:read)
      opened_here = nil
      if input.is_a?(Pathname)
        # resolve the Pathname to the file and open it as an IO object, see #2110
        input = opened_here = input.expand_path.open
        url ||= input.path
      end

      begin
        unless encoding
          # EncodingReader signals a detected encoding by raising EncodingFound,
          # in which case the read is retried with that encoding.
          input = EncodingReader.new(input)
          begin
            return read_io(input, url, encoding, options.to_i)
          rescue EncodingReader::EncodingFound => e
            encoding = e.found_encoding
          end
        end
        return read_io(input, url, encoding, options.to_i)
      ensure
        # Only the handle opened above is ours to close.
        opened_here&.close
      end
    end

    # read_memory pukes on empty docs
    if input.nil? || input.empty?
      return encoding ? new.tap { |i| i.encoding = encoding } : new
    end

    encoding ||= EncodingReader.detect_encoding(input)

    read_memory(input, url, encoding, options.to_i)
  end
end
233
+ end
234
+ end
235
+ end
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module HTML4
5
+ class DocumentFragment < Nokogiri::XML::DocumentFragment
6
#
# :call-seq:
#   parse(input) { |options| ... } → HTML4::DocumentFragment
#   parse(input, encoding:, options:) { |options| ... } → HTML4::DocumentFragment
#
# Parse \HTML4 fragment input from a String, and return a new HTML4::DocumentFragment. This
# method creates a new, empty HTML4::Document to contain the fragment.
#
# [Required Parameters]
# - +input+ (String | IO) The content to be parsed. Anything responding to +read+ is read
#   completely before parsing.
#
# [Optional Keyword Arguments]
# - +encoding:+ (String) The name of the encoding that should be used when processing the
#   document. When not provided, the encoding of the input string is used, with "UTF-8" as
#   the fallback for binary (ASCII-8BIT) input and for input that has no +encoding+ method.
#
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
#   behaviors during parsing. See ParseOptions for more information. The default value is
#   +ParseOptions::DEFAULT_HTML+.
#
# Both may also be given positionally, in the order +encoding+, +options+; a keyword takes
# precedence over its positional counterpart.
#
# [Yields]
#   If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
#   can be configured before parsing. See ParseOptions for more information.
#
# [Returns] HTML4::DocumentFragment
#
# *Example:* Parsing a string
#
#   fragment = HTML4::DocumentFragment.parse("<div>Hello World</div>")
#
# *Example:* Parsing an IO
#
#   fragment = File.open("fragment.html") do |file|
#     HTML4::DocumentFragment.parse(file)
#   end
#
# *Example:* Specifying encoding
#
#   fragment = HTML4::DocumentFragment.parse(input, encoding: "EUC-JP")
#
# *Example:* Setting parse options dynamically
#
#   HTML4::DocumentFragment.parse("<div>Hello World") do |options|
#     options.huge.pedantic
#   end
#
def self.parse(
  input,
  encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
  encoding: encoding_, options: options_,
  &block
)
  # TODO: this method should take a context node.
  owner_document = HTML4::Document.new

  if input.respond_to?(:read)
    # IO-like objects (IO, File, StringIO, etc.): their _read_ method takes no +encoding+
    # argument, so a requested encoding is applied through _set_encoding_ (when the object
    # supports it) before the whole content is read into a String.
    input.set_encoding(encoding) if encoding && input.respond_to?(:set_encoding)
    input = input.read
  end

  unless encoding
    # Fall back to the encoding of the (possibly just read) string; binary data and objects
    # without an encoding are treated as UTF-8.
    encoding = "UTF-8"
    if input.respond_to?(:encoding)
      source_encoding = input.encoding
      encoding = source_encoding.name unless source_encoding == ::Encoding::ASCII_8BIT
    end
  end

  owner_document.encoding = encoding

  new(owner_document, input, options: options, &block)
end
96
+
97
#
# :call-seq:
#   new(document) { |options| ... } → HTML4::DocumentFragment
#   new(document, input) { |options| ... } → HTML4::DocumentFragment
#   new(document, input, context:, options:) { |options| ... } → HTML4::DocumentFragment
#
# Parse \HTML4 fragment input from a String, and return a new HTML4::DocumentFragment.
#
# 💡 It's recommended to use either HTML4::DocumentFragment.parse or XML::Node#parse rather
# than call this method directly.
#
# [Required Parameters]
# - +document+ (HTML4::Document) The parent document to associate the returned fragment with.
#
# [Optional Parameters]
# - +input+ (String) The content to be parsed. When omitted (or nil) nothing is parsed and
#   the options block is not yielded to.
#
# [Optional Keyword Arguments]
# - +context:+ (Nokogiri::XML::Node) The <b>context node</b> for the subtree created. See
#   below for more information.
#
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
#   behaviors during parsing. See ParseOptions for more information. The default value is
#   +ParseOptions::DEFAULT_HTML+. An Integer is wrapped in a ParseOptions object.
#
# Both may also be given positionally after +input+, in the order +context+, +options+; a
# keyword takes precedence over its positional counterpart.
#
# [Yields]
#   If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
#   can be configured before parsing. See ParseOptions for more information.
#
# [Returns] HTML4::DocumentFragment
#
# === Context \Node
#
# If a context node is specified using +context:+, then the fragment will be created by
# calling XML::Node#parse on that node, so the parser will behave as if that Node is the
# parent of the fragment subtree. The fragment's errors are then the errors that the parse
# added to +document+.
#
# Without a context node the input is parsed inside a temporary HTML4::Document, whose body
# contents (or the body element itself, when the input has a line starting with "<body") are
# moved into the fragment.
#
def initialize(
  document, input = nil,
  context_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
  context: context_, options: options_
) # rubocop:disable Lint/MissingSuper
  return self unless input

  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  @parse_options = options
  yield options if block_given?

  if context
    errors_before = document.errors.dup
    # The input is wrapped in a <div> so the parsed nodes can be collected from one parent.
    wrapper_set = context.parse("<div>#{input}</div>", options)
    wrapper_set.first.children.each { |node| node.parent = self } unless wrapper_set.empty?
    self.errors = document.errors - errors_before
  else
    # This is a horrible hack, but I don't care
    body_supplied = /^\s*?<body/i.match?(input)
    selector = body_supplied ? "/html/body" : "/html/body/node()"

    scratch_doc = HTML4::Document.parse("<html><body>#{input}", nil, document.encoding, options)
    scratch_doc.xpath(selector).each { |node| node.parent = self }
    self.errors = scratch_doc.errors
  end
  children
end
164
+ end
165
+ end
166
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Nokogiri
  module HTML4
    # Ruby-side additions to ElementDescription. The methods below rely on
    # +inline?+, +name+ and +description+, which are defined outside this file.
    class ElementDescription
      ###
      # Is this element a block element? True exactly when it is not inline.
      def block?
        !inline?
      end

      ###
      # Convert this description to a string of the form "name: description"
      def to_s
        format("%s: %s", name, description)
      end

      ###
      # Inspection information: class name, element name and description
      def inspect
        format("#<%s: %s %s>", self.class.name, name, description)
      end
    end
  end
end