nokogiri 1.11.0.rc4-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (218) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1682 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +200 -0
  5. data/bin/nokogiri +118 -0
  6. data/dependencies.yml +74 -0
  7. data/ext/nokogiri/depend +477 -0
  8. data/ext/nokogiri/extconf.rb +819 -0
  9. data/ext/nokogiri/html_document.c +171 -0
  10. data/ext/nokogiri/html_document.h +10 -0
  11. data/ext/nokogiri/html_element_description.c +279 -0
  12. data/ext/nokogiri/html_element_description.h +10 -0
  13. data/ext/nokogiri/html_entity_lookup.c +32 -0
  14. data/ext/nokogiri/html_entity_lookup.h +8 -0
  15. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  16. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  17. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  18. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  19. data/ext/nokogiri/include/libexslt/exslt.h +102 -0
  20. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  21. data/ext/nokogiri/include/libexslt/exsltexports.h +140 -0
  22. data/ext/nokogiri/include/libxml2/libxml/DOCBparser.h +96 -0
  23. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  24. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  25. data/ext/nokogiri/include/libxml2/libxml/SAX.h +173 -0
  26. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +178 -0
  27. data/ext/nokogiri/include/libxml2/libxml/c14n.h +126 -0
  28. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  29. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  30. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  31. data/ext/nokogiri/include/libxml2/libxml/dict.h +79 -0
  32. data/ext/nokogiri/include/libxml2/libxml/encoding.h +245 -0
  33. data/ext/nokogiri/include/libxml2/libxml/entities.h +151 -0
  34. data/ext/nokogiri/include/libxml2/libxml/globals.h +508 -0
  35. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  36. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  37. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +163 -0
  38. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  39. data/ext/nokogiri/include/libxml2/libxml/parser.h +1241 -0
  40. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +644 -0
  41. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  42. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +217 -0
  43. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  44. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  45. data/ext/nokogiri/include/libxml2/libxml/threads.h +89 -0
  46. data/ext/nokogiri/include/libxml2/libxml/tree.h +1311 -0
  47. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  48. data/ext/nokogiri/include/libxml2/libxml/valid.h +458 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +366 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +945 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +153 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +224 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +151 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +485 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  66. data/ext/nokogiri/include/libxml2/libxml/xpath.h +566 -0
  67. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  68. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +114 -0
  69. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  70. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  71. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  72. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  73. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  74. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  75. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  76. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  77. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  78. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  79. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  80. data/ext/nokogiri/include/libxslt/security.h +104 -0
  81. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  82. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  83. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  84. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  85. data/ext/nokogiri/include/libxslt/xsltInternals.h +1978 -0
  86. data/ext/nokogiri/include/libxslt/xsltconfig.h +180 -0
  87. data/ext/nokogiri/include/libxslt/xsltexports.h +142 -0
  88. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  89. data/ext/nokogiri/include/libxslt/xsltutils.h +313 -0
  90. data/ext/nokogiri/nokogiri.c +135 -0
  91. data/ext/nokogiri/nokogiri.h +130 -0
  92. data/ext/nokogiri/xml_attr.c +103 -0
  93. data/ext/nokogiri/xml_attr.h +9 -0
  94. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  95. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  96. data/ext/nokogiri/xml_cdata.c +62 -0
  97. data/ext/nokogiri/xml_cdata.h +9 -0
  98. data/ext/nokogiri/xml_comment.c +69 -0
  99. data/ext/nokogiri/xml_comment.h +9 -0
  100. data/ext/nokogiri/xml_document.c +622 -0
  101. data/ext/nokogiri/xml_document.h +23 -0
  102. data/ext/nokogiri/xml_document_fragment.c +48 -0
  103. data/ext/nokogiri/xml_document_fragment.h +10 -0
  104. data/ext/nokogiri/xml_dtd.c +202 -0
  105. data/ext/nokogiri/xml_dtd.h +10 -0
  106. data/ext/nokogiri/xml_element_content.c +123 -0
  107. data/ext/nokogiri/xml_element_content.h +10 -0
  108. data/ext/nokogiri/xml_element_decl.c +69 -0
  109. data/ext/nokogiri/xml_element_decl.h +9 -0
  110. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  111. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  112. data/ext/nokogiri/xml_entity_decl.c +110 -0
  113. data/ext/nokogiri/xml_entity_decl.h +10 -0
  114. data/ext/nokogiri/xml_entity_reference.c +52 -0
  115. data/ext/nokogiri/xml_entity_reference.h +9 -0
  116. data/ext/nokogiri/xml_io.c +63 -0
  117. data/ext/nokogiri/xml_io.h +11 -0
  118. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  119. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  120. data/ext/nokogiri/xml_namespace.c +111 -0
  121. data/ext/nokogiri/xml_namespace.h +14 -0
  122. data/ext/nokogiri/xml_node.c +1773 -0
  123. data/ext/nokogiri/xml_node.h +13 -0
  124. data/ext/nokogiri/xml_node_set.c +486 -0
  125. data/ext/nokogiri/xml_node_set.h +12 -0
  126. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  127. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  128. data/ext/nokogiri/xml_reader.c +657 -0
  129. data/ext/nokogiri/xml_reader.h +10 -0
  130. data/ext/nokogiri/xml_relax_ng.c +179 -0
  131. data/ext/nokogiri/xml_relax_ng.h +9 -0
  132. data/ext/nokogiri/xml_sax_parser.c +305 -0
  133. data/ext/nokogiri/xml_sax_parser.h +39 -0
  134. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  135. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  136. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  137. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  138. data/ext/nokogiri/xml_schema.c +276 -0
  139. data/ext/nokogiri/xml_schema.h +9 -0
  140. data/ext/nokogiri/xml_syntax_error.c +64 -0
  141. data/ext/nokogiri/xml_syntax_error.h +13 -0
  142. data/ext/nokogiri/xml_text.c +52 -0
  143. data/ext/nokogiri/xml_text.h +9 -0
  144. data/ext/nokogiri/xml_xpath_context.c +374 -0
  145. data/ext/nokogiri/xml_xpath_context.h +10 -0
  146. data/ext/nokogiri/xslt_stylesheet.c +263 -0
  147. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  148. data/lib/nokogiri.rb +127 -0
  149. data/lib/nokogiri/2.5/nokogiri.bundle +0 -0
  150. data/lib/nokogiri/2.6/nokogiri.bundle +0 -0
  151. data/lib/nokogiri/2.7/nokogiri.bundle +0 -0
  152. data/lib/nokogiri/3.0/nokogiri.bundle +0 -0
  153. data/lib/nokogiri/css.rb +28 -0
  154. data/lib/nokogiri/css/node.rb +53 -0
  155. data/lib/nokogiri/css/parser.rb +751 -0
  156. data/lib/nokogiri/css/parser.y +272 -0
  157. data/lib/nokogiri/css/parser_extras.rb +94 -0
  158. data/lib/nokogiri/css/syntax_error.rb +8 -0
  159. data/lib/nokogiri/css/tokenizer.rb +154 -0
  160. data/lib/nokogiri/css/tokenizer.rex +55 -0
  161. data/lib/nokogiri/css/xpath_visitor.rb +260 -0
  162. data/lib/nokogiri/decorators/slop.rb +43 -0
  163. data/lib/nokogiri/html.rb +38 -0
  164. data/lib/nokogiri/html/builder.rb +36 -0
  165. data/lib/nokogiri/html/document.rb +322 -0
  166. data/lib/nokogiri/html/document_fragment.rb +50 -0
  167. data/lib/nokogiri/html/element_description.rb +24 -0
  168. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  169. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  170. data/lib/nokogiri/html/sax/parser.rb +63 -0
  171. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  172. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  173. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  174. data/lib/nokogiri/syntax_error.rb +5 -0
  175. data/lib/nokogiri/version.rb +3 -0
  176. data/lib/nokogiri/version/constant.rb +5 -0
  177. data/lib/nokogiri/version/info.rb +182 -0
  178. data/lib/nokogiri/xml.rb +76 -0
  179. data/lib/nokogiri/xml/attr.rb +15 -0
  180. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  181. data/lib/nokogiri/xml/builder.rb +447 -0
  182. data/lib/nokogiri/xml/cdata.rb +12 -0
  183. data/lib/nokogiri/xml/character_data.rb +8 -0
  184. data/lib/nokogiri/xml/document.rb +290 -0
  185. data/lib/nokogiri/xml/document_fragment.rb +159 -0
  186. data/lib/nokogiri/xml/dtd.rb +33 -0
  187. data/lib/nokogiri/xml/element_content.rb +37 -0
  188. data/lib/nokogiri/xml/element_decl.rb +14 -0
  189. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  190. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  191. data/lib/nokogiri/xml/namespace.rb +14 -0
  192. data/lib/nokogiri/xml/node.rb +1240 -0
  193. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  194. data/lib/nokogiri/xml/node_set.rb +372 -0
  195. data/lib/nokogiri/xml/notation.rb +7 -0
  196. data/lib/nokogiri/xml/parse_options.rb +127 -0
  197. data/lib/nokogiri/xml/pp.rb +3 -0
  198. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  199. data/lib/nokogiri/xml/pp/node.rb +57 -0
  200. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  201. data/lib/nokogiri/xml/reader.rb +116 -0
  202. data/lib/nokogiri/xml/relax_ng.rb +37 -0
  203. data/lib/nokogiri/xml/sax.rb +5 -0
  204. data/lib/nokogiri/xml/sax/document.rb +172 -0
  205. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  206. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  207. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  208. data/lib/nokogiri/xml/schema.rb +72 -0
  209. data/lib/nokogiri/xml/searchable.rb +239 -0
  210. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  211. data/lib/nokogiri/xml/text.rb +10 -0
  212. data/lib/nokogiri/xml/xpath.rb +11 -0
  213. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  214. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  215. data/lib/nokogiri/xslt.rb +57 -0
  216. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  217. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  218. metadata +565 -0
@@ -0,0 +1,55 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class Tokenizer # :nodoc:
4
+
5
+ macro
6
+ nl \n|\r\n|\r|\f
7
+ w [\s]*
8
+ nonascii [^\0-\177]
9
+ num -?([0-9]+|[0-9]*\.[0-9]+)
10
+ unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
11
+
12
+ escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
13
+ nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_A-Za-z]|{nonascii}|{escape}
15
+ ident [-@]?({nmstart})({nmchar})*
16
+ name ({nmchar})+
17
+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
18
+ string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
19
+ string {string1}|{string2}
20
+
21
+ rule
22
+
23
+ # [:state] pattern [actions]
24
+
25
+ has\({w} { [:HAS, text] }
26
+ {ident}\({w} { [:FUNCTION, text] }
27
+ {ident} { [:IDENT, text] }
28
+ \#{name} { [:HASH, text] }
29
+ {w}~={w} { [:INCLUDES, text] }
30
+ {w}\|={w} { [:DASHMATCH, text] }
31
+ {w}\^={w} { [:PREFIXMATCH, text] }
32
+ {w}\$={w} { [:SUFFIXMATCH, text] }
33
+ {w}\*={w} { [:SUBSTRINGMATCH, text] }
34
+ {w}!={w} { [:NOT_EQUAL, text] }
35
+ {w}={w} { [:EQUAL, text] }
36
+ {w}\) { [:RPAREN, text] }
37
+ \[{w} { [:LSQUARE, text] }
38
+ {w}\] { [:RSQUARE, text] }
39
+ {w}\+{w} { [:PLUS, text] }
40
+ {w}>{w} { [:GREATER, text] }
41
+ {w},{w} { [:COMMA, text] }
42
+ {w}~{w} { [:TILDE, text] }
43
+ \:not\({w} { [:NOT, text] }
44
+ {num} { [:NUMBER, text] }
45
+ {w}\/\/{w} { [:DOUBLESLASH, text] }
46
+ {w}\/{w} { [:SLASH, text] }
47
+
48
+ U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
49
+
50
+ [\s]+ { [:S, text] }
51
+ {string} { [:STRING, text] }
52
+ . { [text, text] }
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,260 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module CSS
4
+ class XPathVisitor # :nodoc:
5
+ def visit_function node
6
+ msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
+ return self.send(msg, node) if self.respond_to?(msg)
8
+
9
+ case node.value.first
10
+ when /^text\(/
11
+ 'child::text()'
12
+ when /^self\(/
13
+ "self::#{node.value[1]}"
14
+ when /^eq\(/
15
+ "position()=#{node.value[1]}"
16
+ when /^(nth|nth-of-type)\(/
17
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
+ nth(node.value[1])
19
+ else
20
+ "position()=#{node.value[1]}"
21
+ end
22
+ when /^nth-child\(/
23
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
+ nth(node.value[1], :child => true)
25
+ else
26
+ "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
27
+ end
28
+ when /^nth-last-of-type\(/
29
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
+ nth(node.value[1], :last => true)
31
+ else
32
+ index = node.value[1].to_i - 1
33
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
+ end
35
+ when /^nth-last-child\(/
36
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
+ nth(node.value[1], :last => true, :child => true)
38
+ else
39
+ "count(following-sibling::*)=#{node.value[1].to_i-1}"
40
+ end
41
+ when /^(first|first-of-type)\(/
42
+ "position()=1"
43
+ when /^(last|last-of-type)\(/
44
+ "position()=last()"
45
+ when /^contains\(/
46
+ "contains(.,#{node.value[1]})"
47
+ when /^gt\(/
48
+ "position()>#{node.value[1]}"
49
+ when /^only-child\(/
50
+ "last()=1"
51
+ when /^comment\(/
52
+ "comment()"
53
+ when /^has\(/
54
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
55
+ ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
56
+ else
57
+ # non-standard. this looks like a function call.
58
+ args = ['.'] + node.value[1..-1]
59
+ "#{node.value.first}#{args.join(',')})"
60
+ end
61
+ end
62
+
63
# Translate a :not(...) node into an XPath not(...) expression.
#
# The first child is visited with this visitor; a bare element name is
# prefixed with the self:: axis so it tests the context node itself.
def visit_not(node)
  inner = node.value.first
  axis = :ELEMENT_NAME == inner.type ? "self::" : ""
  "not(#{axis}#{inner.accept(self)})"
end
71
+
72
# Translate an id selector node (whose first value looks like "#name") into
# an XPath @id comparison. If the value has no leading "#", the id is empty.
def visit_id(node)
  id = node.value.first[/^#(.*)$/, 1]
  "@id='#{id}'"
end
76
+
77
+ def visit_attribute_condition node
78
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
+ ''
80
+ else
81
+ '@'
82
+ end
83
+ attribute += node.value.first.accept(self)
84
+
85
+ # non-standard. attributes starting with '@'
86
+ attribute.gsub!(/^@@/, '@')
87
+
88
+ return attribute unless node.value.length == 3
89
+
90
+ value = node.value.last
91
+ value = "'#{value}'" if value !~ /^['"]/
92
+
93
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
94
+ if (value[0]==value[-1]) && %q{"'}.include?(value[0])
95
+ str_value = value[1..-2]
96
+ if str_value.include?(value[0])
97
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
98
+ end
99
+ end
100
+
101
+ case node.value[1]
102
+ when :equal
103
+ attribute + "=" + "#{value}"
104
+ when :not_equal
105
+ attribute + "!=" + "#{value}"
106
+ when :substring_match
107
+ "contains(#{attribute},#{value})"
108
+ when :prefix_match
109
+ "starts-with(#{attribute},#{value})"
110
+ when :dash_match
111
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
112
+ when :includes
113
+ value = value[1..-2] # strip quotes
114
+ css_class(attribute, value)
115
+ when :suffix_match
116
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
117
+ else
118
+ attribute + " #{node.value[1]} " + "#{value}"
119
+ end
120
+ end
121
+
122
# Translate a pseudo-class node into an XPath predicate.
#
# Resolution order:
# 1. a nested FUNCTION node is visited and its result returned;
# 2. a public method named visit_pseudo_class_<name> on this visitor, if
#    one exists, is called with the node;
# 3. the built-in table of known pseudo-classes;
# 4. anything else is emitted as a call of that name on the context node.
def visit_pseudo_class(node)
  head = node.value.first
  if head.is_a?(Nokogiri::CSS::Node) && head.type == :FUNCTION
    return head.accept(self)
  end

  handler = :"visit_pseudo_class_#{head.gsub(/[(]/, '')}"
  return send(handler, node) if respond_to?(handler)

  builtin = {
    "first"         => "position()=1",
    "first-child"   => "count(preceding-sibling::*)=0",
    "last"          => "position()=last()",
    "last-child"    => "count(following-sibling::*)=0",
    "first-of-type" => "position()=1",
    "last-of-type"  => "position()=last()",
    "only-child"    => "count(preceding-sibling::*)=0 and count(following-sibling::*)=0",
    "only-of-type"  => "last()=1",
    "empty"         => "not(node())",
    "parent"        => "node()",
    "root"          => "not(parent::*)",
  }
  builtin.fetch(head) { head + "(.)" }
end
146
+
147
+ def visit_class_condition node
148
+ css_class("@class", node.value.first)
149
+ end
150
+
151
+ def visit_combinator node
152
+ if is_of_type_pseudo_class?(node.value.last)
153
+ "#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
154
+ else
155
+ "#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
156
+ end
157
+ end
158
+
159
+ {
160
+ 'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
161
+ 'following_selector' => "/following-sibling::",
162
+ 'descendant_selector' => '//',
163
+ 'child_selector' => '/',
164
+ }.each do |k,v|
165
+ class_eval %{
166
+ def visit_#{k} node
167
+ "\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
168
+ end
169
+ }
170
+ end
171
+
172
+ def visit_conditional_selector node
173
+ node.value.first.accept(self) + '[' +
174
+ node.value.last.accept(self) + ']'
175
+ end
176
+
177
+ def visit_element_name node
178
+ node.value.first
179
+ end
180
+
181
+ def accept node
182
+ node.accept(self)
183
+ end
184
+
185
+ private
186
+
187
+ def nth node, options={}
188
+ raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
189
+
190
+ a, b = read_a_and_positive_b node.value
191
+ position = if options[:child]
192
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
193
+ else
194
+ options[:last] ? "(last()-position()+1)" : "position()"
195
+ end
196
+
197
+ if b.zero?
198
+ "(#{position} mod #{a})=0"
199
+ else
200
+ compare = a < 0 ? "<=" : ">="
201
+ if a.abs == 1
202
+ "#{position}#{compare}#{b}"
203
+ else
204
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
205
+ end
206
+ end
207
+ end
208
+
209
# Turn the tokens of an `an+b` / `an-b` expression into an `[a, b]` pair of
# Integers.
#
# +values+ is the token list of the expression; index 0 is read as the +a+
# coefficient, index 2 as the operator and index 3 as the magnitude of +b+.
#
# With "+", +b+ is returned as written. With "-", +b+ is replaced by the
# congruent offset `a - (magnitude mod a)`. When +a+ is zero there is no
# step to fold the offset into, and the modulo would raise
# ZeroDivisionError, so the literal negative offset is returned instead.
#
# Raises ArgumentError when the operator is neither "+" nor "-".
def read_a_and_positive_b(values)
  op = values[2]
  unless op == "+" || op == "-"
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
  end

  a = values[0].to_i
  magnitude = values[3].to_i
  return [a, magnitude] if op == "+"

  # `0n-b` describes the constant position -b; avoid `% 0`.
  return [a, -magnitude] if a.zero?

  [a, a - (magnitude % a)]
end
222
+
223
# Report whether +node+ is one of the *-of-type pseudo-classes
# (nth-, first-, last-, only-), either as a plain name or wrapped in a
# FUNCTION node.
#
# Returns the match offset (truthy) when it is, nil otherwise; nodes that
# are not PSEUDO_CLASS always give nil.
def is_of_type_pseudo_class?(node)
  return unless node.type == :PSEUDO_CLASS

  head = node.value[0]
  # For functional pseudo-classes the name lives on the nested node.
  head = head.value[0] if head.is_a?(Nokogiri::CSS::Node) && head.type == :FUNCTION
  head =~ /(nth|first|last|only)-of-type(\()?/
end
232
+
233
+ # use only ordinary xpath functions
234
# Build a class-membership test from ordinary XPath functions only: the
# attribute expression +hay+ is whitespace-normalised and space-padded, then
# searched for the space-padded class name +needle+.
def css_class_standard(hay, needle)
  padded_hay = "concat(' ',normalize-space(#{hay}),' ')"
  padded_needle = "' #{needle} '"
  "contains(#{padded_hay},#{padded_needle})"
end
237
+
238
+ # use the builtin implementation
239
# Build a class-membership test that calls the nokogiri-builtin:css-class
# XPath extension function with the attribute expression +hay+ and the
# quoted class name +needle+.
def css_class_builtin(hay, needle)
  quoted_needle = "'#{needle}'"
  "nokogiri-builtin:css-class(#{hay},#{quoted_needle})"
end
242
+
243
+ alias_method :css_class, :css_class_standard
244
+ end
245
+
246
+ class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
+ private
248
+ alias_method :css_class, :css_class_builtin
249
+ end
250
+
251
+ class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
+ private
253
+ if Nokogiri.uses_libxml?
254
+ alias_method :css_class, :css_class_builtin
255
+ else
256
+ alias_method :css_class, :css_class_standard
257
+ end
258
+ end
259
+ end
260
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module Decorators
4
+ ###
5
+ # The Slop decorator implements method_missing such that methods may be
6
+ # used instead of XPath or CSS. See Nokogiri.Slop
7
module Slop
  # The default XPath search context for Slop
  XPATH_PREFIX = "./"

  ###
  # Look for node with +name+. See Nokogiri.Slop
  #
  # With no arguments, +name+ (minus one leading underscore) is searched as
  # an XPath step below XPATH_PREFIX. With a Hash argument, the :css value
  # is appended to +name+ and searched as CSS, or the :xpath value(s) are
  # joined with " and " into a predicate on +name+. Any other argument is
  # appended to +name+ and translated through CSS.xpath_for.
  #
  # Returns the single match, or the whole list when there are several.
  # Falls through to +super+ when nothing matches, including when a Hash
  # argument carries neither :css nor :xpath.
  def method_missing(name, *args, &block)
    list = nil
    selector = args.first

    if args.empty?
      list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
    elsif selector.is_a?(Hash)
      if selector[:css]
        list = css("#{name}#{selector[:css]}")
      elsif selector[:xpath]
        conds = Array(selector[:xpath]).join(' and ')
        list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
      end
    else
      CSS::Parser.without_cache do
        list = xpath(
          *CSS.xpath_for("#{name}#{selector}", :prefix => XPATH_PREFIX)
        )
      end
    end

    # No search was run (Hash without :css/:xpath): report the missing
    # method itself rather than failing on nil below.
    return super if list.nil?

    super if list.empty?
    list.length == 1 ? list.first : list
  end

  # True when an XPath search for +name+ (minus one leading underscore)
  # below XPATH_PREFIX finds at least one node.
  def respond_to_missing? name, include_private = false
    list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")

    !list.empty?
  end
end
42
+ end
43
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+ require 'nokogiri/html/entity_lookup'
3
+ require 'nokogiri/html/document'
4
+ require 'nokogiri/html/document_fragment'
5
+ require 'nokogiri/html/sax/parser_context'
6
+ require 'nokogiri/html/sax/parser'
7
+ require 'nokogiri/html/sax/push_parser'
8
+ require 'nokogiri/html/element_description'
9
+ require 'nokogiri/html/element_description_defaults'
10
+
11
+ module Nokogiri
12
+ class << self
13
+ ###
14
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
15
+ def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
16
+ Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
17
+ end
18
+ end
19
+
20
+ module HTML
21
+ class << self
22
+ ###
23
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
24
+ def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
25
+ Document.parse(thing, url, encoding, options, &block)
26
+ end
27
+
28
+ ####
29
+ # Parse a fragment from +string+ into a NodeSet.
30
+ def fragment string, encoding = nil
31
+ HTML::DocumentFragment.parse string, encoding
32
+ end
33
+ end
34
+
35
+ # Instance of Nokogiri::HTML::EntityLookup
36
+ NamedCharacters = EntityLookup.new
37
+ end
38
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module HTML
4
+ ###
5
+ # Nokogiri HTML builder is used for building HTML documents. It is very
6
+ # similar to the Nokogiri::XML::Builder. In fact, you should go read the
7
+ # documentation for Nokogiri::XML::Builder before reading this
8
+ # documentation.
9
+ #
10
+ # == Synopsis:
11
+ #
12
+ # Create an HTML document with a body that has an onload attribute, and a
13
+ # span tag with a class of "bold" that has content of "Hello world".
14
+ #
15
+ # builder = Nokogiri::HTML::Builder.new do |doc|
16
+ # doc.html {
17
+ # doc.body(:onload => 'some_func();') {
18
+ # doc.span.bold {
19
+ # doc.text "Hello world"
20
+ # }
21
+ # }
22
+ # }
23
+ # end
24
+ # puts builder.to_html
25
+ #
26
+ # The HTML builder inherits from the XML builder, so make sure to read the
27
+ # Nokogiri::XML::Builder documentation.
28
+ class Builder < Nokogiri::XML::Builder
29
+ ###
30
+ # Convert the builder to HTML
31
+ def to_html
32
+ @doc.to_html
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,322 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ class Document < Nokogiri::XML::Document
8
+ ###
9
+ # Get the meta tag encoding for this document. If there is no meta tag,
10
+ # then nil is returned.
11
+ def meta_encoding
12
+ case
13
+ when meta = at('//meta[@charset]')
14
+ meta[:charset]
15
+ when meta = meta_content_type
16
+ meta['content'][/charset\s*=\s*([\w-]+)/i, 1]
17
+ end
18
+ end
19
+
20
+ ###
21
+ # Set the meta tag encoding for this document.
22
+ #
23
+ # If a meta encoding tag is already present, its content is
24
+ # replaced with the given text.
25
+ #
26
+ # Otherwise, this method tries to create one at an appropriate
27
+ # place supplying head and/or html elements as necessary, which
28
+ # is inside a head element if any, and before any text node or
29
+ # content element (typically <body>) if any.
30
+ #
31
+ # The result when trying to set an encoding that is different
32
+ # from the document encoding is undefined.
33
+ #
34
+ # Beware in CRuby, that libxml2 automatically inserts a meta tag
35
+ # into a head element.
36
+ def meta_encoding= encoding
37
+ case
38
+ when meta = meta_content_type
39
+ meta['content'] = 'text/html; charset=%s' % encoding
40
+ encoding
41
+ when meta = at('//meta[@charset]')
42
+ meta['charset'] = encoding
43
+ else
44
+ meta = XML::Node.new('meta', self)
45
+ if dtd = internal_subset and dtd.html5_dtd?
46
+ meta['charset'] = encoding
47
+ else
48
+ meta['http-equiv'] = 'Content-Type'
49
+ meta['content'] = 'text/html; charset=%s' % encoding
50
+ end
51
+
52
+ case
53
+ when head = at('//head')
54
+ head.prepend_child(meta)
55
+ else
56
+ set_metadata_element(meta)
57
+ end
58
+ encoding
59
+ end
60
+ end
61
+
62
+ def meta_content_type
63
+ xpath('//meta[@http-equiv and boolean(@content)]').find { |node|
64
+ node['http-equiv'] =~ /\AContent-Type\z/i
65
+ }
66
+ end
67
+ private :meta_content_type
68
+
69
+ ###
70
+ # Get the title string of this document. Return nil if there is
71
+ # no title tag.
72
+ def title
73
+ title = at('//title') and title.inner_text
74
+ end
75
+
76
+ ###
77
+ # Set the title string of this document.
78
+ #
79
+ # If a title element is already present, its content is replaced
80
+ # with the given text.
81
+ #
82
+ # Otherwise, this method tries to create one at an appropriate
83
+ # place supplying head and/or html elements as necessary, which
84
+ # is inside a head element if any, right after a meta
85
+ # encoding/charset tag if any, and before any text node or
86
+ # content element (typically <body>) if any.
87
+ def title=(text)
88
+ tnode = XML::Text.new(text, self)
89
+ if title = at('//title')
90
+ title.children = tnode
91
+ return text
92
+ end
93
+
94
+ title = XML::Node.new('title', self) << tnode
95
+ case
96
+ when head = at('//head')
97
+ head << title
98
+ when meta = at('//meta[@charset]') || meta_content_type
99
+ # better put after charset declaration
100
+ meta.add_next_sibling(title)
101
+ else
102
+ set_metadata_element(title)
103
+ end
104
+ text
105
+ end
106
+
107
+ def set_metadata_element(element)
108
+ case
109
+ when head = at('//head')
110
+ head << element
111
+ when html = at('//html')
112
+ head = html.prepend_child(XML::Node.new('head', self))
113
+ head.prepend_child(element)
114
+ when first = children.find { |node|
115
+ case node
116
+ when XML::Element, XML::Text
117
+ true
118
+ end
119
+ }
120
+ # We reach here only if the underlying document model
121
+ # allows <html>/<head> elements to be omitted and does not
122
+ # automatically supply them.
123
+ first.add_previous_sibling(element)
124
+ else
125
+ html = add_child(XML::Node.new('html', self))
126
+ head = html.add_child(XML::Node.new('head', self))
127
+ head.prepend_child(element)
128
+ end
129
+ end
130
+ private :set_metadata_element
131
+
132
+ ####
133
+ # Serialize Node using +options+. Save options can also be set using a
134
+ # block. See SaveOptions.
135
+ #
136
+ # These two statements are equivalent:
137
+ #
138
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
139
+ #
140
+ # or
141
+ #
142
+ # node.serialize(:encoding => 'UTF-8') do |config|
143
+ # config.format.as_xml
144
+ # end
145
+ #
146
def serialize(options = {})
  # Work on a copy so the caller's Hash is never modified (it may be frozen
  # or reused for another call).
  options = options.dup
  # Only fill in the HTML default when the caller did not choose one.
  options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
  # Bare super forwards the adjusted +options+ and any block.
  super
end
150
+
151
+ ####
152
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
153
+ def fragment tags = nil
154
+ DocumentFragment.new(self, tags, self.root)
155
+ end
156
+
157
+ class << self
158
+ ###
159
+ # Parse HTML. +string_or_io+ may be a String, or any object that
160
+ # responds to _read_ and _close_ such as an IO, or StringIO.
161
+ # +url+ is the resource where this document is located. +encoding+ is the
162
+ # encoding that should be used when processing the document. +options+
163
+ # is a number that sets options in the parser, such as
164
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
165
+ # Nokogiri::XML::ParseOptions.
166
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  # Let the caller adjust the parse options before anything is read.
  yield options if block_given?

  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil

  if string_or_io.respond_to?(:encoding)
    # ASCII-8BIT input is not used as an encoding hint.
    unless string_or_io.encoding.name == "ASCII-8BIT"
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    # File opened by this method for a Pathname argument; closed in the
    # ensure below. IO objects supplied by the caller are never closed here.
    opened_file = nil
    begin
      if string_or_io.is_a?(Pathname)
        # resolve the Pathname to the file and open it as an IO object, see #2110
        opened_file = string_or_io.expand_path.open
        string_or_io = opened_file
        url ||= string_or_io.path
      end

      unless encoding
        # Libxml2's parser has poor support for encoding
        # detection. First, it does not recognize the HTML5
        # style meta charset declaration. Secondly, even if it
        # successfully detects an encoding hint, it does not
        # re-decode or re-parse the preceding part which may be
        # garbled.
        #
        # EncodingReader aims to perform advanced encoding
        # detection beyond what Libxml2 does, and to emulate
        # rewinding of a stream and make Libxml2 redo parsing
        # from the start when an encoding hint is found.
        string_or_io = EncodingReader.new(string_or_io)
        begin
          return read_io(string_or_io, url, encoding, options.to_i)
        rescue EncodingFound => e
          encoding = e.found_encoding
        end
      end
      return read_io(string_or_io, url, encoding, options.to_i)
    ensure
      # NOTE(review): assumes read_io has finished with the stream by the
      # time it returns or raises — confirm against the extension.
      opened_file.close if opened_file
    end
  end

  # read_memory pukes on empty docs
  if string_or_io.nil? || string_or_io.empty?
    return encoding ? new.tap { |i| i.encoding = encoding } : new
  end

  encoding ||= EncodingReader.detect_encoding(string_or_io)

  read_memory(string_or_io, url, encoding, options.to_i)
end
217
+ end
218
+
219
# Raised to carry a detected encoding name back to the code that started
# the read; the name is available as #found_encoding.
class EncodingFound < StandardError # :nodoc:
  def initialize(encoding)
    super("encoding found: %s" % encoding)
    @found_encoding = encoding
  end

  # The encoding name this error was created with.
  def found_encoding
    @found_encoding
  end
end
227
+
228
+ class EncodingReader # :nodoc:
229
+ class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
230
+ attr_reader :encoding
231
+
232
+ def initialize
233
+ @encoding = nil
234
+ super()
235
+ end
236
+
237
# SAX callback: record in @encoding the character encoding announced by a
# <meta> element. All other elements are ignored.
#
# +attrs+ is a list of [name, value] pairs. Two forms are recognised:
# * <meta charset="...">
# * <meta http-equiv="Content-Type" content="...; charset=...">
# When both are present on the same element the http-equiv form wins, as
# it is examined last. The http-equiv value and the "charset" keyword are
# both matched case-insensitively.
def start_element(name, attrs = [])
  return unless name == 'meta'

  attr = Hash[attrs]

  charset = attr['charset']
  @encoding = charset if charset

  http_equiv = attr['http-equiv']
  return unless http_equiv && http_equiv.match(/\AContent-Type\z/i)

  content = attr['content']
  m = content && content.match(/;\s*charset\s*=\s*([\w-]+)/i)
  @encoding = m[1] if m
end
248
+ end
249
+
250
+ class JumpSAXHandler < SAXHandler
251
+ def initialize(jumptag)
252
+ @jumptag = jumptag
253
+ super()
254
+ end
255
+
256
+ def start_element(name, attrs = [])
257
+ super
258
+ throw @jumptag, @encoding if @encoding
259
+ throw @jumptag, nil if name =~ /\A(?:div|h1|img|p|br)\z/
260
+ end
261
+ end
262
+
263
# Guess the encoding declared inside +chunk+, returning its name or nil.
#
# A leading XML declaration takes priority. Otherwise the chunk is scanned
# for a <meta> charset declaration: on JRuby by a regexp and then a SAX
# parse that jumps out early, elsewhere by feeding a push parser.
def self.detect_encoding(chunk)
  if (m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/))
    return Nokogiri.XML(m[1]).encoding
  end

  if Nokogiri.jruby?
    if (m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i))
      return m[4]
    end

    catch(:encoding_found) {
      Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
      nil
    }
  else
    handler = SAXHandler.new
    parser = Nokogiri::HTML::SAX::PushParser.new(handler)
    begin
      parser << chunk
    rescue StandardError
      # Best effort: any StandardError raised while parsing is ignored and
      # whatever the handler collected before it is used. (The original
      # `rescue Nokogiri::SyntaxError` modifier behaved this way too; a
      # rescue modifier cannot filter by exception class.)
    end
    handler.encoding
  end
end
281
+
282
+ def initialize(io)
283
+ @io = io
284
+ @firstchunk = nil
285
+ @encoding_found = nil
286
+ end
287
+
288
+ # This method is used by the C extension so that
289
+ # Nokogiri::HTML::Document#read_io() does not leak memory when
290
+ # EncodingFound is raised.
291
+ attr_reader :encoding_found
292
+
293
# Read up to +len+ bytes, returning a String or nil when nothing is left.
# Calling without +len+ is not supported.
#
# The first call reads one chunk from the wrapped IO and runs encoding
# detection on it. If an encoding is detected, an EncodingFound is stored
# in @encoding_found and raised; the chunk is kept so that the next call
# serves it again from the start.
def read(len)
  unless @firstchunk
    @firstchunk = @io.read(len)
    return nil unless @firstchunk

    # This implementation expects that the first call from
    # htmlReadIO() is made with a length long enough (~1KB) to
    # achieve advanced encoding detection.
    encoding = EncodingReader.detect_encoding(@firstchunk)
    if encoding
      # The first chunk is stored for the next read in retry.
      @encoding_found = EncodingFound.new(encoding)
      raise @encoding_found
    end
  end
  @encoding_found = nil

  # Serve buffered bytes first, then top up from the underlying IO.
  ret = @firstchunk.slice!(0, len)
  remaining = len - ret.length
  if remaining > 0
    rest = @io.read(remaining)
    ret << rest if rest
  end

  ret.empty? ? nil : ret
end
319
+ end
320
+ end
321
+ end
322
+ end