nokogiri 1.10.3 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (218)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +28 -26
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +716 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +191 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +60 -51
  32. data/ext/nokogiri/xml_node.c +493 -407
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +112 -33
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +34 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri/css/node.rb +1 -0
  91. data/lib/nokogiri/css/parser.rb +64 -63
  92. data/lib/nokogiri/css/parser.y +3 -3
  93. data/lib/nokogiri/css/parser_extras.rb +39 -36
  94. data/lib/nokogiri/css/syntax_error.rb +2 -1
  95. data/lib/nokogiri/css/tokenizer.rb +105 -103
  96. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  97. data/lib/nokogiri/css.rb +15 -14
  98. data/lib/nokogiri/decorators/slop.rb +1 -0
  99. data/lib/nokogiri/extension.rb +31 -0
  100. data/lib/nokogiri/gumbo.rb +14 -0
  101. data/lib/nokogiri/html.rb +32 -27
  102. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  103. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  104. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  105. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  106. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  107. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  109. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  110. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  111. data/lib/nokogiri/html4.rb +40 -0
  112. data/lib/nokogiri/html5/document.rb +74 -0
  113. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  114. data/lib/nokogiri/html5/node.rb +93 -0
  115. data/lib/nokogiri/html5.rb +473 -0
  116. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  117. data/lib/nokogiri/syntax_error.rb +1 -0
  118. data/lib/nokogiri/version/constant.rb +5 -0
  119. data/lib/nokogiri/version/info.rb +215 -0
  120. data/lib/nokogiri/version.rb +3 -109
  121. data/lib/nokogiri/xml/attr.rb +1 -0
  122. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  123. data/lib/nokogiri/xml/builder.rb +74 -32
  124. data/lib/nokogiri/xml/cdata.rb +1 -0
  125. data/lib/nokogiri/xml/character_data.rb +1 -0
  126. data/lib/nokogiri/xml/document.rb +138 -41
  127. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  128. data/lib/nokogiri/xml/dtd.rb +1 -0
  129. data/lib/nokogiri/xml/element_content.rb +1 -0
  130. data/lib/nokogiri/xml/element_decl.rb +1 -0
  131. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  132. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  133. data/lib/nokogiri/xml/namespace.rb +1 -0
  134. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  135. data/lib/nokogiri/xml/node.rb +629 -293
  136. data/lib/nokogiri/xml/node_set.rb +1 -0
  137. data/lib/nokogiri/xml/notation.rb +1 -0
  138. data/lib/nokogiri/xml/parse_options.rb +12 -3
  139. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  140. data/lib/nokogiri/xml/pp/node.rb +1 -0
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  143. data/lib/nokogiri/xml/reader.rb +9 -12
  144. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  145. data/lib/nokogiri/xml/sax/document.rb +25 -30
  146. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  147. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  148. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  149. data/lib/nokogiri/xml/sax.rb +5 -4
  150. data/lib/nokogiri/xml/schema.rb +13 -4
  151. data/lib/nokogiri/xml/searchable.rb +25 -16
  152. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  153. data/lib/nokogiri/xml/text.rb +1 -0
  154. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  155. data/lib/nokogiri/xml/xpath.rb +4 -5
  156. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  157. data/lib/nokogiri/xml.rb +36 -36
  158. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri.rb +32 -51
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  173. metadata +151 -153
  174. data/ext/nokogiri/html_document.c +0 -170
  175. data/ext/nokogiri/html_document.h +0 -10
  176. data/ext/nokogiri/html_element_description.c +0 -279
  177. data/ext/nokogiri/html_element_description.h +0 -10
  178. data/ext/nokogiri/html_entity_lookup.c +0 -32
  179. data/ext/nokogiri/html_entity_lookup.h +0 -8
  180. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  181. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  182. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  183. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_attr.h +0 -9
  185. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  186. data/ext/nokogiri/xml_cdata.h +0 -9
  187. data/ext/nokogiri/xml_comment.h +0 -9
  188. data/ext/nokogiri/xml_document.h +0 -23
  189. data/ext/nokogiri/xml_document_fragment.h +0 -10
  190. data/ext/nokogiri/xml_dtd.h +0 -10
  191. data/ext/nokogiri/xml_element_content.h +0 -10
  192. data/ext/nokogiri/xml_element_decl.h +0 -9
  193. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  194. data/ext/nokogiri/xml_entity_decl.h +0 -10
  195. data/ext/nokogiri/xml_entity_reference.h +0 -9
  196. data/ext/nokogiri/xml_io.c +0 -61
  197. data/ext/nokogiri/xml_io.h +0 -11
  198. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  199. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  200. data/ext/nokogiri/xml_namespace.h +0 -14
  201. data/ext/nokogiri/xml_node.h +0 -13
  202. data/ext/nokogiri/xml_node_set.h +0 -12
  203. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  204. data/ext/nokogiri/xml_reader.h +0 -10
  205. data/ext/nokogiri/xml_relax_ng.h +0 -9
  206. data/ext/nokogiri/xml_sax_parser.h +0 -39
  207. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  208. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  209. data/ext/nokogiri/xml_schema.h +0 -9
  210. data/ext/nokogiri/xml_syntax_error.h +0 -13
  211. data/ext/nokogiri/xml_text.h +0 -9
  212. data/ext/nokogiri/xml_xpath_context.h +0 -10
  213. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  214. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  215. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  216. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  217. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  218. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,151 +1,153 @@
1
+ # frozen_string_literal: true
1
2
  #--
2
3
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by rex 1.0.5
4
+ # This file is automatically generated by rex 1.0.7
4
5
  # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
6
  #++
6
7
 
7
8
  module Nokogiri
8
9
  module CSS
9
10
  class Tokenizer # :nodoc:
10
- require 'strscan'
11
+ require 'strscan'
11
12
 
12
- class ScanError < StandardError ; end
13
+ class ScanError < StandardError ; end
13
14
 
14
- attr_reader :lineno
15
- attr_reader :filename
16
- attr_accessor :state
15
+ attr_reader :lineno
16
+ attr_reader :filename
17
+ attr_accessor :state
17
18
 
18
- def scan_setup(str)
19
- @ss = StringScanner.new(str)
20
- @lineno = 1
21
- @state = nil
22
- end
19
+ def scan_setup(str)
20
+ @ss = StringScanner.new(str)
21
+ @lineno = 1
22
+ @state = nil
23
+ end
23
24
 
24
- def action
25
- yield
26
- end
25
+ def action
26
+ yield
27
+ end
27
28
 
28
- def scan_str(str)
29
- scan_setup(str)
30
- do_parse
31
- end
32
- alias :scan :scan_str
29
+ def scan_str(str)
30
+ scan_setup(str)
31
+ do_parse
32
+ end
33
+ alias :scan :scan_str
33
34
 
34
- def load_file( filename )
35
- @filename = filename
36
- open(filename, "r") do |f|
37
- scan_setup(f.read)
38
- end
39
- end
35
+ def load_file( filename )
36
+ @filename = filename
37
+ File.open(filename, "r") do |f|
38
+ scan_setup(f.read)
39
+ end
40
+ end
40
41
 
41
- def scan_file( filename )
42
- load_file(filename)
43
- do_parse
44
- end
42
+ def scan_file( filename )
43
+ load_file(filename)
44
+ do_parse
45
+ end
45
46
 
46
47
 
47
- def next_token
48
- return if @ss.eos?
49
-
50
- # skips empty actions
51
- until token = _next_token or @ss.eos?; end
52
- token
53
- end
48
+ def next_token
49
+ return if @ss.eos?
54
50
 
55
- def _next_token
56
- text = @ss.peek(1)
57
- @lineno += 1 if text == "\n"
58
- token = case @state
59
- when nil
60
- case
61
- when (text = @ss.scan(/has\([\s]*/))
62
- action { [:HAS, text] }
51
+ # skips empty actions
52
+ until token = _next_token or @ss.eos?; end
53
+ token
54
+ end
63
55
 
64
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
- action { [:FUNCTION, text] }
56
+ def _next_token
57
+ text = @ss.peek(1)
58
+ @lineno += 1 if text == "\n"
59
+ token = case @state
60
+ when nil
61
+ case
62
+ when (text = @ss.scan(/has\([\s]*/))
63
+ action { [:HAS, text] }
66
64
 
67
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
- action { [:IDENT, text] }
65
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
+ action { [:FUNCTION, text] }
69
67
 
70
- when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
- action { [:HASH, text] }
68
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
+ action { [:IDENT, text] }
72
70
 
73
- when (text = @ss.scan(/[\s]*~=[\s]*/))
74
- action { [:INCLUDES, text] }
71
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
72
+ action { [:HASH, text] }
75
73
 
76
- when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
- action { [:DASHMATCH, text] }
74
+ when (text = @ss.scan(/[\s]*~=[\s]*/))
75
+ action { [:INCLUDES, text] }
78
76
 
79
- when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
- action { [:PREFIXMATCH, text] }
77
+ when (text = @ss.scan(/[\s]*\|=[\s]*/))
78
+ action { [:DASHMATCH, text] }
81
79
 
82
- when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
- action { [:SUFFIXMATCH, text] }
80
+ when (text = @ss.scan(/[\s]*\^=[\s]*/))
81
+ action { [:PREFIXMATCH, text] }
84
82
 
85
- when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
- action { [:SUBSTRINGMATCH, text] }
83
+ when (text = @ss.scan(/[\s]*\$=[\s]*/))
84
+ action { [:SUFFIXMATCH, text] }
87
85
 
88
- when (text = @ss.scan(/[\s]*!=[\s]*/))
89
- action { [:NOT_EQUAL, text] }
86
+ when (text = @ss.scan(/[\s]*\*=[\s]*/))
87
+ action { [:SUBSTRINGMATCH, text] }
90
88
 
91
- when (text = @ss.scan(/[\s]*=[\s]*/))
92
- action { [:EQUAL, text] }
89
+ when (text = @ss.scan(/[\s]*!=[\s]*/))
90
+ action { [:NOT_EQUAL, text] }
93
91
 
94
- when (text = @ss.scan(/[\s]*\)/))
95
- action { [:RPAREN, text] }
92
+ when (text = @ss.scan(/[\s]*=[\s]*/))
93
+ action { [:EQUAL, text] }
96
94
 
97
- when (text = @ss.scan(/\[[\s]*/))
98
- action { [:LSQUARE, text] }
95
+ when (text = @ss.scan(/[\s]*\)/))
96
+ action { [:RPAREN, text] }
99
97
 
100
- when (text = @ss.scan(/[\s]*\]/))
101
- action { [:RSQUARE, text] }
98
+ when (text = @ss.scan(/\[[\s]*/))
99
+ action { [:LSQUARE, text] }
102
100
 
103
- when (text = @ss.scan(/[\s]*\+[\s]*/))
104
- action { [:PLUS, text] }
101
+ when (text = @ss.scan(/[\s]*\]/))
102
+ action { [:RSQUARE, text] }
105
103
 
106
- when (text = @ss.scan(/[\s]*>[\s]*/))
107
- action { [:GREATER, text] }
104
+ when (text = @ss.scan(/[\s]*\+[\s]*/))
105
+ action { [:PLUS, text] }
108
106
 
109
- when (text = @ss.scan(/[\s]*,[\s]*/))
110
- action { [:COMMA, text] }
107
+ when (text = @ss.scan(/[\s]*>[\s]*/))
108
+ action { [:GREATER, text] }
111
109
 
112
- when (text = @ss.scan(/[\s]*~[\s]*/))
113
- action { [:TILDE, text] }
110
+ when (text = @ss.scan(/[\s]*,[\s]*/))
111
+ action { [:COMMA, text] }
114
112
 
115
- when (text = @ss.scan(/\:not\([\s]*/))
116
- action { [:NOT, text] }
113
+ when (text = @ss.scan(/[\s]*~[\s]*/))
114
+ action { [:TILDE, text] }
117
115
 
118
- when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
- action { [:NUMBER, text] }
116
+ when (text = @ss.scan(/\:not\([\s]*/))
117
+ action { [:NOT, text] }
120
118
 
121
- when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
- action { [:DOUBLESLASH, text] }
119
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
120
+ action { [:NUMBER, text] }
123
121
 
124
- when (text = @ss.scan(/[\s]*\/[\s]*/))
125
- action { [:SLASH, text] }
122
+ when (text = @ss.scan(/[\s]*\/\/[\s]*/))
123
+ action { [:DOUBLESLASH, text] }
126
124
 
127
- when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
- action {[:UNICODE_RANGE, text] }
125
+ when (text = @ss.scan(/[\s]*\/[\s]*/))
126
+ action { [:SLASH, text] }
129
127
 
130
- when (text = @ss.scan(/[\s]+/))
131
- action { [:S, text] }
128
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
129
+ action {[:UNICODE_RANGE, text] }
132
130
 
133
- when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
134
- action { [:STRING, text] }
131
+ when (text = @ss.scan(/[\s]+/))
132
+ action { [:S, text] }
135
133
 
136
- when (text = @ss.scan(/./))
137
- action { [text, text] }
134
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
135
+ action { [:STRING, text] }
138
136
 
139
- else
140
- text = @ss.string[@ss.pos .. -1]
141
- raise ScanError, "can not match: '" + text + "'"
142
- end # if
137
+ when (text = @ss.scan(/./))
138
+ action { [text, text] }
143
139
 
144
- else
145
- raise ScanError, "undefined state: '" + state.to_s + "'"
146
- end # case state
147
- token
148
- end # def _next_token
140
+
141
+ else
142
+ text = @ss.string[@ss.pos .. -1]
143
+ raise ScanError, "can not match: '" + text + "'"
144
+ end # if
145
+
146
+ else
147
+ raise ScanError, "undefined state: '" + state.to_s + "'"
148
+ end # case state
149
+ token
150
+ end # def _next_token
149
151
 
150
152
  end # class
151
153
  end
@@ -1,8 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module CSS
3
4
  class XPathVisitor # :nodoc:
4
5
  def visit_function node
5
-
6
6
  msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
7
  return self.send(msg, node) if self.respond_to?(msg)
8
8
 
@@ -12,49 +12,51 @@ module Nokogiri
12
12
  when /^self\(/
13
13
  "self::#{node.value[1]}"
14
14
  when /^eq\(/
15
- "position() = #{node.value[1]}"
15
+ "position()=#{node.value[1]}"
16
16
  when /^(nth|nth-of-type)\(/
17
17
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
18
  nth(node.value[1])
19
19
  else
20
- "position() = #{node.value[1]}"
20
+ "position()=#{node.value[1]}"
21
21
  end
22
22
  when /^nth-child\(/
23
23
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
24
  nth(node.value[1], :child => true)
25
25
  else
26
- "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
26
+ "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
27
27
  end
28
28
  when /^nth-last-of-type\(/
29
29
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
30
  nth(node.value[1], :last => true)
31
31
  else
32
32
  index = node.value[1].to_i - 1
33
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
33
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
34
  end
35
35
  when /^nth-last-child\(/
36
36
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
37
  nth(node.value[1], :last => true, :child => true)
38
38
  else
39
- "count(following-sibling::*) = #{node.value[1].to_i-1}"
39
+ "count(following-sibling::*)=#{node.value[1].to_i-1}"
40
40
  end
41
41
  when /^(first|first-of-type)\(/
42
- "position() = 1"
42
+ "position()=1"
43
43
  when /^(last|last-of-type)\(/
44
- "position() = last()"
44
+ "position()=last()"
45
45
  when /^contains\(/
46
- "contains(., #{node.value[1]})"
46
+ "contains(.,#{node.value[1]})"
47
47
  when /^gt\(/
48
- "position() > #{node.value[1]}"
48
+ "position()>#{node.value[1]}"
49
49
  when /^only-child\(/
50
- "last() = 1"
50
+ "last()=1"
51
51
  when /^comment\(/
52
52
  "comment()"
53
53
  when /^has\(/
54
- ".//#{node.value[1].accept(self)}"
54
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
55
+ ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
55
56
  else
57
+ # non-standard. this looks like a function call.
56
58
  args = ['.'] + node.value[1..-1]
57
- "#{node.value.first}#{args.join(', ')})"
59
+ "#{node.value.first}#{args.join(',')})"
58
60
  end
59
61
  end
60
62
 
@@ -69,18 +71,18 @@ module Nokogiri
69
71
 
70
72
  def visit_id node
71
73
  node.value.first =~ /^#(.*)$/
72
- "@id = '#{$1}'"
74
+ "@id='#{$1}'"
73
75
  end
74
76
 
75
77
  def visit_attribute_condition node
76
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
77
- ''
78
- else
79
- '@'
80
- end
78
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
+ ''
80
+ else
81
+ '@'
82
+ end
81
83
  attribute += node.value.first.accept(self)
82
84
 
83
- # Support non-standard css
85
+ # non-standard. attributes starting with '@'
84
86
  attribute.gsub!(/^@@/, '@')
85
87
 
86
88
  return attribute unless node.value.length == 3
@@ -88,29 +90,30 @@ module Nokogiri
88
90
  value = node.value.last
89
91
  value = "'#{value}'" if value !~ /^['"]/
90
92
 
93
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
91
94
  if (value[0]==value[-1]) && %q{"'}.include?(value[0])
92
95
  str_value = value[1..-2]
93
96
  if str_value.include?(value[0])
94
- value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
97
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
95
98
  end
96
99
  end
97
100
 
98
101
  case node.value[1]
99
102
  when :equal
100
- attribute + " = " + "#{value}"
103
+ attribute + "=" + "#{value}"
101
104
  when :not_equal
102
- attribute + " != " + "#{value}"
105
+ attribute + "!=" + "#{value}"
103
106
  when :substring_match
104
- "contains(#{attribute}, #{value})"
107
+ "contains(#{attribute},#{value})"
105
108
  when :prefix_match
106
- "starts-with(#{attribute}, #{value})"
109
+ "starts-with(#{attribute},#{value})"
107
110
  when :dash_match
108
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
111
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
109
112
  when :includes
110
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
113
+ value = value[1..-2] # strip quotes
114
+ css_class(attribute, value)
111
115
  when :suffix_match
112
- "substring(#{attribute}, string-length(#{attribute}) - " +
113
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
116
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
114
117
  else
115
118
  attribute + " #{node.value[1]} " + "#{value}"
116
119
  end
@@ -124,14 +127,14 @@ module Nokogiri
124
127
  return self.send(msg, node) if self.respond_to?(msg)
125
128
 
126
129
  case node.value.first
127
- when "first" then "position() = 1"
128
- when "first-child" then "count(preceding-sibling::*) = 0"
129
- when "last" then "position() = last()"
130
- when "last-child" then "count(following-sibling::*) = 0"
131
- when "first-of-type" then "position() = 1"
132
- when "last-of-type" then "position() = last()"
133
- when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
134
- when "only-of-type" then "last() = 1"
130
+ when "first" then "position()=1"
131
+ when "first-child" then "count(preceding-sibling::*)=0"
132
+ when "last" then "position()=last()"
133
+ when "last-child" then "count(following-sibling::*)=0"
134
+ when "first-of-type" then "position()=1"
135
+ when "last-of-type" then "position()=last()"
136
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
137
+ when "only-of-type" then "last()=1"
135
138
  when "empty" then "not(node())"
136
139
  when "parent" then "node()"
137
140
  when "root" then "not(parent::*)"
@@ -142,7 +145,7 @@ module Nokogiri
142
145
  end
143
146
 
144
147
  def visit_class_condition node
145
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
148
+ css_class("@class", node.value.first)
146
149
  end
147
150
 
148
151
  def visit_combinator node
@@ -179,25 +182,26 @@ module Nokogiri
179
182
  node.accept(self)
180
183
  end
181
184
 
182
- private
185
+ private
186
+
183
187
  def nth node, options={}
184
188
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
185
189
 
186
190
  a, b = read_a_and_positive_b node.value
187
191
  position = if options[:child]
188
- options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
192
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
189
193
  else
190
194
  options[:last] ? "(last()-position()+1)" : "position()"
191
195
  end
192
196
 
193
197
  if b.zero?
194
- "(#{position} mod #{a}) = 0"
198
+ "(#{position} mod #{a})=0"
195
199
  else
196
200
  compare = a < 0 ? "<=" : ">="
197
201
  if a.abs == 1
198
- "#{position} #{compare} #{b}"
202
+ "#{position}#{compare}#{b}"
199
203
  else
200
- "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
204
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
201
205
  end
202
206
  end
203
207
  end
@@ -225,6 +229,32 @@ module Nokogiri
225
229
  end =~ /(nth|first|last|only)-of-type(\()?/
226
230
  end
227
231
  end
232
+
233
+ # use only ordinary xpath functions
234
+ def css_class_standard(hay, needle)
235
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
236
+ end
237
+
238
+ # use the builtin implementation
239
+ def css_class_builtin(hay, needle)
240
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
241
+ end
242
+
243
+ alias_method :css_class, :css_class_standard
244
+ end
245
+
246
+ class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
+ private
248
+ alias_method :css_class, :css_class_builtin
249
+ end
250
+
251
+ class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
+ private
253
+ if Nokogiri.uses_libxml?
254
+ alias_method :css_class, :css_class_builtin
255
+ else
256
+ alias_method :css_class, :css_class_standard
257
+ end
228
258
  end
229
259
  end
230
260
  end
data/lib/nokogiri/css.rb CHANGED
@@ -1,27 +1,28 @@
1
- require 'nokogiri/css/node'
2
- require 'nokogiri/css/xpath_visitor'
3
- x = $-w
4
- $-w = false
5
- require 'nokogiri/css/parser'
6
- $-w = x
7
-
8
- require 'nokogiri/css/tokenizer'
9
- require 'nokogiri/css/syntax_error'
10
-
1
+ # frozen_string_literal: true
11
2
  module Nokogiri
12
3
  module CSS
13
4
  class << self
14
5
  ###
15
6
  # Parse this CSS selector in +selector+. Returns an AST.
16
- def parse selector
17
- Parser.new.parse selector
7
+ def parse(selector)
8
+ Parser.new.parse(selector)
18
9
  end
19
10
 
20
11
  ###
21
12
  # Get the XPath for +selector+.
22
- def xpath_for selector, options={}
23
- Parser.new(options[:ns] || {}).xpath_for selector, options
13
+ def xpath_for(selector, options = {})
14
+ Parser.new(options[:ns] || {}).xpath_for(selector, options)
24
15
  end
25
16
  end
26
17
  end
27
18
  end
19
+
20
+ require_relative "css/node"
21
+ require_relative "css/xpath_visitor"
22
+ x = $-w
23
+ $-w = false
24
+ require_relative "css/parser"
25
+ $-w = x
26
+
27
+ require_relative "css/tokenizer"
28
+ require_relative "css/syntax_error"
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module Decorators
3
4
  ###
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension
4
+ begin
5
+ # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
6
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
7
+ require_relative "#{Regexp.last_match(1)}/nokogiri"
8
+ rescue LoadError => e
9
+ if e.message =~ /GLIBC/
10
+ warn(<<~EOM)
11
+
12
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
13
+
14
+ #{e.message}
15
+
16
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
17
+ gem install nokogiri --platform=ruby
18
+ or:
19
+ bundle config set force_ruby_platform true
20
+
21
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
22
+
23
+ EOM
24
+ raise e
25
+ end
26
+
27
+ # use "require" instead of "require_relative" because non-native gems will place C extension files
28
+ # in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
29
+ # is in $LOAD_PATH but not necessarily relative to this file (see #2300)
30
+ require "nokogiri/nokogiri"
31
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module Gumbo
4
+ # The default maximum number of attributes per element.
5
+ DEFAULT_MAX_ATTRIBUTES = 400
6
+
7
+ # The default maximum number of errors for parsing a document or a fragment.
8
+ DEFAULT_MAX_ERRORS = 0
9
+
10
+ # The default maximum depth of the DOM tree produced by parsing a document
11
+ # or fragment.
12
+ DEFAULT_MAX_TREE_DEPTH = 400
13
+ end
14
+ end
data/lib/nokogiri/html.rb CHANGED
@@ -1,37 +1,42 @@
1
- require 'nokogiri/html/entity_lookup'
2
- require 'nokogiri/html/document'
3
- require 'nokogiri/html/document_fragment'
4
- require 'nokogiri/html/sax/parser_context'
5
- require 'nokogiri/html/sax/parser'
6
- require 'nokogiri/html/sax/push_parser'
7
- require 'nokogiri/html/element_description'
8
- require 'nokogiri/html/element_description_defaults'
1
+ # frozen_string_literal: true
2
+ require_relative "html4"
9
3
 
10
4
  module Nokogiri
11
- class << self
12
- ###
13
- # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
14
- def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
15
- Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
16
- end
17
- end
5
+ HTML = Nokogiri::HTML4
6
+
7
+ # @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
8
+ # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
9
+ # @!scope class
10
+ define_singleton_method(:HTML, Nokogiri.method(:HTML4))
18
11
 
12
+ # @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
13
+ # {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
14
+ # classes.
19
15
  module HTML
20
- class << self
21
- ###
22
- # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
23
- def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
24
- Document.parse(thing, url, encoding, options, &block)
16
+ # @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
17
+ class Document < Nokogiri::XML::Document
18
+ end
19
+
20
+ # @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
21
+ class DocumentFragment < Nokogiri::XML::DocumentFragment
22
+ end
23
+
24
+ # @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
25
+ class Builder < Nokogiri::XML::Builder
26
+ end
27
+
28
+ module SAX
29
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
30
+ class Parser < Nokogiri::XML::SAX::Parser
25
31
  end
26
32
 
27
- ####
28
- # Parse a fragment from +string+ in to a NodeSet.
29
- def fragment string, encoding = nil
30
- HTML::DocumentFragment.parse string, encoding
33
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
34
+ class ParserContext < Nokogiri::XML::SAX::ParserContext
31
35
  end
32
- end
33
36
 
34
- # Instance of Nokogiri::HTML::EntityLookup
35
- NamedCharacters = EntityLookup.new
37
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
38
+ class PushParser
39
+ end
40
+ end
36
41
  end
37
42
  end