nokogiri 1.10.3 → 1.11.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic.

Files changed (159)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +173 -94
  6. data/dependencies.yml +28 -26
  7. data/ext/nokogiri/depend +37 -358
  8. data/ext/nokogiri/extconf.rb +611 -391
  9. data/ext/nokogiri/html_document.c +78 -82
  10. data/ext/nokogiri/html_element_description.c +84 -71
  11. data/ext/nokogiri/html_entity_lookup.c +21 -16
  12. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  13. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  14. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  15. data/ext/nokogiri/nokogiri.c +192 -87
  16. data/ext/nokogiri/nokogiri.h +181 -89
  17. data/ext/nokogiri/test_global_handlers.c +40 -0
  18. data/ext/nokogiri/xml_attr.c +15 -15
  19. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  20. data/ext/nokogiri/xml_cdata.c +13 -18
  21. data/ext/nokogiri/xml_comment.c +19 -26
  22. data/ext/nokogiri/xml_document.c +255 -183
  23. data/ext/nokogiri/xml_document_fragment.c +13 -15
  24. data/ext/nokogiri/xml_dtd.c +54 -48
  25. data/ext/nokogiri/xml_element_content.c +30 -27
  26. data/ext/nokogiri/xml_element_decl.c +22 -22
  27. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  28. data/ext/nokogiri/xml_entity_decl.c +32 -30
  29. data/ext/nokogiri/xml_entity_reference.c +16 -18
  30. data/ext/nokogiri/xml_namespace.c +56 -49
  31. data/ext/nokogiri/xml_node.c +387 -316
  32. data/ext/nokogiri/xml_node_set.c +168 -156
  33. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  34. data/ext/nokogiri/xml_reader.c +195 -172
  35. data/ext/nokogiri/xml_relax_ng.c +52 -28
  36. data/ext/nokogiri/xml_sax_parser.c +118 -118
  37. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  38. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  39. data/ext/nokogiri/xml_schema.c +111 -34
  40. data/ext/nokogiri/xml_syntax_error.c +42 -21
  41. data/ext/nokogiri/xml_text.c +13 -17
  42. data/ext/nokogiri/xml_xpath_context.c +206 -123
  43. data/ext/nokogiri/xslt_stylesheet.c +158 -165
  44. data/lib/nokogiri/css/node.rb +1 -0
  45. data/lib/nokogiri/css/parser.rb +63 -62
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +39 -36
  48. data/lib/nokogiri/css/syntax_error.rb +1 -0
  49. data/lib/nokogiri/css/tokenizer.rb +105 -103
  50. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  51. data/lib/nokogiri/css.rb +1 -0
  52. data/lib/nokogiri/decorators/slop.rb +1 -0
  53. data/lib/nokogiri/extension.rb +26 -0
  54. data/lib/nokogiri/html/builder.rb +1 -0
  55. data/lib/nokogiri/html/document.rb +13 -26
  56. data/lib/nokogiri/html/document_fragment.rb +16 -15
  57. data/lib/nokogiri/html/element_description.rb +1 -0
  58. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  59. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  60. data/lib/nokogiri/html/sax/parser.rb +1 -0
  61. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  62. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  63. data/lib/nokogiri/html.rb +1 -0
  64. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  65. data/lib/nokogiri/syntax_error.rb +1 -0
  66. data/lib/nokogiri/version/constant.rb +5 -0
  67. data/lib/nokogiri/version/info.rb +205 -0
  68. data/lib/nokogiri/version.rb +3 -109
  69. data/lib/nokogiri/xml/attr.rb +1 -0
  70. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  71. data/lib/nokogiri/xml/builder.rb +36 -32
  72. data/lib/nokogiri/xml/cdata.rb +1 -0
  73. data/lib/nokogiri/xml/character_data.rb +1 -0
  74. data/lib/nokogiri/xml/document.rb +92 -41
  75. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  76. data/lib/nokogiri/xml/dtd.rb +1 -0
  77. data/lib/nokogiri/xml/element_content.rb +1 -0
  78. data/lib/nokogiri/xml/element_decl.rb +1 -0
  79. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  80. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  81. data/lib/nokogiri/xml/namespace.rb +1 -0
  82. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  83. data/lib/nokogiri/xml/node.rb +625 -290
  84. data/lib/nokogiri/xml/node_set.rb +1 -0
  85. data/lib/nokogiri/xml/notation.rb +1 -0
  86. data/lib/nokogiri/xml/parse_options.rb +10 -3
  87. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  88. data/lib/nokogiri/xml/pp/node.rb +1 -0
  89. data/lib/nokogiri/xml/pp.rb +1 -0
  90. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  91. data/lib/nokogiri/xml/reader.rb +9 -12
  92. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  93. data/lib/nokogiri/xml/sax/document.rb +1 -0
  94. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  95. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  96. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  97. data/lib/nokogiri/xml/sax.rb +1 -0
  98. data/lib/nokogiri/xml/schema.rb +13 -4
  99. data/lib/nokogiri/xml/searchable.rb +25 -16
  100. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  101. data/lib/nokogiri/xml/text.rb +1 -0
  102. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  103. data/lib/nokogiri/xml/xpath.rb +2 -3
  104. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  105. data/lib/nokogiri/xml.rb +1 -0
  106. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  107. data/lib/nokogiri/xslt.rb +1 -0
  108. data/lib/nokogiri.rb +6 -27
  109. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  110. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  111. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  112. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  113. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  114. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  115. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  116. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  117. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  118. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  119. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  120. metadata +102 -147
  121. data/ext/nokogiri/html_document.h +0 -10
  122. data/ext/nokogiri/html_element_description.h +0 -10
  123. data/ext/nokogiri/html_entity_lookup.h +0 -8
  124. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  125. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  126. data/ext/nokogiri/xml_attr.h +0 -9
  127. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  128. data/ext/nokogiri/xml_cdata.h +0 -9
  129. data/ext/nokogiri/xml_comment.h +0 -9
  130. data/ext/nokogiri/xml_document.h +0 -23
  131. data/ext/nokogiri/xml_document_fragment.h +0 -10
  132. data/ext/nokogiri/xml_dtd.h +0 -10
  133. data/ext/nokogiri/xml_element_content.h +0 -10
  134. data/ext/nokogiri/xml_element_decl.h +0 -9
  135. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  136. data/ext/nokogiri/xml_entity_decl.h +0 -10
  137. data/ext/nokogiri/xml_entity_reference.h +0 -9
  138. data/ext/nokogiri/xml_io.c +0 -61
  139. data/ext/nokogiri/xml_io.h +0 -11
  140. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  141. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  142. data/ext/nokogiri/xml_namespace.h +0 -14
  143. data/ext/nokogiri/xml_node.h +0 -13
  144. data/ext/nokogiri/xml_node_set.h +0 -12
  145. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  146. data/ext/nokogiri/xml_reader.h +0 -10
  147. data/ext/nokogiri/xml_relax_ng.h +0 -9
  148. data/ext/nokogiri/xml_sax_parser.h +0 -39
  149. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  150. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  151. data/ext/nokogiri/xml_schema.h +0 -9
  152. data/ext/nokogiri/xml_syntax_error.h +0 -13
  153. data/ext/nokogiri/xml_text.h +0 -9
  154. data/ext/nokogiri/xml_xpath_context.h +0 -10
  155. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  156. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  157. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  158. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  159. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,151 +1,153 @@
1
+ # frozen_string_literal: true
1
2
  #--
2
3
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by rex 1.0.5
4
+ # This file is automatically generated by rex 1.0.7
4
5
  # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
6
  #++
6
7
 
7
8
  module Nokogiri
8
9
  module CSS
9
10
  class Tokenizer # :nodoc:
10
- require 'strscan'
11
+ require 'strscan'
11
12
 
12
- class ScanError < StandardError ; end
13
+ class ScanError < StandardError ; end
13
14
 
14
- attr_reader :lineno
15
- attr_reader :filename
16
- attr_accessor :state
15
+ attr_reader :lineno
16
+ attr_reader :filename
17
+ attr_accessor :state
17
18
 
18
- def scan_setup(str)
19
- @ss = StringScanner.new(str)
20
- @lineno = 1
21
- @state = nil
22
- end
19
+ def scan_setup(str)
20
+ @ss = StringScanner.new(str)
21
+ @lineno = 1
22
+ @state = nil
23
+ end
23
24
 
24
- def action
25
- yield
26
- end
25
+ def action
26
+ yield
27
+ end
27
28
 
28
- def scan_str(str)
29
- scan_setup(str)
30
- do_parse
31
- end
32
- alias :scan :scan_str
29
+ def scan_str(str)
30
+ scan_setup(str)
31
+ do_parse
32
+ end
33
+ alias :scan :scan_str
33
34
 
34
- def load_file( filename )
35
- @filename = filename
36
- open(filename, "r") do |f|
37
- scan_setup(f.read)
38
- end
39
- end
35
+ def load_file( filename )
36
+ @filename = filename
37
+ File.open(filename, "r") do |f|
38
+ scan_setup(f.read)
39
+ end
40
+ end
40
41
 
41
- def scan_file( filename )
42
- load_file(filename)
43
- do_parse
44
- end
42
+ def scan_file( filename )
43
+ load_file(filename)
44
+ do_parse
45
+ end
45
46
 
46
47
 
47
- def next_token
48
- return if @ss.eos?
49
-
50
- # skips empty actions
51
- until token = _next_token or @ss.eos?; end
52
- token
53
- end
48
+ def next_token
49
+ return if @ss.eos?
54
50
 
55
- def _next_token
56
- text = @ss.peek(1)
57
- @lineno += 1 if text == "\n"
58
- token = case @state
59
- when nil
60
- case
61
- when (text = @ss.scan(/has\([\s]*/))
62
- action { [:HAS, text] }
51
+ # skips empty actions
52
+ until token = _next_token or @ss.eos?; end
53
+ token
54
+ end
63
55
 
64
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
- action { [:FUNCTION, text] }
56
+ def _next_token
57
+ text = @ss.peek(1)
58
+ @lineno += 1 if text == "\n"
59
+ token = case @state
60
+ when nil
61
+ case
62
+ when (text = @ss.scan(/has\([\s]*/))
63
+ action { [:HAS, text] }
66
64
 
67
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
- action { [:IDENT, text] }
65
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
+ action { [:FUNCTION, text] }
69
67
 
70
- when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
- action { [:HASH, text] }
68
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
+ action { [:IDENT, text] }
72
70
 
73
- when (text = @ss.scan(/[\s]*~=[\s]*/))
74
- action { [:INCLUDES, text] }
71
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
72
+ action { [:HASH, text] }
75
73
 
76
- when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
- action { [:DASHMATCH, text] }
74
+ when (text = @ss.scan(/[\s]*~=[\s]*/))
75
+ action { [:INCLUDES, text] }
78
76
 
79
- when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
- action { [:PREFIXMATCH, text] }
77
+ when (text = @ss.scan(/[\s]*\|=[\s]*/))
78
+ action { [:DASHMATCH, text] }
81
79
 
82
- when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
- action { [:SUFFIXMATCH, text] }
80
+ when (text = @ss.scan(/[\s]*\^=[\s]*/))
81
+ action { [:PREFIXMATCH, text] }
84
82
 
85
- when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
- action { [:SUBSTRINGMATCH, text] }
83
+ when (text = @ss.scan(/[\s]*\$=[\s]*/))
84
+ action { [:SUFFIXMATCH, text] }
87
85
 
88
- when (text = @ss.scan(/[\s]*!=[\s]*/))
89
- action { [:NOT_EQUAL, text] }
86
+ when (text = @ss.scan(/[\s]*\*=[\s]*/))
87
+ action { [:SUBSTRINGMATCH, text] }
90
88
 
91
- when (text = @ss.scan(/[\s]*=[\s]*/))
92
- action { [:EQUAL, text] }
89
+ when (text = @ss.scan(/[\s]*!=[\s]*/))
90
+ action { [:NOT_EQUAL, text] }
93
91
 
94
- when (text = @ss.scan(/[\s]*\)/))
95
- action { [:RPAREN, text] }
92
+ when (text = @ss.scan(/[\s]*=[\s]*/))
93
+ action { [:EQUAL, text] }
96
94
 
97
- when (text = @ss.scan(/\[[\s]*/))
98
- action { [:LSQUARE, text] }
95
+ when (text = @ss.scan(/[\s]*\)/))
96
+ action { [:RPAREN, text] }
99
97
 
100
- when (text = @ss.scan(/[\s]*\]/))
101
- action { [:RSQUARE, text] }
98
+ when (text = @ss.scan(/\[[\s]*/))
99
+ action { [:LSQUARE, text] }
102
100
 
103
- when (text = @ss.scan(/[\s]*\+[\s]*/))
104
- action { [:PLUS, text] }
101
+ when (text = @ss.scan(/[\s]*\]/))
102
+ action { [:RSQUARE, text] }
105
103
 
106
- when (text = @ss.scan(/[\s]*>[\s]*/))
107
- action { [:GREATER, text] }
104
+ when (text = @ss.scan(/[\s]*\+[\s]*/))
105
+ action { [:PLUS, text] }
108
106
 
109
- when (text = @ss.scan(/[\s]*,[\s]*/))
110
- action { [:COMMA, text] }
107
+ when (text = @ss.scan(/[\s]*>[\s]*/))
108
+ action { [:GREATER, text] }
111
109
 
112
- when (text = @ss.scan(/[\s]*~[\s]*/))
113
- action { [:TILDE, text] }
110
+ when (text = @ss.scan(/[\s]*,[\s]*/))
111
+ action { [:COMMA, text] }
114
112
 
115
- when (text = @ss.scan(/\:not\([\s]*/))
116
- action { [:NOT, text] }
113
+ when (text = @ss.scan(/[\s]*~[\s]*/))
114
+ action { [:TILDE, text] }
117
115
 
118
- when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
- action { [:NUMBER, text] }
116
+ when (text = @ss.scan(/\:not\([\s]*/))
117
+ action { [:NOT, text] }
120
118
 
121
- when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
- action { [:DOUBLESLASH, text] }
119
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
120
+ action { [:NUMBER, text] }
123
121
 
124
- when (text = @ss.scan(/[\s]*\/[\s]*/))
125
- action { [:SLASH, text] }
122
+ when (text = @ss.scan(/[\s]*\/\/[\s]*/))
123
+ action { [:DOUBLESLASH, text] }
126
124
 
127
- when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
- action {[:UNICODE_RANGE, text] }
125
+ when (text = @ss.scan(/[\s]*\/[\s]*/))
126
+ action { [:SLASH, text] }
129
127
 
130
- when (text = @ss.scan(/[\s]+/))
131
- action { [:S, text] }
128
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
129
+ action {[:UNICODE_RANGE, text] }
132
130
 
133
- when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
134
- action { [:STRING, text] }
131
+ when (text = @ss.scan(/[\s]+/))
132
+ action { [:S, text] }
135
133
 
136
- when (text = @ss.scan(/./))
137
- action { [text, text] }
134
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
135
+ action { [:STRING, text] }
138
136
 
139
- else
140
- text = @ss.string[@ss.pos .. -1]
141
- raise ScanError, "can not match: '" + text + "'"
142
- end # if
137
+ when (text = @ss.scan(/./))
138
+ action { [text, text] }
143
139
 
144
- else
145
- raise ScanError, "undefined state: '" + state.to_s + "'"
146
- end # case state
147
- token
148
- end # def _next_token
140
+
141
+ else
142
+ text = @ss.string[@ss.pos .. -1]
143
+ raise ScanError, "can not match: '" + text + "'"
144
+ end # if
145
+
146
+ else
147
+ raise ScanError, "undefined state: '" + state.to_s + "'"
148
+ end # case state
149
+ token
150
+ end # def _next_token
149
151
 
150
152
  end # class
151
153
  end
@@ -1,8 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module CSS
3
4
  class XPathVisitor # :nodoc:
4
5
  def visit_function node
5
-
6
6
  msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
7
  return self.send(msg, node) if self.respond_to?(msg)
8
8
 
@@ -12,49 +12,51 @@ module Nokogiri
12
12
  when /^self\(/
13
13
  "self::#{node.value[1]}"
14
14
  when /^eq\(/
15
- "position() = #{node.value[1]}"
15
+ "position()=#{node.value[1]}"
16
16
  when /^(nth|nth-of-type)\(/
17
17
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
18
  nth(node.value[1])
19
19
  else
20
- "position() = #{node.value[1]}"
20
+ "position()=#{node.value[1]}"
21
21
  end
22
22
  when /^nth-child\(/
23
23
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
24
  nth(node.value[1], :child => true)
25
25
  else
26
- "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
26
+ "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
27
27
  end
28
28
  when /^nth-last-of-type\(/
29
29
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
30
  nth(node.value[1], :last => true)
31
31
  else
32
32
  index = node.value[1].to_i - 1
33
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
33
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
34
  end
35
35
  when /^nth-last-child\(/
36
36
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
37
  nth(node.value[1], :last => true, :child => true)
38
38
  else
39
- "count(following-sibling::*) = #{node.value[1].to_i-1}"
39
+ "count(following-sibling::*)=#{node.value[1].to_i-1}"
40
40
  end
41
41
  when /^(first|first-of-type)\(/
42
- "position() = 1"
42
+ "position()=1"
43
43
  when /^(last|last-of-type)\(/
44
- "position() = last()"
44
+ "position()=last()"
45
45
  when /^contains\(/
46
- "contains(., #{node.value[1]})"
46
+ "contains(.,#{node.value[1]})"
47
47
  when /^gt\(/
48
- "position() > #{node.value[1]}"
48
+ "position()>#{node.value[1]}"
49
49
  when /^only-child\(/
50
- "last() = 1"
50
+ "last()=1"
51
51
  when /^comment\(/
52
52
  "comment()"
53
53
  when /^has\(/
54
- ".//#{node.value[1].accept(self)}"
54
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
55
+ ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
55
56
  else
57
+ # non-standard. this looks like a function call.
56
58
  args = ['.'] + node.value[1..-1]
57
- "#{node.value.first}#{args.join(', ')})"
59
+ "#{node.value.first}#{args.join(',')})"
58
60
  end
59
61
  end
60
62
 
@@ -69,18 +71,18 @@ module Nokogiri
69
71
 
70
72
  def visit_id node
71
73
  node.value.first =~ /^#(.*)$/
72
- "@id = '#{$1}'"
74
+ "@id='#{$1}'"
73
75
  end
74
76
 
75
77
  def visit_attribute_condition node
76
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
77
- ''
78
- else
79
- '@'
80
- end
78
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
+ ''
80
+ else
81
+ '@'
82
+ end
81
83
  attribute += node.value.first.accept(self)
82
84
 
83
- # Support non-standard css
85
+ # non-standard. attributes starting with '@'
84
86
  attribute.gsub!(/^@@/, '@')
85
87
 
86
88
  return attribute unless node.value.length == 3
@@ -88,29 +90,30 @@ module Nokogiri
88
90
  value = node.value.last
89
91
  value = "'#{value}'" if value !~ /^['"]/
90
92
 
93
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
91
94
  if (value[0]==value[-1]) && %q{"'}.include?(value[0])
92
95
  str_value = value[1..-2]
93
96
  if str_value.include?(value[0])
94
- value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
97
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
95
98
  end
96
99
  end
97
100
 
98
101
  case node.value[1]
99
102
  when :equal
100
- attribute + " = " + "#{value}"
103
+ attribute + "=" + "#{value}"
101
104
  when :not_equal
102
- attribute + " != " + "#{value}"
105
+ attribute + "!=" + "#{value}"
103
106
  when :substring_match
104
- "contains(#{attribute}, #{value})"
107
+ "contains(#{attribute},#{value})"
105
108
  when :prefix_match
106
- "starts-with(#{attribute}, #{value})"
109
+ "starts-with(#{attribute},#{value})"
107
110
  when :dash_match
108
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
111
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
109
112
  when :includes
110
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
113
+ value = value[1..-2] # strip quotes
114
+ css_class(attribute, value)
111
115
  when :suffix_match
112
- "substring(#{attribute}, string-length(#{attribute}) - " +
113
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
116
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
114
117
  else
115
118
  attribute + " #{node.value[1]} " + "#{value}"
116
119
  end
@@ -124,14 +127,14 @@ module Nokogiri
124
127
  return self.send(msg, node) if self.respond_to?(msg)
125
128
 
126
129
  case node.value.first
127
- when "first" then "position() = 1"
128
- when "first-child" then "count(preceding-sibling::*) = 0"
129
- when "last" then "position() = last()"
130
- when "last-child" then "count(following-sibling::*) = 0"
131
- when "first-of-type" then "position() = 1"
132
- when "last-of-type" then "position() = last()"
133
- when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
134
- when "only-of-type" then "last() = 1"
130
+ when "first" then "position()=1"
131
+ when "first-child" then "count(preceding-sibling::*)=0"
132
+ when "last" then "position()=last()"
133
+ when "last-child" then "count(following-sibling::*)=0"
134
+ when "first-of-type" then "position()=1"
135
+ when "last-of-type" then "position()=last()"
136
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
137
+ when "only-of-type" then "last()=1"
135
138
  when "empty" then "not(node())"
136
139
  when "parent" then "node()"
137
140
  when "root" then "not(parent::*)"
@@ -142,7 +145,7 @@ module Nokogiri
142
145
  end
143
146
 
144
147
  def visit_class_condition node
145
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
148
+ css_class("@class", node.value.first)
146
149
  end
147
150
 
148
151
  def visit_combinator node
@@ -179,25 +182,26 @@ module Nokogiri
179
182
  node.accept(self)
180
183
  end
181
184
 
182
- private
185
+ private
186
+
183
187
  def nth node, options={}
184
188
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
185
189
 
186
190
  a, b = read_a_and_positive_b node.value
187
191
  position = if options[:child]
188
- options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
192
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
189
193
  else
190
194
  options[:last] ? "(last()-position()+1)" : "position()"
191
195
  end
192
196
 
193
197
  if b.zero?
194
- "(#{position} mod #{a}) = 0"
198
+ "(#{position} mod #{a})=0"
195
199
  else
196
200
  compare = a < 0 ? "<=" : ">="
197
201
  if a.abs == 1
198
- "#{position} #{compare} #{b}"
202
+ "#{position}#{compare}#{b}"
199
203
  else
200
- "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
204
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
201
205
  end
202
206
  end
203
207
  end
@@ -225,6 +229,32 @@ module Nokogiri
225
229
  end =~ /(nth|first|last|only)-of-type(\()?/
226
230
  end
227
231
  end
232
+
233
+ # use only ordinary xpath functions
234
+ def css_class_standard(hay, needle)
235
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
236
+ end
237
+
238
+ # use the builtin implementation
239
+ def css_class_builtin(hay, needle)
240
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
241
+ end
242
+
243
+ alias_method :css_class, :css_class_standard
244
+ end
245
+
246
+ class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
+ private
248
+ alias_method :css_class, :css_class_builtin
249
+ end
250
+
251
+ class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
+ private
253
+ if Nokogiri.uses_libxml?
254
+ alias_method :css_class, :css_class_builtin
255
+ else
256
+ alias_method :css_class, :css_class_standard
257
+ end
228
258
  end
229
259
  end
230
260
  end
data/lib/nokogiri/css.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/css/node'
2
3
  require 'nokogiri/css/xpath_visitor'
3
4
  x = $-w
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module Decorators
3
4
  ###
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension
4
+ begin
5
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
6
+ require "nokogiri/#{Regexp.last_match(1)}/nokogiri"
7
+ rescue LoadError => e
8
+ if e.message =~ /GLIBC/
9
+ warn(<<~EOM)
10
+
11
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
12
+
13
+ #{e.message}
14
+
15
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
16
+ gem install nokogiri --platform=ruby
17
+ or:
18
+ bundle config set force_ruby_platform true
19
+
20
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
21
+
22
+ EOM
23
+ raise e
24
+ end
25
+ require 'nokogiri/nokogiri'
26
+ end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module HTML
3
4
  ###
@@ -1,3 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
1
5
  module Nokogiri
2
6
  module HTML
3
7
  class Document < Nokogiri::XML::Document
@@ -160,11 +164,12 @@ module Nokogiri
160
164
  # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
161
165
  # Nokogiri::XML::ParseOptions.
162
166
  def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
163
-
164
167
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
165
- # Give the options to the user
168
+
166
169
  yield options if block_given?
167
170
 
171
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
172
+
168
173
  if string_or_io.respond_to?(:encoding)
169
174
  unless string_or_io.encoding.name == "ASCII-8BIT"
170
175
  encoding ||= string_or_io.encoding.name
@@ -172,7 +177,12 @@ module Nokogiri
172
177
  end
173
178
 
174
179
  if string_or_io.respond_to?(:read)
175
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
180
+ if string_or_io.is_a?(Pathname)
181
+ # resolve the Pathname to the file and open it as an IO object, see #2110
182
+ string_or_io = string_or_io.expand_path.open
183
+ url ||= string_or_io.path
184
+ end
185
+
176
186
  unless encoding
177
187
  # Libxml2's parser has poor support for encoding
178
188
  # detection. First, it does not recognize the HTML5
@@ -251,9 +261,6 @@ module Nokogiri
251
261
  end
252
262
 
253
263
  def self.detect_encoding(chunk)
254
- if Nokogiri.jruby? && EncodingReader.is_jruby_without_fix?
255
- return EncodingReader.detect_encoding_for_jruby_without_fix(chunk)
256
- end
257
264
  m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
258
265
  return Nokogiri.XML(m[1]).encoding
259
266
 
@@ -272,26 +279,6 @@ module Nokogiri
272
279
  end
273
280
  end
274
281
 
275
- def self.is_jruby_without_fix?
276
- JRUBY_VERSION.split('.').join.to_i < 165
277
- end
278
-
279
- def self.detect_encoding_for_jruby_without_fix(chunk)
280
- m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
281
- return Nokogiri.XML(m[1]).encoding
282
-
283
- m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
284
- return m[4]
285
-
286
- catch(:encoding_found) {
287
- Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found.to_s)).parse(chunk)
288
- nil
289
- }
290
- rescue Nokogiri::SyntaxError, RuntimeError
291
- # Ignore parser errors that nokogiri may raise
292
- nil
293
- end
294
-
295
282
  def initialize(io)
296
283
  @io = io
297
284
  @firstchunk = nil