nokogiri 1.10.0 → 1.11.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (105)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/README.md +178 -90
  5. data/dependencies.yml +28 -26
  6. data/ext/nokogiri/depend +476 -357
  7. data/ext/nokogiri/extconf.rb +508 -354
  8. data/ext/nokogiri/html_document.c +79 -78
  9. data/ext/nokogiri/html_sax_parser_context.c +4 -2
  10. data/ext/nokogiri/html_sax_push_parser.c +14 -8
  11. data/ext/nokogiri/nokogiri.c +37 -40
  12. data/ext/nokogiri/nokogiri.h +26 -17
  13. data/ext/nokogiri/test_global_handlers.c +41 -0
  14. data/ext/nokogiri/xml_document.c +18 -4
  15. data/ext/nokogiri/xml_io.c +8 -6
  16. data/ext/nokogiri/xml_node.c +23 -6
  17. data/ext/nokogiri/xml_node_set.c +1 -1
  18. data/ext/nokogiri/xml_reader.c +6 -17
  19. data/ext/nokogiri/xml_relax_ng.c +29 -11
  20. data/ext/nokogiri/xml_sax_parser.c +2 -7
  21. data/ext/nokogiri/xml_sax_parser_context.c +4 -2
  22. data/ext/nokogiri/xml_sax_push_parser.c +2 -0
  23. data/ext/nokogiri/xml_schema.c +84 -13
  24. data/ext/nokogiri/xml_syntax_error.c +23 -0
  25. data/ext/nokogiri/xml_syntax_error.h +15 -3
  26. data/ext/nokogiri/xml_xpath_context.c +80 -4
  27. data/ext/nokogiri/xslt_stylesheet.c +2 -9
  28. data/lib/nokogiri.rb +22 -22
  29. data/lib/nokogiri/css.rb +1 -0
  30. data/lib/nokogiri/css/node.rb +1 -0
  31. data/lib/nokogiri/css/parser.rb +63 -62
  32. data/lib/nokogiri/css/parser.y +2 -2
  33. data/lib/nokogiri/css/parser_extras.rb +39 -36
  34. data/lib/nokogiri/css/syntax_error.rb +1 -0
  35. data/lib/nokogiri/css/tokenizer.rb +105 -103
  36. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  37. data/lib/nokogiri/decorators/slop.rb +1 -0
  38. data/lib/nokogiri/html.rb +1 -0
  39. data/lib/nokogiri/html/builder.rb +1 -0
  40. data/lib/nokogiri/html/document.rb +13 -26
  41. data/lib/nokogiri/html/document_fragment.rb +1 -0
  42. data/lib/nokogiri/html/element_description.rb +1 -0
  43. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  44. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  45. data/lib/nokogiri/html/sax/parser.rb +1 -0
  46. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  47. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  48. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  49. data/lib/nokogiri/syntax_error.rb +1 -0
  50. data/lib/nokogiri/version.rb +3 -109
  51. data/lib/nokogiri/version/constant.rb +5 -0
  52. data/lib/nokogiri/version/info.rb +182 -0
  53. data/lib/nokogiri/xml.rb +1 -0
  54. data/lib/nokogiri/xml/attr.rb +1 -0
  55. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  56. data/lib/nokogiri/xml/builder.rb +36 -32
  57. data/lib/nokogiri/xml/cdata.rb +1 -0
  58. data/lib/nokogiri/xml/character_data.rb +1 -0
  59. data/lib/nokogiri/xml/document.rb +21 -16
  60. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  61. data/lib/nokogiri/xml/dtd.rb +1 -0
  62. data/lib/nokogiri/xml/element_content.rb +1 -0
  63. data/lib/nokogiri/xml/element_decl.rb +1 -0
  64. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  65. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  66. data/lib/nokogiri/xml/namespace.rb +1 -0
  67. data/lib/nokogiri/xml/node.rb +588 -250
  68. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  69. data/lib/nokogiri/xml/node_set.rb +43 -8
  70. data/lib/nokogiri/xml/notation.rb +1 -0
  71. data/lib/nokogiri/xml/parse_options.rb +10 -3
  72. data/lib/nokogiri/xml/pp.rb +1 -0
  73. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  74. data/lib/nokogiri/xml/pp/node.rb +1 -0
  75. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  76. data/lib/nokogiri/xml/reader.rb +7 -3
  77. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  78. data/lib/nokogiri/xml/sax.rb +1 -0
  79. data/lib/nokogiri/xml/sax/document.rb +1 -0
  80. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  81. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  82. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  83. data/lib/nokogiri/xml/schema.rb +13 -4
  84. data/lib/nokogiri/xml/searchable.rb +25 -16
  85. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  86. data/lib/nokogiri/xml/text.rb +1 -0
  87. data/lib/nokogiri/xml/xpath.rb +1 -0
  88. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
  89. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  90. data/lib/nokogiri/xslt.rb +1 -0
  91. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  92. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  93. data/patches/libxml2/0002-Remove-script-macro-support.patch +40 -0
  94. data/patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  95. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +25 -0
  96. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +32 -0
  97. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
  98. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
  99. data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
  100. data/patches/libxml2/0009-avoid-isnan-isinf.patch +81 -0
  101. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  102. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  103. metadata +106 -99
  104. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  105. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -88,7 +88,7 @@ rule
88
88
  )
89
89
  }
90
90
  | LSQUARE NUMBER RSQUARE {
91
- # Non standard, but hpricot supports it.
91
+ # non-standard, from hpricot
92
92
  result = Node.new(:PSEUDO_CLASS,
93
93
  [Node.new(:FUNCTION, ['nth-child(', val[1]])]
94
94
  )
@@ -139,7 +139,7 @@ rule
139
139
  when 'n'
140
140
  result = Node.new(:NTH, ['1','n','+','0'])
141
141
  else
142
- # This is not CSS standard. It allows us to support this:
142
+ # non-standard to support custom functions:
143
143
  # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
144
144
  # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
145
145
  # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
@@ -1,63 +1,66 @@
1
- require 'thread'
1
+ # frozen_string_literal: true
2
+ require "thread"
2
3
 
3
4
  module Nokogiri
4
5
  module CSS
5
6
  class Parser < Racc::Parser
6
- @cache_on = true
7
- @cache = {}
8
- @mutex = Mutex.new
7
+ CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
8
+
9
+ @cache = {}
10
+ @mutex = Mutex.new
9
11
 
10
12
  class << self
11
- # Turn on CSS parse caching
12
- attr_accessor :cache_on
13
- alias :cache_on? :cache_on
14
- alias :set_cache :cache_on=
13
+ # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
14
+ def cache_on?
15
+ !Thread.current[CACHE_SWITCH_NAME]
16
+ end
17
+
18
+ # Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
19
+ def set_cache(value)
20
+ Thread.current[CACHE_SWITCH_NAME] = !value
21
+ end
15
22
 
16
23
  # Get the css selector in +string+ from the cache
17
- def [] string
18
- return unless @cache_on
24
+ def [](string)
25
+ return unless cache_on?
19
26
  @mutex.synchronize { @cache[string] }
20
27
  end
21
28
 
22
29
  # Set the css selector in +string+ in the cache to +value+
23
- def []= string, value
24
- return value unless @cache_on
30
+ def []=(string, value)
31
+ return value unless cache_on?
25
32
  @mutex.synchronize { @cache[string] = value }
26
33
  end
27
34
 
28
35
  # Clear the cache
29
- def clear_cache
30
- @mutex.synchronize { @cache = {} }
36
+ def clear_cache(create_new_object = false)
37
+ @mutex.synchronize do
38
+ if create_new_object
39
+ @cache = {}
40
+ else
41
+ @cache.clear
42
+ end
43
+ end
31
44
  end
32
45
 
33
46
  # Execute +block+ without cache
34
- def without_cache &block
35
- tmp = @cache_on
36
- @cache_on = false
47
+ def without_cache(&block)
48
+ original_cache_setting = cache_on?
49
+ set_cache false
37
50
  block.call
38
- @cache_on = tmp
39
- end
40
-
41
- ###
42
- # Parse this CSS selector in +selector+. Returns an AST.
43
- def parse selector
44
- @warned ||= false
45
- unless @warned
46
- $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
47
- @warned = true
48
- end
49
- new.parse selector
51
+ ensure
52
+ set_cache original_cache_setting
50
53
  end
51
54
  end
52
55
 
53
56
  # Create a new CSS parser with respect to +namespaces+
54
- def initialize namespaces = {}
55
- @tokenizer = Tokenizer.new
57
+ def initialize(namespaces = {})
58
+ @tokenizer = Tokenizer.new
56
59
  @namespaces = namespaces
57
60
  super()
58
61
  end
59
62
 
60
- def parse string
63
+ def parse(string)
61
64
  @tokenizer.scan_setup string
62
65
  do_parse
63
66
  end
@@ -67,14 +70,14 @@ module Nokogiri
67
70
  end
68
71
 
69
72
  # Get the xpath for +string+ using +options+
70
- def xpath_for string, options={}
73
+ def xpath_for(string, options = {})
71
74
  key = "#{string}#{options[:ns]}#{options[:prefix]}"
72
75
  v = self.class[key]
73
76
  return v if v
74
77
 
75
78
  args = [
76
- options[:prefix] || '//',
77
- options[:visitor] || XPathVisitor.new
79
+ options[:prefix] || "//",
80
+ options[:visitor] || XPathVisitor.new,
78
81
  ]
79
82
  self.class[key] = parse(string).map { |ast|
80
83
  ast.to_xpath(*args)
@@ -82,7 +85,7 @@ module Nokogiri
82
85
  end
83
86
 
84
87
  # On CSS parser error, raise an exception
85
- def on_error error_token_id, error_value, value_stack
88
+ def on_error(error_token_id, error_value, value_stack)
86
89
  after = value_stack.compact.last
87
90
  raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
88
91
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/syntax_error'
2
3
  module Nokogiri
3
4
  module CSS
@@ -1,151 +1,153 @@
1
+ # frozen_string_literal: true
1
2
  #--
2
3
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by rex 1.0.5
4
+ # This file is automatically generated by rex 1.0.7
4
5
  # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
6
  #++
6
7
 
7
8
  module Nokogiri
8
9
  module CSS
9
10
  class Tokenizer # :nodoc:
10
- require 'strscan'
11
+ require 'strscan'
11
12
 
12
- class ScanError < StandardError ; end
13
+ class ScanError < StandardError ; end
13
14
 
14
- attr_reader :lineno
15
- attr_reader :filename
16
- attr_accessor :state
15
+ attr_reader :lineno
16
+ attr_reader :filename
17
+ attr_accessor :state
17
18
 
18
- def scan_setup(str)
19
- @ss = StringScanner.new(str)
20
- @lineno = 1
21
- @state = nil
22
- end
19
+ def scan_setup(str)
20
+ @ss = StringScanner.new(str)
21
+ @lineno = 1
22
+ @state = nil
23
+ end
23
24
 
24
- def action
25
- yield
26
- end
25
+ def action
26
+ yield
27
+ end
27
28
 
28
- def scan_str(str)
29
- scan_setup(str)
30
- do_parse
31
- end
32
- alias :scan :scan_str
29
+ def scan_str(str)
30
+ scan_setup(str)
31
+ do_parse
32
+ end
33
+ alias :scan :scan_str
33
34
 
34
- def load_file( filename )
35
- @filename = filename
36
- open(filename, "r") do |f|
37
- scan_setup(f.read)
38
- end
39
- end
35
+ def load_file( filename )
36
+ @filename = filename
37
+ File.open(filename, "r") do |f|
38
+ scan_setup(f.read)
39
+ end
40
+ end
40
41
 
41
- def scan_file( filename )
42
- load_file(filename)
43
- do_parse
44
- end
42
+ def scan_file( filename )
43
+ load_file(filename)
44
+ do_parse
45
+ end
45
46
 
46
47
 
47
- def next_token
48
- return if @ss.eos?
49
-
50
- # skips empty actions
51
- until token = _next_token or @ss.eos?; end
52
- token
53
- end
48
+ def next_token
49
+ return if @ss.eos?
54
50
 
55
- def _next_token
56
- text = @ss.peek(1)
57
- @lineno += 1 if text == "\n"
58
- token = case @state
59
- when nil
60
- case
61
- when (text = @ss.scan(/has\([\s]*/))
62
- action { [:HAS, text] }
51
+ # skips empty actions
52
+ until token = _next_token or @ss.eos?; end
53
+ token
54
+ end
63
55
 
64
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
- action { [:FUNCTION, text] }
56
+ def _next_token
57
+ text = @ss.peek(1)
58
+ @lineno += 1 if text == "\n"
59
+ token = case @state
60
+ when nil
61
+ case
62
+ when (text = @ss.scan(/has\([\s]*/))
63
+ action { [:HAS, text] }
66
64
 
67
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
- action { [:IDENT, text] }
65
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
+ action { [:FUNCTION, text] }
69
67
 
70
- when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
- action { [:HASH, text] }
68
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
+ action { [:IDENT, text] }
72
70
 
73
- when (text = @ss.scan(/[\s]*~=[\s]*/))
74
- action { [:INCLUDES, text] }
71
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
72
+ action { [:HASH, text] }
75
73
 
76
- when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
- action { [:DASHMATCH, text] }
74
+ when (text = @ss.scan(/[\s]*~=[\s]*/))
75
+ action { [:INCLUDES, text] }
78
76
 
79
- when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
- action { [:PREFIXMATCH, text] }
77
+ when (text = @ss.scan(/[\s]*\|=[\s]*/))
78
+ action { [:DASHMATCH, text] }
81
79
 
82
- when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
- action { [:SUFFIXMATCH, text] }
80
+ when (text = @ss.scan(/[\s]*\^=[\s]*/))
81
+ action { [:PREFIXMATCH, text] }
84
82
 
85
- when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
- action { [:SUBSTRINGMATCH, text] }
83
+ when (text = @ss.scan(/[\s]*\$=[\s]*/))
84
+ action { [:SUFFIXMATCH, text] }
87
85
 
88
- when (text = @ss.scan(/[\s]*!=[\s]*/))
89
- action { [:NOT_EQUAL, text] }
86
+ when (text = @ss.scan(/[\s]*\*=[\s]*/))
87
+ action { [:SUBSTRINGMATCH, text] }
90
88
 
91
- when (text = @ss.scan(/[\s]*=[\s]*/))
92
- action { [:EQUAL, text] }
89
+ when (text = @ss.scan(/[\s]*!=[\s]*/))
90
+ action { [:NOT_EQUAL, text] }
93
91
 
94
- when (text = @ss.scan(/[\s]*\)/))
95
- action { [:RPAREN, text] }
92
+ when (text = @ss.scan(/[\s]*=[\s]*/))
93
+ action { [:EQUAL, text] }
96
94
 
97
- when (text = @ss.scan(/\[[\s]*/))
98
- action { [:LSQUARE, text] }
95
+ when (text = @ss.scan(/[\s]*\)/))
96
+ action { [:RPAREN, text] }
99
97
 
100
- when (text = @ss.scan(/[\s]*\]/))
101
- action { [:RSQUARE, text] }
98
+ when (text = @ss.scan(/\[[\s]*/))
99
+ action { [:LSQUARE, text] }
102
100
 
103
- when (text = @ss.scan(/[\s]*\+[\s]*/))
104
- action { [:PLUS, text] }
101
+ when (text = @ss.scan(/[\s]*\]/))
102
+ action { [:RSQUARE, text] }
105
103
 
106
- when (text = @ss.scan(/[\s]*>[\s]*/))
107
- action { [:GREATER, text] }
104
+ when (text = @ss.scan(/[\s]*\+[\s]*/))
105
+ action { [:PLUS, text] }
108
106
 
109
- when (text = @ss.scan(/[\s]*,[\s]*/))
110
- action { [:COMMA, text] }
107
+ when (text = @ss.scan(/[\s]*>[\s]*/))
108
+ action { [:GREATER, text] }
111
109
 
112
- when (text = @ss.scan(/[\s]*~[\s]*/))
113
- action { [:TILDE, text] }
110
+ when (text = @ss.scan(/[\s]*,[\s]*/))
111
+ action { [:COMMA, text] }
114
112
 
115
- when (text = @ss.scan(/\:not\([\s]*/))
116
- action { [:NOT, text] }
113
+ when (text = @ss.scan(/[\s]*~[\s]*/))
114
+ action { [:TILDE, text] }
117
115
 
118
- when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
- action { [:NUMBER, text] }
116
+ when (text = @ss.scan(/\:not\([\s]*/))
117
+ action { [:NOT, text] }
120
118
 
121
- when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
- action { [:DOUBLESLASH, text] }
119
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
120
+ action { [:NUMBER, text] }
123
121
 
124
- when (text = @ss.scan(/[\s]*\/[\s]*/))
125
- action { [:SLASH, text] }
122
+ when (text = @ss.scan(/[\s]*\/\/[\s]*/))
123
+ action { [:DOUBLESLASH, text] }
126
124
 
127
- when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
- action {[:UNICODE_RANGE, text] }
125
+ when (text = @ss.scan(/[\s]*\/[\s]*/))
126
+ action { [:SLASH, text] }
129
127
 
130
- when (text = @ss.scan(/[\s]+/))
131
- action { [:S, text] }
128
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
129
+ action {[:UNICODE_RANGE, text] }
132
130
 
133
- when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
134
- action { [:STRING, text] }
131
+ when (text = @ss.scan(/[\s]+/))
132
+ action { [:S, text] }
135
133
 
136
- when (text = @ss.scan(/./))
137
- action { [text, text] }
134
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
135
+ action { [:STRING, text] }
138
136
 
139
- else
140
- text = @ss.string[@ss.pos .. -1]
141
- raise ScanError, "can not match: '" + text + "'"
142
- end # if
137
+ when (text = @ss.scan(/./))
138
+ action { [text, text] }
143
139
 
144
- else
145
- raise ScanError, "undefined state: '" + state.to_s + "'"
146
- end # case state
147
- token
148
- end # def _next_token
140
+
141
+ else
142
+ text = @ss.string[@ss.pos .. -1]
143
+ raise ScanError, "can not match: '" + text + "'"
144
+ end # if
145
+
146
+ else
147
+ raise ScanError, "undefined state: '" + state.to_s + "'"
148
+ end # case state
149
+ token
150
+ end # def _next_token
149
151
 
150
152
  end # class
151
153
  end
@@ -1,8 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module CSS
3
4
  class XPathVisitor # :nodoc:
4
5
  def visit_function node
5
-
6
6
  msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
7
  return self.send(msg, node) if self.respond_to?(msg)
8
8
 
@@ -12,49 +12,51 @@ module Nokogiri
12
12
  when /^self\(/
13
13
  "self::#{node.value[1]}"
14
14
  when /^eq\(/
15
- "position() = #{node.value[1]}"
15
+ "position()=#{node.value[1]}"
16
16
  when /^(nth|nth-of-type)\(/
17
17
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
18
  nth(node.value[1])
19
19
  else
20
- "position() = #{node.value[1]}"
20
+ "position()=#{node.value[1]}"
21
21
  end
22
22
  when /^nth-child\(/
23
23
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
24
  nth(node.value[1], :child => true)
25
25
  else
26
- "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
26
+ "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
27
27
  end
28
28
  when /^nth-last-of-type\(/
29
29
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
30
  nth(node.value[1], :last => true)
31
31
  else
32
32
  index = node.value[1].to_i - 1
33
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
33
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
34
  end
35
35
  when /^nth-last-child\(/
36
36
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
37
  nth(node.value[1], :last => true, :child => true)
38
38
  else
39
- "count(following-sibling::*) = #{node.value[1].to_i-1}"
39
+ "count(following-sibling::*)=#{node.value[1].to_i-1}"
40
40
  end
41
41
  when /^(first|first-of-type)\(/
42
- "position() = 1"
42
+ "position()=1"
43
43
  when /^(last|last-of-type)\(/
44
- "position() = last()"
44
+ "position()=last()"
45
45
  when /^contains\(/
46
- "contains(., #{node.value[1]})"
46
+ "contains(.,#{node.value[1]})"
47
47
  when /^gt\(/
48
- "position() > #{node.value[1]}"
48
+ "position()>#{node.value[1]}"
49
49
  when /^only-child\(/
50
- "last() = 1"
50
+ "last()=1"
51
51
  when /^comment\(/
52
52
  "comment()"
53
53
  when /^has\(/
54
- node.value[1].accept(self)
54
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
55
+ ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
55
56
  else
57
+ # non-standard. this looks like a function call.
56
58
  args = ['.'] + node.value[1..-1]
57
- "#{node.value.first}#{args.join(', ')})"
59
+ "#{node.value.first}#{args.join(',')})"
58
60
  end
59
61
  end
60
62
 
@@ -69,18 +71,18 @@ module Nokogiri
69
71
 
70
72
  def visit_id node
71
73
  node.value.first =~ /^#(.*)$/
72
- "@id = '#{$1}'"
74
+ "@id='#{$1}'"
73
75
  end
74
76
 
75
77
  def visit_attribute_condition node
76
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
77
- ''
78
- else
79
- '@'
80
- end
78
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
+ ''
80
+ else
81
+ '@'
82
+ end
81
83
  attribute += node.value.first.accept(self)
82
84
 
83
- # Support non-standard css
85
+ # non-standard. attributes starting with '@'
84
86
  attribute.gsub!(/^@@/, '@')
85
87
 
86
88
  return attribute unless node.value.length == 3
@@ -88,29 +90,30 @@ module Nokogiri
88
90
  value = node.value.last
89
91
  value = "'#{value}'" if value !~ /^['"]/
90
92
 
93
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
91
94
  if (value[0]==value[-1]) && %q{"'}.include?(value[0])
92
95
  str_value = value[1..-2]
93
96
  if str_value.include?(value[0])
94
- value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
97
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
95
98
  end
96
99
  end
97
100
 
98
101
  case node.value[1]
99
102
  when :equal
100
- attribute + " = " + "#{value}"
103
+ attribute + "=" + "#{value}"
101
104
  when :not_equal
102
- attribute + " != " + "#{value}"
105
+ attribute + "!=" + "#{value}"
103
106
  when :substring_match
104
- "contains(#{attribute}, #{value})"
107
+ "contains(#{attribute},#{value})"
105
108
  when :prefix_match
106
- "starts-with(#{attribute}, #{value})"
109
+ "starts-with(#{attribute},#{value})"
107
110
  when :dash_match
108
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
111
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
109
112
  when :includes
110
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
113
+ value = value[1..-2] # strip quotes
114
+ css_class(attribute, value)
111
115
  when :suffix_match
112
- "substring(#{attribute}, string-length(#{attribute}) - " +
113
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
116
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
114
117
  else
115
118
  attribute + " #{node.value[1]} " + "#{value}"
116
119
  end
@@ -124,14 +127,14 @@ module Nokogiri
124
127
  return self.send(msg, node) if self.respond_to?(msg)
125
128
 
126
129
  case node.value.first
127
- when "first" then "position() = 1"
128
- when "first-child" then "count(preceding-sibling::*) = 0"
129
- when "last" then "position() = last()"
130
- when "last-child" then "count(following-sibling::*) = 0"
131
- when "first-of-type" then "position() = 1"
132
- when "last-of-type" then "position() = last()"
133
- when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
134
- when "only-of-type" then "last() = 1"
130
+ when "first" then "position()=1"
131
+ when "first-child" then "count(preceding-sibling::*)=0"
132
+ when "last" then "position()=last()"
133
+ when "last-child" then "count(following-sibling::*)=0"
134
+ when "first-of-type" then "position()=1"
135
+ when "last-of-type" then "position()=last()"
136
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
137
+ when "only-of-type" then "last()=1"
135
138
  when "empty" then "not(node())"
136
139
  when "parent" then "node()"
137
140
  when "root" then "not(parent::*)"
@@ -142,7 +145,7 @@ module Nokogiri
142
145
  end
143
146
 
144
147
  def visit_class_condition node
145
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
148
+ css_class("@class", node.value.first)
146
149
  end
147
150
 
148
151
  def visit_combinator node
@@ -179,25 +182,26 @@ module Nokogiri
179
182
  node.accept(self)
180
183
  end
181
184
 
182
- private
185
+ private
186
+
183
187
  def nth node, options={}
184
188
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
185
189
 
186
190
  a, b = read_a_and_positive_b node.value
187
191
  position = if options[:child]
188
- options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
192
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
189
193
  else
190
194
  options[:last] ? "(last()-position()+1)" : "position()"
191
195
  end
192
196
 
193
197
  if b.zero?
194
- "(#{position} mod #{a}) = 0"
198
+ "(#{position} mod #{a})=0"
195
199
  else
196
200
  compare = a < 0 ? "<=" : ">="
197
201
  if a.abs == 1
198
- "#{position} #{compare} #{b}"
202
+ "#{position}#{compare}#{b}"
199
203
  else
200
- "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
204
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
201
205
  end
202
206
  end
203
207
  end
@@ -225,6 +229,32 @@ module Nokogiri
225
229
  end =~ /(nth|first|last|only)-of-type(\()?/
226
230
  end
227
231
  end
232
+
233
+ # use only ordinary xpath functions
234
+ def css_class_standard(hay, needle)
235
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
236
+ end
237
+
238
+ # use the builtin implementation
239
+ def css_class_builtin(hay, needle)
240
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
241
+ end
242
+
243
+ alias_method :css_class, :css_class_standard
244
+ end
245
+
246
+ class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
+ private
248
+ alias_method :css_class, :css_class_builtin
249
+ end
250
+
251
+ class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
+ private
253
+ if Nokogiri.uses_libxml?
254
+ alias_method :css_class, :css_class_builtin
255
+ else
256
+ alias_method :css_class, :css_class_standard
257
+ end
228
258
  end
229
259
  end
230
260
  end