nokogiri 1.11.3 → 1.13.8

Sign up to get free protection for your applications and access to all features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (179)
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE-DEPENDENCIES.md +243 -22
  4. data/LICENSE.md +1 -1
  5. data/README.md +14 -11
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -64
  8. data/ext/nokogiri/depend +35 -34
  9. data/ext/nokogiri/extconf.rb +237 -133
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  12. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  13. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  14. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +8 -8
  15. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  16. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  17. data/ext/nokogiri/nokogiri.c +70 -38
  18. data/ext/nokogiri/nokogiri.h +27 -9
  19. data/ext/nokogiri/xml_attr.c +2 -2
  20. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  21. data/ext/nokogiri/xml_cdata.c +1 -1
  22. data/ext/nokogiri/xml_document.c +50 -50
  23. data/ext/nokogiri/xml_document_fragment.c +0 -2
  24. data/ext/nokogiri/xml_dtd.c +10 -10
  25. data/ext/nokogiri/xml_element_content.c +2 -0
  26. data/ext/nokogiri/xml_element_decl.c +3 -3
  27. data/ext/nokogiri/xml_encoding_handler.c +31 -12
  28. data/ext/nokogiri/xml_entity_decl.c +5 -5
  29. data/ext/nokogiri/xml_namespace.c +4 -2
  30. data/ext/nokogiri/xml_node.c +833 -492
  31. data/ext/nokogiri/xml_node_set.c +24 -24
  32. data/ext/nokogiri/xml_reader.c +90 -11
  33. data/ext/nokogiri/xml_sax_parser.c +6 -6
  34. data/ext/nokogiri/xml_sax_parser_context.c +12 -3
  35. data/ext/nokogiri/xml_schema.c +5 -3
  36. data/ext/nokogiri/xml_text.c +1 -1
  37. data/ext/nokogiri/xml_xpath_context.c +110 -85
  38. data/ext/nokogiri/xslt_stylesheet.c +109 -10
  39. data/gumbo-parser/CHANGES.md +63 -0
  40. data/gumbo-parser/Makefile +101 -0
  41. data/gumbo-parser/THANKS +27 -0
  42. data/gumbo-parser/src/Makefile +34 -0
  43. data/gumbo-parser/src/README.md +41 -0
  44. data/gumbo-parser/src/ascii.c +75 -0
  45. data/gumbo-parser/src/ascii.h +115 -0
  46. data/gumbo-parser/src/attribute.c +42 -0
  47. data/gumbo-parser/src/attribute.h +17 -0
  48. data/gumbo-parser/src/char_ref.c +22225 -0
  49. data/gumbo-parser/src/char_ref.h +29 -0
  50. data/gumbo-parser/src/char_ref.rl +2154 -0
  51. data/gumbo-parser/src/error.c +626 -0
  52. data/gumbo-parser/src/error.h +148 -0
  53. data/gumbo-parser/src/foreign_attrs.c +104 -0
  54. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  55. data/gumbo-parser/src/gumbo.h +943 -0
  56. data/gumbo-parser/src/insertion_mode.h +33 -0
  57. data/gumbo-parser/src/macros.h +91 -0
  58. data/gumbo-parser/src/parser.c +4875 -0
  59. data/gumbo-parser/src/parser.h +41 -0
  60. data/gumbo-parser/src/replacement.h +33 -0
  61. data/gumbo-parser/src/string_buffer.c +103 -0
  62. data/gumbo-parser/src/string_buffer.h +68 -0
  63. data/gumbo-parser/src/string_piece.c +48 -0
  64. data/gumbo-parser/src/svg_attrs.c +174 -0
  65. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  66. data/gumbo-parser/src/svg_tags.c +137 -0
  67. data/gumbo-parser/src/svg_tags.gperf +55 -0
  68. data/gumbo-parser/src/tag.c +222 -0
  69. data/gumbo-parser/src/tag_lookup.c +382 -0
  70. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  71. data/gumbo-parser/src/tag_lookup.h +13 -0
  72. data/gumbo-parser/src/token_buffer.c +79 -0
  73. data/gumbo-parser/src/token_buffer.h +71 -0
  74. data/gumbo-parser/src/token_type.h +17 -0
  75. data/gumbo-parser/src/tokenizer.c +3463 -0
  76. data/gumbo-parser/src/tokenizer.h +112 -0
  77. data/gumbo-parser/src/tokenizer_states.h +339 -0
  78. data/gumbo-parser/src/utf8.c +245 -0
  79. data/gumbo-parser/src/utf8.h +164 -0
  80. data/gumbo-parser/src/util.c +68 -0
  81. data/gumbo-parser/src/util.h +30 -0
  82. data/gumbo-parser/src/vector.c +111 -0
  83. data/gumbo-parser/src/vector.h +45 -0
  84. data/lib/nokogiri/class_resolver.rb +67 -0
  85. data/lib/nokogiri/css/node.rb +9 -8
  86. data/lib/nokogiri/css/parser.rb +361 -342
  87. data/lib/nokogiri/css/parser.y +250 -245
  88. data/lib/nokogiri/css/parser_extras.rb +22 -20
  89. data/lib/nokogiri/css/syntax_error.rb +2 -1
  90. data/lib/nokogiri/css/tokenizer.rb +4 -3
  91. data/lib/nokogiri/css/tokenizer.rex +3 -2
  92. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  93. data/lib/nokogiri/css.rb +49 -17
  94. data/lib/nokogiri/decorators/slop.rb +8 -7
  95. data/lib/nokogiri/extension.rb +8 -3
  96. data/lib/nokogiri/gumbo.rb +15 -0
  97. data/lib/nokogiri/html.rb +37 -27
  98. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  99. data/lib/nokogiri/{html → html4}/document.rb +92 -81
  100. data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
  101. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  102. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  103. data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
  104. data/lib/nokogiri/{html → html4}/sax/parser.rb +16 -16
  105. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  106. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
  107. data/lib/nokogiri/html4.rb +46 -0
  108. data/lib/nokogiri/html5/document.rb +91 -0
  109. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  110. data/lib/nokogiri/html5/node.rb +100 -0
  111. data/lib/nokogiri/html5.rb +478 -0
  112. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  113. data/lib/nokogiri/syntax_error.rb +1 -0
  114. data/lib/nokogiri/version/constant.rb +2 -1
  115. data/lib/nokogiri/version/info.rb +31 -14
  116. data/lib/nokogiri/version.rb +1 -0
  117. data/lib/nokogiri/xml/attr.rb +5 -3
  118. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  119. data/lib/nokogiri/xml/builder.rb +71 -31
  120. data/lib/nokogiri/xml/cdata.rb +2 -1
  121. data/lib/nokogiri/xml/character_data.rb +1 -0
  122. data/lib/nokogiri/xml/document.rb +183 -96
  123. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  124. data/lib/nokogiri/xml/dtd.rb +3 -2
  125. data/lib/nokogiri/xml/element_content.rb +1 -0
  126. data/lib/nokogiri/xml/element_decl.rb +2 -1
  127. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  128. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  129. data/lib/nokogiri/xml/namespace.rb +2 -0
  130. data/lib/nokogiri/xml/node/save_options.rb +9 -5
  131. data/lib/nokogiri/xml/node.rb +525 -354
  132. data/lib/nokogiri/xml/node_set.rb +50 -54
  133. data/lib/nokogiri/xml/notation.rb +12 -0
  134. data/lib/nokogiri/xml/parse_options.rb +13 -6
  135. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  136. data/lib/nokogiri/xml/pp/node.rb +24 -26
  137. data/lib/nokogiri/xml/pp.rb +3 -2
  138. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  139. data/lib/nokogiri/xml/reader.rb +20 -24
  140. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  141. data/lib/nokogiri/xml/sax/document.rb +44 -49
  142. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  143. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  144. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  145. data/lib/nokogiri/xml/sax.rb +5 -4
  146. data/lib/nokogiri/xml/schema.rb +7 -6
  147. data/lib/nokogiri/xml/searchable.rb +93 -62
  148. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  149. data/lib/nokogiri/xml/text.rb +1 -0
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  151. data/lib/nokogiri/xml/xpath.rb +13 -1
  152. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  153. data/lib/nokogiri/xml.rb +37 -37
  154. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  155. data/lib/nokogiri/xslt.rb +28 -20
  156. data/lib/nokogiri.rb +48 -43
  157. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  158. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  159. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  160. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  161. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +3 -3
  162. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  163. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  164. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  165. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  166. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  167. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  168. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  169. metadata +204 -93
  170. data/lib/nokogiri/html/element_description_defaults.rb +0 -672
  171. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  172. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  173. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  174. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  175. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  176. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
  177. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +0 -2511
  178. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  179. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -5,255 +5,260 @@ token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
5
5
  token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS
6
6
 
7
7
  rule
8
- selector
9
- : selector COMMA simple_selector_1toN {
10
- result = [val.first, val.last].flatten
11
- }
12
- | prefixless_combinator_selector { result = val.flatten }
13
- | optional_S simple_selector_1toN { result = [val.last].flatten }
14
- ;
15
- combinator
16
- : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
17
- | GREATER { result = :CHILD_SELECTOR }
18
- | TILDE { result = :FOLLOWING_SELECTOR }
19
- | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
20
- | SLASH { result = :CHILD_SELECTOR }
21
- ;
22
- simple_selector
23
- : element_name hcap_0toN {
24
- result = if val[1].nil?
25
- val.first
26
- else
27
- Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
28
- end
29
- }
30
- | function
31
- | function pseudo {
32
- result = Node.new(:CONDITIONAL_SELECTOR, val)
33
- }
34
- | function attrib {
35
- result = Node.new(:CONDITIONAL_SELECTOR, val)
36
- }
37
- | hcap_1toN {
38
- result = Node.new(:CONDITIONAL_SELECTOR,
39
- [Node.new(:ELEMENT_NAME, ['*']), val.first]
40
- )
41
- }
42
- ;
43
- prefixless_combinator_selector
44
- : combinator simple_selector_1toN {
45
- result = Node.new(val.first, [nil, val.last])
46
- }
47
- ;
48
- simple_selector_1toN
49
- : simple_selector combinator simple_selector_1toN {
50
- result = Node.new(val[1], [val.first, val.last])
51
- }
52
- | simple_selector S simple_selector_1toN {
53
- result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
54
- }
55
- | simple_selector
56
- ;
57
- class
58
- : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
59
- ;
60
- element_name
61
- : namespaced_ident
62
- | '*' { result = Node.new(:ELEMENT_NAME, val) }
63
- ;
64
- namespaced_ident
65
- : namespace '|' IDENT {
66
- result = Node.new(:ELEMENT_NAME,
67
- [[val.first, val.last].compact.join(':')]
68
- )
69
- }
70
- | IDENT {
71
- name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
72
- result = Node.new(:ELEMENT_NAME, [name])
73
- }
74
- ;
75
- namespace
76
- : IDENT { result = val[0] }
77
- |
78
- ;
79
- attrib
80
- : LSQUARE attrib_name attrib_val_0or1 RSQUARE {
81
- result = Node.new(:ATTRIBUTE_CONDITION,
82
- [val[1]] + (val[2] || [])
83
- )
84
- }
85
- | LSQUARE function attrib_val_0or1 RSQUARE {
86
- result = Node.new(:ATTRIBUTE_CONDITION,
87
- [val[1]] + (val[2] || [])
88
- )
89
- }
90
- | LSQUARE NUMBER RSQUARE {
91
- # non-standard, from hpricot
92
- result = Node.new(:PSEUDO_CLASS,
93
- [Node.new(:FUNCTION, ['nth-child(', val[1]])]
94
- )
95
- }
96
- ;
97
- attrib_name
98
- : namespace '|' IDENT {
99
- result = Node.new(:ELEMENT_NAME,
100
- [[val.first, val.last].compact.join(':')]
101
- )
102
- }
103
- | IDENT {
104
- # Default namespace is not applied to attributes.
105
- # So we don't add prefix "xmlns:" as in namespaced_ident.
106
- result = Node.new(:ELEMENT_NAME, [val.first])
107
- }
108
- ;
109
- function
110
- : FUNCTION RPAREN {
111
- result = Node.new(:FUNCTION, [val.first.strip])
112
- }
113
- | FUNCTION expr RPAREN {
114
- result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
115
- }
116
- | FUNCTION nth RPAREN {
117
- result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
118
- }
119
- | NOT expr RPAREN {
120
- result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
121
- }
122
- | HAS selector RPAREN {
123
- result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
124
- }
125
- ;
126
- expr
127
- : NUMBER COMMA expr { result = [val.first, val.last] }
128
- | STRING COMMA expr { result = [val.first, val.last] }
129
- | IDENT COMMA expr { result = [val.first, val.last] }
130
- | NUMBER
131
- | STRING
132
- | IDENT # even, odd
133
- {
134
- case val[0]
135
- when 'even'
136
- result = Node.new(:NTH, ['2','n','+','0'])
137
- when 'odd'
138
- result = Node.new(:NTH, ['2','n','+','1'])
139
- when 'n'
140
- result = Node.new(:NTH, ['1','n','+','0'])
141
- else
142
- # non-standard to support custom functions:
143
- # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
144
- # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
145
- # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
146
- result = val
147
- end
148
- }
149
- ;
150
- nth
151
- : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
152
- {
153
- if val[1] == 'n'
154
- result = Node.new(:NTH, val)
155
- else
156
- raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
157
- end
158
- }
159
- | IDENT PLUS NUMBER { # n+3, -n+3
160
- if val[0] == 'n'
161
- val.unshift("1")
162
- result = Node.new(:NTH, val)
163
- elsif val[0] == '-n'
164
- val[0] = 'n'
165
- val.unshift("-1")
166
- result = Node.new(:NTH, val)
167
- else
168
- raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
169
- end
170
- }
171
- | NUMBER IDENT { # 5n, -5n, 10n-1
172
- n = val[1]
173
- if n[0, 2] == 'n-'
174
- val[1] = 'n'
175
- val << "-"
176
- # b is contained in n as n is the string "n-b"
177
- val << n[2, n.size]
178
- result = Node.new(:NTH, val)
179
- elsif n == 'n'
180
- val << "+"
181
- val << "0"
182
- result = Node.new(:NTH, val)
183
- else
184
- raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
185
- end
186
- }
187
- ;
188
- pseudo
189
- : ':' function {
190
- result = Node.new(:PSEUDO_CLASS, [val[1]])
191
- }
192
- | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
193
- ;
194
- hcap_0toN
195
- : hcap_1toN
196
- |
197
- ;
198
- hcap_1toN
199
- : attribute_id hcap_1toN {
200
- result = Node.new(:COMBINATOR, val)
201
- }
202
- | class hcap_1toN {
203
- result = Node.new(:COMBINATOR, val)
204
- }
205
- | attrib hcap_1toN {
206
- result = Node.new(:COMBINATOR, val)
207
- }
208
- | pseudo hcap_1toN {
209
- result = Node.new(:COMBINATOR, val)
210
- }
211
- | negation hcap_1toN {
212
- result = Node.new(:COMBINATOR, val)
213
- }
214
- | attribute_id
215
- | class
216
- | attrib
217
- | pseudo
218
- | negation
219
- ;
220
- attribute_id
221
- : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
222
- ;
223
- attrib_val_0or1
224
- : eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] }
225
- | eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] }
226
- | eql_incl_dash NUMBER { result = [val.first, val[1]] }
227
- |
228
- ;
229
- eql_incl_dash
230
- : EQUAL { result = :equal }
231
- | PREFIXMATCH { result = :prefix_match }
232
- | SUFFIXMATCH { result = :suffix_match }
233
- | SUBSTRINGMATCH { result = :substring_match }
234
- | NOT_EQUAL { result = :not_equal }
235
- | INCLUDES { result = :includes }
236
- | DASHMATCH { result = :dash_match }
237
- ;
238
- negation
239
- : NOT negation_arg RPAREN {
240
- result = Node.new(:NOT, [val[1]])
241
- }
242
- ;
243
- negation_arg
244
- : element_name
245
- | element_name hcap_1toN
246
- | hcap_1toN
247
- ;
248
- optional_S
249
- : S
250
- |
251
- ;
8
+ selector:
9
+ selector COMMA simple_selector_1toN {
10
+ result = [val[0], val[2]].flatten
11
+ }
12
+ | prefixless_combinator_selector { result = val.flatten }
13
+ | optional_S simple_selector_1toN { result = [val[1]].flatten }
14
+ ;
15
+
16
+ combinator:
17
+ PLUS { result = :DIRECT_ADJACENT_SELECTOR }
18
+ | GREATER { result = :CHILD_SELECTOR }
19
+ | TILDE { result = :FOLLOWING_SELECTOR }
20
+ | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
21
+ | SLASH { result = :CHILD_SELECTOR }
22
+ ;
23
+
24
+ xpath_attribute_name:
25
+ '@' IDENT { result = val[1] }
26
+ ;
27
+
28
+ xpath_attribute:
29
+ xpath_attribute_name { result = Node.new(:ATTRIB_NAME, [val[0]]) }
30
+ ;
31
+
32
+ simple_selector:
33
+ element_name hcap_0toN {
34
+ result = if val[1].nil?
35
+ val[0]
36
+ else
37
+ Node.new(:CONDITIONAL_SELECTOR, [val[0], val[1]])
38
+ end
39
+ }
40
+ | function
41
+ | function pseudo { result = Node.new(:CONDITIONAL_SELECTOR, val) }
42
+ | function attrib { result = Node.new(:CONDITIONAL_SELECTOR, val) }
43
+ | hcap_1toN { result = Node.new(:CONDITIONAL_SELECTOR, [Node.new(:ELEMENT_NAME, ['*']), val[0]]) }
44
+ | xpath_attribute
45
+ ;
46
+
47
+ prefixless_combinator_selector:
48
+ combinator simple_selector_1toN { result = Node.new(val[0], [nil, val[1]]) }
49
+ ;
50
+
51
+ simple_selector_1toN:
52
+ simple_selector combinator simple_selector_1toN { result = Node.new(val[1], [val[0], val[2]]) }
53
+ | simple_selector S simple_selector_1toN { result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) }
54
+ | simple_selector
55
+ ;
56
+
57
+ class:
58
+ '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
59
+ ;
60
+
61
+ element_name:
62
+ namespaced_ident
63
+ | '*' { result = Node.new(:ELEMENT_NAME, val) }
64
+ ;
65
+
66
+ namespaced_ident:
67
+ namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')]) }
68
+ | IDENT {
69
+ name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
70
+ result = Node.new(:ELEMENT_NAME, [name])
71
+ }
72
+ ;
73
+
74
+ namespace:
75
+ IDENT { result = val[0] }
76
+ |
77
+ ;
78
+
79
+ attrib:
80
+ LSQUARE attrib_name attrib_val_0or1 RSQUARE {
81
+ result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []))
82
+ }
83
+ | LSQUARE function attrib_val_0or1 RSQUARE {
84
+ result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []))
85
+ }
86
+ | LSQUARE NUMBER RSQUARE {
87
+ result = Node.new(:PSEUDO_CLASS, [Node.new(:FUNCTION, ['nth-child(', val[1]])])
88
+ }
89
+ ;
90
+
91
+ attrib_name:
92
+ namespace '|' IDENT { result = Node.new(:ATTRIB_NAME, [[val[0], val[2]].compact.join(':')]) }
93
+ | IDENT { result = Node.new(:ATTRIB_NAME, [val[0]]) }
94
+ | xpath_attribute
95
+ ;
96
+
97
+ function:
98
+ FUNCTION RPAREN {
99
+ result = Node.new(:FUNCTION, [val[0].strip])
100
+ }
101
+ | FUNCTION expr RPAREN {
102
+ result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
103
+ }
104
+ | FUNCTION nth RPAREN {
105
+ result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
106
+ }
107
+ | NOT expr RPAREN {
108
+ result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
109
+ }
110
+ | HAS selector RPAREN {
111
+ result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
112
+ }
113
+ ;
114
+
115
+ expr:
116
+ NUMBER COMMA expr { result = [val[0], val[2]] }
117
+ | STRING COMMA expr { result = [val[0], val[2]] }
118
+ | IDENT COMMA expr { result = [val[0], val[2]] }
119
+ | xpath_attribute COMMA expr { result = [val[0], val[2]] }
120
+ | NUMBER
121
+ | STRING
122
+ | IDENT {
123
+ case val[0]
124
+ when 'even'
125
+ result = Node.new(:NTH, ['2','n','+','0'])
126
+ when 'odd'
127
+ result = Node.new(:NTH, ['2','n','+','1'])
128
+ when 'n'
129
+ result = Node.new(:NTH, ['1','n','+','0'])
130
+ else
131
+ result = val
132
+ end
133
+ }
134
+ | xpath_attribute
135
+ ;
136
+
137
+ nth:
138
+ NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
139
+ {
140
+ if val[1] == 'n'
141
+ result = Node.new(:NTH, val)
142
+ else
143
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
144
+ end
145
+ }
146
+ | IDENT PLUS NUMBER { # n+3, -n+3
147
+ if val[0] == 'n'
148
+ val.unshift("1")
149
+ result = Node.new(:NTH, val)
150
+ elsif val[0] == '-n'
151
+ val[0] = 'n'
152
+ val.unshift("-1")
153
+ result = Node.new(:NTH, val)
154
+ else
155
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
156
+ end
157
+ }
158
+ | NUMBER IDENT { # 5n, -5n, 10n-1
159
+ n = val[1]
160
+ if n[0, 2] == 'n-'
161
+ val[1] = 'n'
162
+ val << "-"
163
+ # b is contained in n as n is the string "n-b"
164
+ val << n[2, n.size]
165
+ result = Node.new(:NTH, val)
166
+ elsif n == 'n'
167
+ val << "+"
168
+ val << "0"
169
+ result = Node.new(:NTH, val)
170
+ else
171
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
172
+ end
173
+ }
174
+ ;
175
+
176
+ pseudo:
177
+ ':' function {
178
+ result = Node.new(:PSEUDO_CLASS, [val[1]])
179
+ }
180
+ | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
181
+ ;
182
+
183
+ hcap_0toN:
184
+ hcap_1toN
185
+ |
186
+ ;
187
+
188
+ hcap_1toN:
189
+ attribute_id hcap_1toN {
190
+ result = Node.new(:COMBINATOR, val)
191
+ }
192
+ | class hcap_1toN {
193
+ result = Node.new(:COMBINATOR, val)
194
+ }
195
+ | attrib hcap_1toN {
196
+ result = Node.new(:COMBINATOR, val)
197
+ }
198
+ | pseudo hcap_1toN {
199
+ result = Node.new(:COMBINATOR, val)
200
+ }
201
+ | negation hcap_1toN {
202
+ result = Node.new(:COMBINATOR, val)
203
+ }
204
+ | attribute_id
205
+ | class
206
+ | attrib
207
+ | pseudo
208
+ | negation
209
+ ;
210
+
211
+ attribute_id:
212
+ HASH { result = Node.new(:ID, [unescape_css_identifier(val[0])]) }
213
+ ;
214
+
215
+ attrib_val_0or1:
216
+ eql_incl_dash IDENT { result = [val[0], unescape_css_identifier(val[1])] }
217
+ | eql_incl_dash STRING { result = [val[0], unescape_css_string(val[1])] }
218
+ | eql_incl_dash NUMBER { result = [val[0], val[1]] }
219
+ |
220
+ ;
221
+
222
+ eql_incl_dash:
223
+ EQUAL { result = :equal }
224
+ | PREFIXMATCH { result = :prefix_match }
225
+ | SUFFIXMATCH { result = :suffix_match }
226
+ | SUBSTRINGMATCH { result = :substring_match }
227
+ | NOT_EQUAL { result = :not_equal }
228
+ | INCLUDES { result = :includes }
229
+ | DASHMATCH { result = :dash_match }
230
+ ;
231
+
232
+ negation:
233
+ NOT negation_arg RPAREN {
234
+ result = Node.new(:NOT, [val[1]])
235
+ }
236
+ ;
237
+
238
+ negation_arg:
239
+ element_name
240
+ | element_name hcap_1toN
241
+ | hcap_1toN
242
+ ;
243
+
244
+ optional_S:
245
+ S
246
+ |
247
+ ;
248
+
252
249
  end
253
250
 
254
251
  ---- header
255
252
 
256
- require 'nokogiri/css/parser_extras'
253
+ require_relative "parser_extras"
254
+
255
+ module Nokogiri
256
+ module CSS
257
+ # :nodoc: all
258
+ class Parser < Racc::Parser
259
+ end
260
+ end
261
+ end
257
262
 
258
263
  ---- inner
259
264
 
@@ -1,9 +1,10 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "thread"
3
4
 
4
5
  module Nokogiri
5
6
  module CSS
6
- class Parser < Racc::Parser
7
+ class Parser < Racc::Parser # :nodoc:
7
8
  CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
8
9
 
9
10
  @cache = {}
@@ -16,19 +17,21 @@ module Nokogiri
16
17
  end
17
18
 
18
19
  # Set a thread-local boolean to turn cacheing on and off. Truthy values turn the cache on, falsey values turn the cache off.
19
- def set_cache(value)
20
+ def set_cache(value) # rubocop:disable Naming/AccessorMethodName
20
21
  Thread.current[CACHE_SWITCH_NAME] = !value
21
22
  end
22
23
 
23
24
  # Get the css selector in +string+ from the cache
24
25
  def [](string)
25
- return unless cache_on?
26
+ return nil unless cache_on?
27
+
26
28
  @mutex.synchronize { @cache[string] }
27
29
  end
28
30
 
29
31
  # Set the css selector in +string+ in the cache to +value+
30
32
  def []=(string, value)
31
33
  return value unless cache_on?
34
+
32
35
  @mutex.synchronize { @cache[string] = value }
33
36
  end
34
37
 
@@ -46,10 +49,10 @@ module Nokogiri
46
49
  # Execute +block+ without cache
47
50
  def without_cache(&block)
48
51
  original_cache_setting = cache_on?
49
- set_cache false
50
- block.call
52
+ set_cache(false)
53
+ yield
51
54
  ensure
52
- set_cache original_cache_setting
55
+ set_cache(original_cache_setting)
53
56
  end
54
57
  end
55
58
 
@@ -61,7 +64,7 @@ module Nokogiri
61
64
  end
62
65
 
63
66
  def parse(string)
64
- @tokenizer.scan_setup string
67
+ @tokenizer.scan_setup(string)
65
68
  do_parse
66
69
  end
67
70
 
@@ -70,24 +73,23 @@ module Nokogiri
70
73
  end
71
74
 
72
75
  # Get the xpath for +string+ using +options+
73
- def xpath_for(string, options = {})
74
- key = "#{string}#{options[:ns]}#{options[:prefix]}"
75
- v = self.class[key]
76
- return v if v
77
-
78
- args = [
79
- options[:prefix] || "//",
80
- options[:visitor] || XPathVisitor.new,
81
- ]
82
- self.class[key] = parse(string).map { |ast|
83
- ast.to_xpath(*args)
84
- }
76
+ def xpath_for(string, prefix, visitor)
77
+ key = cache_key(string, prefix, visitor)
78
+ self.class[key] ||= parse(string).map do |ast|
79
+ ast.to_xpath(prefix, visitor)
80
+ end
85
81
  end
86
82
 
87
83
  # On CSS parser error, raise an exception
88
84
  def on_error(error_token_id, error_value, value_stack)
89
85
  after = value_stack.compact.last
90
- raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
86
+ raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
87
+ end
88
+
89
+ def cache_key(query, prefix, visitor)
90
+ if self.class.cache_on?
91
+ [query, prefix, @namespaces, visitor.config]
92
+ end
91
93
  end
92
94
  end
93
95
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/syntax_error'
2
+
3
+ require_relative "../syntax_error"
3
4
  module Nokogiri
4
5
  module CSS
5
6
  class SyntaxError < ::Nokogiri::SyntaxError
@@ -7,7 +7,8 @@
7
7
 
8
8
  module Nokogiri
9
9
  module CSS
10
- class Tokenizer # :nodoc:
10
+ # :nodoc: all
11
+ class Tokenizer
11
12
  require 'strscan'
12
13
 
13
14
  class ScanError < StandardError ; end
@@ -62,10 +63,10 @@ class Tokenizer # :nodoc:
62
63
  when (text = @ss.scan(/has\([\s]*/))
63
64
  action { [:HAS, text] }
64
65
 
65
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
+ when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
67
  action { [:FUNCTION, text] }
67
68
 
68
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
+ when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
70
  action { [:IDENT, text] }
70
71
 
71
72
  when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
@@ -1,6 +1,7 @@
1
1
  module Nokogiri
2
2
  module CSS
3
- class Tokenizer # :nodoc:
3
+ # :nodoc: all
4
+ class Tokenizer
4
5
 
5
6
  macro
6
7
  nl \n|\r\n|\r|\f
@@ -12,7 +13,7 @@ macro
12
13
  escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
13
14
  nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
15
  nmstart [_A-Za-z]|{nonascii}|{escape}
15
- ident [-@]?({nmstart})({nmchar})*
16
+ ident -?({nmstart})({nmchar})*
16
17
  name ({nmchar})+
17
18
  string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
18
19
  string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'