p_css 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ module CSS
2
+ module Selectors
3
+ # Parser for the An+B microsyntax used by `:nth-child(...)` and friends.
4
+ # https://drafts.csswg.org/css-syntax/#anb-microsyntax
5
+ module AnBParser
6
+ TRAILING_DASH_INT = /\A-(\d+)\z/.freeze
7
+ N_TRAILING_INT = /\An(-\d+)?\z/i.freeze
8
+ DASH_N_TRAILING = /\A-n(-\d+)?\z/i.freeze
9
+
10
+ module_function
11
+
12
# Parses an An+B expression.
#
# `input` may be a String, which is run through `Tokenizer` first, or any
# object answering `to_a` that yields the token sequence directly.
# Returns whatever `Impl#parse` produces for those tokens.
def parse(input)
  token_list =
    if input.is_a?(String)
      Tokenizer.new(input).tokenize
    else
      input.to_a
    end

  Impl.new(token_list).parse
end
16
+
17
+ class Impl
18
+ include CSS::TokenCursor
19
+
20
# Hands the token sequence to the cursor mixin via `init_cursor`.
def initialize(tokens) = init_cursor(tokens)
23
+
24
# Parses one complete An+B value, allowing whitespace on either side.
#
# Returns the value produced by `parse_value`. Raises ParseError when
# anything other than end-of-input follows the value.
def parse
  skip_whitespace

  parse_value.tap do
    skip_whitespace

    raise ParseError, "trailing tokens in AnB: #{peek.type}" if peek.type != :eof
  end
end
37
+
38
+ private
39
+
40
# Dispatches on the type of the next token (without consuming it) to the
# matching sub-parser; each sub-parser receives that token.
#
# Raises ParseError for any token type that cannot start an An+B value.
def parse_value
  head = peek

  handler = {
    ident: :parse_ident_form,
    number: :parse_pure_number,
    dimension: :parse_dimension_form,
    delim: :parse_signed_form
  }[head.type]

  raise ParseError, "expected An+B, got #{head.type}" unless handler

  send(handler, head)
end
52
+
53
# Handles An+B values that begin with an identifier token: `even`, `odd`,
# `n`, `-n`, and the fused forms `n-<digits>` / `-n-<digits>`. Keywords
# are compared case-insensitively.
#
# The token is consumed up front. For bare `n` / `-n` the offset, if any,
# is read from the following tokens by `parse_offset`.
# Raises ParseError for any other identifier.
def parse_ident_form(t)
  consume

  raw = t.value
  lowered = raw.downcase

  return AnB.new(step: 2, offset: 0) if lowered == 'even'
  return AnB.new(step: 2, offset: 1) if lowered == 'odd'
  return parse_offset(step: 1) if lowered == 'n'
  return parse_offset(step: -1) if lowered == '-n'

  if N_TRAILING_INT.match?(lowered)
    AnB.new(step: 1, offset: -extract_dash_int(raw, prefix: 'n'))
  elsif DASH_N_TRAILING.match?(lowered)
    AnB.new(step: -1, offset: -extract_dash_int(raw, prefix: '-n'))
  else
    raise ParseError, "invalid AnB identifier: #{raw}"
  end
end
73
+
74
# Handles a lone number token (the `B`-only form): step is 0 and the
# token's value becomes the offset.
#
# The token is consumed before validation. Raises ParseError unless the
# token is flagged `:integer`.
def parse_pure_number(t)
  consume

  return AnB.new(step: 0, offset: t.value) if t.flag == :integer

  raise ParseError, 'AnB integer must be an integer'
end
81
+
82
# Handles values that lex as a dimension token, i.e. an integer
# coefficient fused with a unit such as `2n` or `2n-3`.
#
# The token is consumed first. With unit `n` the offset is read from the
# following tokens via `parse_offset`; with unit `n-<digits>` the offset
# is the negated digits. Raises ParseError when the coefficient is not
# flagged `:integer` or the unit has any other shape.
def parse_dimension_form(t)
  consume

  raise ParseError, 'AnB step coefficient must be an integer' if t.flag != :integer

  unit = t.unit.downcase

  return parse_offset(step: t.value) if unit == 'n'

  suffix = unit.start_with?('n') ? TRAILING_DASH_INT.match(unit[1..]) : nil

  raise ParseError, "invalid AnB dimension unit: #{unit}" unless suffix

  AnB.new(step: t.value, offset: -suffix[1].to_i)
end
97
+
98
# Handles values introduced by an explicit `+` delim: `+n`, `+n+1`,
# `+n-1`, and `+n-3` (the last lexes as delim '+' followed by the ident
# "n-3"). The `+` must be immediately followed by an ident token.
#
# Raises ParseError for any other delim, when the next token is not an
# ident, or when the ident is neither `n` nor `n-<digits>`.
def parse_signed_form(t)
  raise ParseError, "unexpected delim #{t.value}" if t.value != '+'

  consume

  ident = peek

  raise ParseError, "expected ident after '+', got #{ident.type}" if ident.type != :ident

  consume

  text = ident.value
  lowered = text.downcase

  if lowered == 'n'
    parse_offset(step: 1)
  elsif N_TRAILING_INT.match?(lowered)
    AnB.new(step: 1, offset: -extract_dash_int(text, prefix: 'n'))
  else
    raise ParseError, "invalid AnB after '+': #{text}"
  end
end
122
+
123
# Reads the optional offset that may follow an `n` term and builds the
# final AnB with the given `step`.
#
# After skipping whitespace:
# - a number token is taken as the offset (must be flagged `:integer`);
# - a `+` / `-` delim must be followed (whitespace allowed) by an integer
#   number token, whose absolute value is signed by the delim;
# - anything else, including end of input, means offset 0 and nothing is
#   consumed.
#
# Raises ParseError for a non-integer offset, a delim other than `+`/`-`,
# or a sign that is not followed by an integer.
def parse_offset(step:)
  skip_whitespace

  head = peek

  if head.type == :number
    consume

    raise ParseError, 'AnB offset must be an integer' if head.flag != :integer

    return AnB.new(step:, offset: head.value)
  end

  return AnB.new(step:, offset: 0) unless head.type == :delim

  sign = head.value

  unless %w[+ -].include?(sign)
    raise ParseError, "expected +/- in AnB offset, got delim #{sign}"
  end

  consume
  skip_whitespace

  digits = peek

  raise ParseError, "expected integer after #{sign}" unless digits.type == :number && digits.flag == :integer

  consume

  magnitude = digits.value.abs
  AnB.new(step:, offset: sign == '-' ? -magnitude : magnitude)
end
160
+
161
# Returns the unsigned integer that follows `prefix` in `s` after a dash,
# e.g. 3 for "n-3" with prefix 'n'. The prefix is stripped
# case-insensitively; when nothing remains the result is 0.
#
# Raises ParseError when the remainder is not a dash followed by digits.
def extract_dash_int(s, prefix:)
  remainder = s.sub(/\A#{prefix}/i, '')

  return 0 if remainder.empty?

  digits = remainder[TRAILING_DASH_INT, 1]

  raise ParseError, "invalid AnB suffix: #{s}" if digits.nil?

  digits.to_i
end
171
+ end
172
+ end
173
+ end
174
+ end
@@ -0,0 +1,449 @@
1
+ module CSS
2
+ module Selectors
3
+ # Matches a Selector AST against any duck-typed element. Required
4
+ # methods on the element:
5
+ #
6
+ # - `name` (or `tag_name`) — tag name
7
+ # - `[](attr)` — attribute value or nil
8
+ # - `parent` — parent element or non-element
9
+ # - `previous_element` (or `previous_element_sibling`) — preceding
10
+ # element sibling
11
+ # - `next_element` (or `next_element_sibling`) — following
12
+ # element sibling
13
+ # - `children` (and optionally `element_children`) — child nodes
14
+ #
15
+ # `Nokogiri::XML::Element` and `Nokogiri::HTML::Element` satisfy this
16
+ # protocol out of the box.
17
+ #
18
+ # Pseudo-classes that depend on user-agent state (`:hover`, `:focus`,
19
+ # `:visited`, validity-API states, `:fullscreen`, etc.) always return
20
+ # false; this matcher is intended for stateless analysis.
21
+ module Matcher
22
+ extend self
23
+
24
+ DISABLEABLE_TAGS = %w[button input select textarea optgroup option fieldset].freeze
25
+ INPUT_TAGS = %w[input textarea select].freeze
26
+ LINK_TAGS = %w[a area link].freeze
27
+ RO_INPUT_TYPES = %w[hidden range color checkbox radio file submit image reset button].freeze
28
+
29
# Tests whether `element` matches `selector`.
#
# `selector` may be a String (parsed with `Parser.parse_selector_list`),
# a SelectorList (matches when any of its selectors match), a
# ComplexSelector, or a CompoundSelector.
# Raises ArgumentError for any other kind of object.
def matches?(element, selector)
  node = selector
  node = Parser.parse_selector_list(selector) if selector.is_a?(String)

  return node.selectors.any? { |complex| match_complex(element, complex) } if node.is_a?(SelectorList)
  return match_complex(element, node) if node.is_a?(ComplexSelector)
  return match_compound(element, node) if node.is_a?(CompoundSelector)

  raise ArgumentError, "expected a selector node or string, got #{node.class}"
end
43
+
44
+ private
45
+
46
# Matches a complex selector against `element`, starting from the
# rightmost compound and working leftwards through the combinators via
# `match_at`.
def match_complex(element, complex)
  rightmost = complex.compounds.size - 1
  match_at(element, complex, rightmost)
end
52
+
53
# Checks that `element` matches `complex.compounds[index]` and that the
# compounds to its left can be satisfied through the connecting
# combinators.
#
# Returns false when `element` is nil or fails the compound at `index`,
# and true once the leftmost compound (index 0) has matched. Otherwise
# the combinator to the left decides where the next candidate is looked
# up: any ancestor (:descendant), the parent (:child), the previous
# element sibling (:next_sibling), or any preceding element sibling
# (:subsequent_sibling).
#
# A combinator this matcher does not implement yields an explicit false
# rather than nil, so callers always receive a boolean.
def match_at(element, complex, index)
  return false if element.nil?
  return false unless match_compound(element, complex.compounds[index])
  return true if index.zero?

  prev = index - 1

  case complex.combinators[prev]
  when :descendant then walk_until_match(element, complex, prev, :parent_element)
  when :child then match_at(parent_element(element), complex, prev)
  when :next_sibling then match_at(previous_element(element), complex, prev)
  when :subsequent_sibling then walk_until_match(element, complex, prev, :previous_element)
  else false
  end
end
67
+
68
# Repeatedly applies the helper named by `direction` (e.g.
# :parent_element) starting from `element`, and returns true as soon as
# one of the visited nodes satisfies `match_at` for `index`. Returns
# false when the walk runs out of nodes. `element` itself is not tested.
def walk_until_match(element, complex, index, direction)
  node = element

  while (node = send(direction, node))
    return true if match_at(node, complex, index)
  end

  false
end
81
+
82
# True when every simple selector in the compound matches `element`
# (vacuously true for a compound with no components).
def match_compound(element, compound)
  compound.components.all? { |simple| match_simple(element, simple) }
end
85
+
86
# Matches a single simple selector against `element`.
#
# Type selectors compare tag names case-insensitively; id selectors
# compare the `id` attribute exactly; class selectors look the name up in
# the element's class list. Attribute selectors and pseudo-classes are
# delegated. Every other node, including pseudo-elements and the nesting
# selector, never matches.
def match_simple(element, simple)
  case simple
  when UniversalSelector
    true
  when TypeSelector
    tag(element).casecmp?(simple.name)
  when IdSelector
    attr(element, 'id') == simple.name
  when ClassSelector
    class_list(element).include?(simple.name)
  when AttributeSelector
    match_attribute(element, simple)
  when PseudoClass
    match_pseudo_class(element, simple)
  else
    false
  end
end
99
+
100
+ # Attribute matching ----------------------------------------------
101
+
102
# Evaluates an attribute selector against `element`.
#
# A missing attribute never matches; a selector without a matcher is a
# pure presence test. Otherwise both the attribute value and the selector
# value are compared as strings, downcased first when the selector's
# case flag is `:i`. Apart from :exact and :dash, every matcher rejects
# an empty selector value. An unrecognized matcher yields nil.
def match_attribute(element, attr_sel)
  actual = attr(element, attr_sel.name)

  return false if actual.nil?

  operator = attr_sel.matcher

  return true if operator.nil?

  haystack = actual.to_s
  needle = attr_sel.value.to_s
  haystack, needle = haystack.downcase, needle.downcase if attr_sel.case_flag == :i

  case operator
  when :exact, :dash
    # :dash additionally accepts "<needle>-" as a prefix.
    haystack == needle || (operator == :dash && haystack.start_with?("#{needle}-"))
  when :includes, :prefix, :suffix, :substring
    return false if needle.empty?

    case operator
    when :includes then haystack.split(/\s+/).include?(needle)
    when :prefix then haystack.start_with?(needle)
    when :suffix then haystack.end_with?(needle)
    else haystack.include?(needle)
    end
  end
end
125
+
126
+ # Pseudo-class matching -------------------------------------------
127
+
128
# Evaluates a pseudo-class against `element`, dispatching on its
# lower-cased name.
#
# Structural, form-state, link, language and direction pseudo-classes
# are delegated to their helpers. `:root` and `:scope` both test for the
# absence of a parent element, `:defined` always matches, and every
# other name (including `:has`) never matches.
def match_pseudo_class(element, pc)
  name = pc.name.downcase
  arg = pc.argument

  case name
  when 'is', 'where', 'matches'
    match_selector_list_arg(element, arg)
  when 'not'
    negate_selector_list_arg(element, arg)
  when 'root', 'scope'
    parent_element(element).nil?
  when 'first-child'
    previous_element(element).nil?
  when 'last-child'
    next_element(element).nil?
  when 'only-child'
    previous_element(element).nil? && next_element(element).nil?
  when 'first-of-type'
    same_type_previous(element).nil?
  when 'last-of-type'
    same_type_next(element).nil?
  when 'only-of-type'
    same_type_previous(element).nil? && same_type_next(element).nil?
  when 'nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type'
    # The two flags are encoded in the pseudo-class name itself.
    match_nth(element, arg, of_type: name.end_with?('of-type'), from_end: name.include?('-last-'))
  when 'empty'
    empty?(element)
  when 'link', 'any-link'
    link?(element)
  when 'enabled'
    disableable?(element) && !disabled?(element)
  when 'disabled'
    disabled?(element)
  when 'checked'
    checked?(element)
  when 'required'
    required?(element)
  when 'optional'
    optional?(element)
  when 'read-only'
    read_only?(element)
  when 'read-write'
    read_write?(element)
  when 'placeholder-shown'
    placeholder_shown?(element)
  when 'lang'
    match_lang(element, arg)
  when 'dir'
    match_dir(element, arg)
  when 'defined'
    true
  else
    false
  end
end
161
+
162
# Used for `:is` / `:where` / `:matches`: true when the argument is a
# SelectorList that `element` matches. Any other argument never matches.
def match_selector_list_arg(element, arg)
  return false unless arg.is_a?(SelectorList)

  matches?(element, arg)
end

# Used for `:not`: true when the argument is a SelectorList that
# `element` does NOT match. Any other argument never matches.
def negate_selector_list_arg(element, arg)
  return false unless arg.is_a?(SelectorList)

  !matches?(element, arg)
end
169
+
170
# Decides whether the element's 1-based sibling position satisfies the
# An+B expression, i.e. whether position = step * k + offset has a
# solution with integer k >= 0.
#
# Returns false when the argument is not an AnB or when `nth_index`
# cannot determine a position.
def match_nth(element, anb, of_type:, from_end:)
  return false unless anb.is_a?(AnB)

  position = nth_index(element, of_type:, from_end:)

  return false if position.nil?

  a = anb.step
  b = anb.offset

  return position == b if a.zero?

  k, remainder = (position - b).divmod(a)
  remainder.zero? && k >= 0
end
187
+
188
# Returns the 1-based position of `element` among its element siblings,
# or nil when the element cannot be found in its parent's element
# children.
#
# With `of_type` only siblings sharing the element's tag name
# (case-insensitive) are counted; with `from_end` the position is
# counted from the last sibling.
#
# An element without an element parent (such as the document root) is
# still indexed, by walking its sibling links. This keeps
# `:nth-child(1)` consistent with `:first-child`, which already matches
# such elements, and follows Selectors Level 4, where the index is
# defined among an element's siblings rather than a parent's child list.
def nth_index(element, of_type:, from_end:)
  parent = parent_element(element)

  return parentless_nth_index(element, of_type:, from_end:) if parent.nil?

  siblings = element_children(parent)
  siblings = siblings.select { |sibling| tag(sibling).casecmp?(tag(element)) } if of_type
  siblings = siblings.reverse if from_end

  idx = siblings.index { |sibling| same_node?(sibling, element) }
  idx && idx + 1
end

# Position of an element that has no element parent: one plus the number
# of counted element siblings before it (or after it when `from_end`).
def parentless_nth_index(element, of_type:, from_end:)
  step = from_end ? :next_element : :previous_element
  position = 1
  sibling = send(step, element)

  while sibling
    position += 1 if !of_type || tag(sibling).casecmp?(tag(element))
    sibling = send(step, sibling)
  end

  position
end
200
+
201
+ # Form / link state -----------------------------------------------
202
+
203
# True for a link-capable tag (see LINK_TAGS) that carries an `href`
# attribute.
def link?(element)
  return false unless LINK_TAGS.include?(tag(element))

  !attr(element, 'href').nil?
end
206
+
207
# True when the element's tag is one of DISABLEABLE_TAGS.
def disableable?(element)
  name = tag(element)
  DISABLEABLE_TAGS.include?(name)
end
210
+
211
# True when a disableable element is disabled, either through its own
# `disabled` attribute or through an ancestor `fieldset` that has one.
# A disabled fieldset does not count when the element sits inside that
# fieldset's first legend. Non-disableable elements are never disabled.
def disabled?(element)
  return false unless disableable?(element)
  return true if attr(element, 'disabled')

  node = element

  while (node = parent_element(node))
    next unless tag(node) == 'fieldset' && attr(node, 'disabled')

    return true unless inside_first_legend?(element, node)
  end

  false
end
227
+
228
# True when `element` is, or descends from, the first `legend` element
# child of `fieldset`. The upward walk starts at `element` and stops once
# it reaches `fieldset`. Returns false when the fieldset has no legend
# child.
def inside_first_legend?(element, fieldset)
  legend = element_children(fieldset).find { |child| tag(child) == 'legend' }

  return false if legend.nil?

  node = element

  while node
    return true if same_node?(node, legend)
    return false if same_node?(node, fieldset)

    node = parent_element(node)
  end

  false
end
244
+
245
# True for a checkbox or radio `input` with a `checked` attribute, or an
# `option` with a `selected` attribute. The input type is compared
# case-insensitively. Every other element is never checked.
def checked?(element)
  name = tag(element)

  if name == 'input'
    kind = attr(element, 'type').to_s.downcase
    %w[checkbox radio].include?(kind) && !attr(element, 'checked').nil?
  elsif name == 'option'
    !attr(element, 'selected').nil?
  else
    false
  end
end
255
+
256
# True for an INPUT_TAGS element that carries a `required` attribute.
def required?(element)
  return false unless INPUT_TAGS.include?(tag(element))

  !attr(element, 'required').nil?
end

# True for an INPUT_TAGS element that has no `required` attribute.
def optional?(element)
  return false unless INPUT_TAGS.include?(tag(element))

  attr(element, 'required').nil?
end
263
+
264
# True when the element is not user-editable.
#
# - `input`: always read-only for the types in RO_INPUT_TYPES; otherwise
#   read-only when it has a `readonly` attribute or is disabled.
# - `textarea`: read-only when it has a `readonly` attribute or is
#   disabled.
# - anything else: read-only unless its own `contenteditable` attribute
#   puts it in an editable state (see `content_editable?`).
def read_only?(element)
  case tag(element)
  when 'input'
    type = attr(element, 'type').to_s.downcase
    return true if RO_INPUT_TYPES.include?(type)

    !attr(element, 'readonly').nil? || disabled?(element)
  when 'textarea'
    !attr(element, 'readonly').nil? || disabled?(element)
  else
    !content_editable?(element)
  end
end

# True when the element is user-editable: the inverse of `read_only?`
# for `input` / `textarea`, and an editable `contenteditable` state for
# everything else.
def read_write?(element)
  return !read_only?(element) if %w[input textarea].include?(tag(element))

  content_editable?(element)
end

# True when the element's own `contenteditable` attribute is present and
# its value is the empty string, `true`, or `plaintext-only`
# (case-insensitive). In HTML the empty string, which is what a bare
# `contenteditable` attribute yields, maps to the "true" state, so it
# must be told apart from a missing attribute. Only the element's own
# attribute is inspected.
def content_editable?(element)
  value = attr(element, 'contenteditable')

  return false if value.nil?

  ['', 'true', 'plaintext-only'].include?(value.to_s.downcase)
end
285
+
286
# True for an `input` / `textarea` that declares a `placeholder`
# attribute and whose `value` attribute is missing or empty.
def placeholder_shown?(element)
  text_control = %w[input textarea].include?(tag(element))

  return false unless text_control && !attr(element, 'placeholder').nil?

  current = attr(element, 'value')
  current.nil? || current.empty?
end
293
+
294
# Evaluates `:lang(...)`. The language is taken from the nearest
# `lang` (or, failing that, `xml:lang`) attribute on the element or one
# of its ancestors. It matches when, case-insensitively, it equals the
# requested tag or begins with the requested tag followed by "-".
#
# Returns false when the argument yields no identifier or when no
# language is declared on the ancestor chain.
def match_lang(element, argument)
  wanted = ident_argument(argument)&.downcase

  return false if wanted.nil?

  declared = nil
  node = element

  # Climb until some node declares a language.
  while node && !(declared = attr(node, 'lang') || attr(node, 'xml:lang'))
    node = parent_element(node)
  end

  return false unless declared

  declared = declared.to_s.downcase
  declared == wanted || declared.start_with?("#{wanted}-")
end
315
+
316
# Evaluates `:dir(...)`. The direction is taken from the nearest `dir`
# attribute on the element or one of its ancestors and compared
# case-insensitively with the requested value. When no `dir` attribute
# exists on the chain, only `ltr` matches.
#
# Returns false when the argument yields no identifier.
def match_dir(element, argument)
  wanted = ident_argument(argument)&.downcase

  return false if wanted.nil?

  declared = nil
  node = element

  # Climb until some node declares a direction.
  while node && !(declared = attr(node, 'dir'))
    node = parent_element(node)
  end

  return wanted == 'ltr' unless declared

  declared.to_s.downcase == wanted
end
336
+
337
# Extracts the value of the first `:ident` or `:string` Token from a raw
# token-array argument. Returns nil when the argument is not an Array or
# contains no such token.
def ident_argument(argument)
  return nil unless argument.is_a?(Array)

  argument.each do |item|
    next unless item.is_a?(Token)

    return item.value if %i[ident string].include?(item.type)
  end

  nil
end
343
+
344
# Evaluates `:empty`. An element child always disqualifies the element,
# as does a text child containing any non-whitespace character.
# Whitespace-only text and every other kind of child node are ignored.
# An object that does not expose `children` is never empty.
def empty?(element)
  return false unless element.respond_to?(:children)

  element.children.each do |child|
    return false if child.respond_to?(:element?) && child.element?

    next unless child.respond_to?(:text?) && child.text?

    raw = child.respond_to?(:content) ? child.content : child.text
    return false if raw.to_s.match?(/\S/)
  end

  true
end
363
+
364
+ # Element protocol helpers ---------------------------------------
365
+
366
# Lower-cased tag name of `element`, read from `tag_name` when the
# element responds to it and from `name` otherwise.
def tag(element)
  reader = element.respond_to?(:tag_name) ? :tag_name : :name
  element.public_send(reader).to_s.downcase
end
370
+
371
# Reads attribute `name` via `element[name]`. When that yields nil and
# the name contains upper-case characters, the lookup is retried with
# the lower-cased name. Returns nil when neither lookup finds a value.
def attr(element, name)
  direct = element[name]

  return direct unless direct.nil?

  folded = name.downcase
  folded == name ? nil : element[folded]
end
380
+
381
# The element's `class` attribute split on runs of whitespace; an empty
# array when the attribute is missing.
def class_list(element)
  raw = attr(element, 'class')
  raw.to_s.split(/\s+/)
end
384
+
385
# Returns the element's parent, or nil when the element has no `parent`
# method, the parent is nil, or the parent answers `element?` with a
# falsy value (i.e. it is some non-element node).
def parent_element(element)
  return nil unless element.respond_to?(:parent)

  candidate = element.parent

  return nil if candidate.nil?

  candidate.respond_to?(:element?) && !candidate.element? ? nil : candidate
end
393
+
394
# Per direction: the two element-sibling accessors to try first, then
# the raw sibling accessor used for a manual walk.
SIBLING_METHODS = {
  previous: %i[previous_element previous_element_sibling previous_sibling],
  next: %i[next_element next_element_sibling next_sibling]
}.freeze

# Preceding element sibling of `element`, or nil.
def previous_element(element)
  adjacent_element(element, :previous)
end

# Following element sibling of `element`, or nil.
def next_element(element)
  adjacent_element(element, :next)
end

# Returns the neighbouring element in `direction` (:previous or :next).
# The first element-sibling accessor the element responds to is used;
# when it responds to neither, the raw sibling chain is walked with
# `walk_sibling`. Raises KeyError for an unknown direction.
def adjacent_element(element, direction)
  *accessors, raw_accessor = SIBLING_METHODS.fetch(direction)

  supported = accessors.find { |accessor| element.respond_to?(accessor) }

  supported ? element.send(supported) : walk_sibling(element, raw_accessor)
end
410
+
411
# Follows the raw sibling accessor named by `direction` (e.g.
# :previous_sibling) from `element` and returns the first node that is an
# element. Nodes that do not respond to `element?` are treated as
# elements. Returns nil when the chain ends or a node lacks the accessor.
def walk_sibling(element, direction)
  node = element

  while node.respond_to?(direction)
    node = node.send(direction)

    return nil if node.nil?
    return node unless node.respond_to?(:element?) && !node.element?
  end

  nil
end
422
+
423
# Returns the element children of `element` as an array-like list.
# Uses the element's own `element_children` when available; otherwise
# filters `children` down to nodes answering `element?` truthfully.
# Returns an empty array when the element exposes neither.
def element_children(element)
  if element.respond_to?(:element_children)
    element.element_children.to_a
  elsif element.respond_to?(:children)
    element.children.select { |child| child.respond_to?(:element?) && child.element? }
  else
    []
  end
end
431
+
432
# Nearest preceding element sibling whose tag name equals the element's
# (case-insensitive), or nil when there is none.
def same_type_previous(element)
  node = element

  while (node = previous_element(node))
    return node if tag(node).casecmp?(tag(element))
  end

  nil
end

# Nearest following element sibling whose tag name equals the element's
# (case-insensitive), or nil when there is none.
def same_type_next(element)
  node = element

  while (node = next_element(node))
    return node if tag(node).casecmp?(tag(element))
  end

  nil
end
443
+
444
# True when both references denote the same node: either the very same
# object, or objects that compare equal with `==`.
def same_node?(a, b)
  return true if a.equal?(b)

  a == b
end
447
+ end
448
+ end
449
+ end
@@ -0,0 +1,61 @@
1
module CSS
  module Selectors
    # Marker mixin carried by every selector AST value class. The main
    # `CSS.serialize` uses it to dispatch into `Selectors::Serializer`.
    module Node; end

    # One or more complex selectors separated by commas.
    SelectorList = Data.define(:selectors) do
      include Node

      def to_s
        Selectors::Serializer.serialize(self)
      end
    end

    # A chain of compounds joined by combinators, where `combinators[i]`
    # sits between `compounds[i]` and `compounds[i + 1]`; hence
    # `compounds.size == combinators.size + 1`.
    ComplexSelector = Data.define(:compounds, :combinators) do
      include Node

      def to_s
        Selectors::Serializer.serialize(self)
      end
    end

    # Simple selectors written back to back without any combinator, such
    # as `a.foo:hover` or `[href]:not(:visited)`.
    CompoundSelector = Data.define(:components) do
      include Node

      def to_s
        Selectors::Serializer.serialize(self)
      end
    end

    TypeSelector = Data.define(:name) do
      include Node
    end

    UniversalSelector = Data.define do
      include Node
    end

    NestingSelector = Data.define do
      include Node
    end

    IdSelector = Data.define(:name) do
      include Node
    end

    ClassSelector = Data.define(:name) do
      include Node
    end

    # `matcher` identifies the comparison operator:
    #   nil        — `[name]`  (presence only)
    #   :exact     — `[a=b]`
    #   :includes  — `[a~=b]`
    #   :dash      — `[a|=b]`
    #   :prefix    — `[a^=b]`
    #   :suffix    — `[a$=b]`
    #   :substring — `[a*=b]`
    #
    # `case_flag` is one of `nil`, `:i`, `:s`.
    AttributeSelector = Data.define(:name, :matcher, :value, :case_flag) do
      include Node
    end

    # `argument` holds nil, a `SelectorList` (`:not/:is/:where/:has`), an
    # `AnB` (`:nth-*`), or the raw `Array<Token>` of a functional pseudo
    # that is not otherwise recognized.
    PseudoClass = Data.define(:name, :argument) do
      include Node
    end

    PseudoElement = Data.define(:name, :argument) do
      include Node
    end

    # The integer pair of an `An+B` expression: `step` is the coefficient
    # of `n` and `offset` the constant term. Examples: `even` => AnB(2, 0),
    # `odd` => AnB(2, 1), `5` => AnB(0, 5), `n` => AnB(1, 0).
    AnB = Data.define(:step, :offset) do
      include Node

      def to_s
        Selectors::Serializer.serialize(self)
      end
    end
  end
end