cataract 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci-manual-rubies.yml +27 -0
- data/.overcommit.yml +1 -1
- data/.rubocop.yml +62 -0
- data/.rubocop_todo.yml +186 -0
- data/BENCHMARKS.md +60 -139
- data/CHANGELOG.md +10 -0
- data/README.md +30 -2
- data/Rakefile +49 -22
- data/cataract.gemspec +4 -1
- data/ext/cataract/cataract.c +47 -47
- data/ext/cataract/css_parser.c +17 -33
- data/ext/cataract/merge.c +6 -0
- data/lib/cataract/at_rule.rb +8 -9
- data/lib/cataract/declaration.rb +18 -0
- data/lib/cataract/import_resolver.rb +3 -4
- data/lib/cataract/pure/byte_constants.rb +69 -0
- data/lib/cataract/pure/helpers.rb +35 -0
- data/lib/cataract/pure/imports.rb +255 -0
- data/lib/cataract/pure/merge.rb +1146 -0
- data/lib/cataract/pure/parser.rb +1236 -0
- data/lib/cataract/pure/serializer.rb +590 -0
- data/lib/cataract/pure/specificity.rb +206 -0
- data/lib/cataract/pure.rb +130 -0
- data/lib/cataract/rule.rb +22 -13
- data/lib/cataract/stylesheet.rb +14 -9
- data/lib/cataract/version.rb +1 -1
- data/lib/cataract.rb +18 -5
- metadata +12 -25
- data/benchmarks/benchmark_harness.rb +0 -193
- data/benchmarks/benchmark_merging.rb +0 -121
- data/benchmarks/benchmark_optimization_comparison.rb +0 -168
- data/benchmarks/benchmark_parsing.rb +0 -153
- data/benchmarks/benchmark_ragel_removal.rb +0 -56
- data/benchmarks/benchmark_runner.rb +0 -70
- data/benchmarks/benchmark_serialization.rb +0 -180
- data/benchmarks/benchmark_shorthand.rb +0 -109
- data/benchmarks/benchmark_shorthand_expansion.rb +0 -176
- data/benchmarks/benchmark_specificity.rb +0 -124
- data/benchmarks/benchmark_string_allocation.rb +0 -151
- data/benchmarks/benchmark_stylesheet_to_s.rb +0 -62
- data/benchmarks/benchmark_to_s_cached.rb +0 -55
- data/benchmarks/benchmark_value_splitter.rb +0 -54
- data/benchmarks/benchmark_yjit.rb +0 -158
- data/benchmarks/benchmark_yjit_workers.rb +0 -61
- data/benchmarks/profile_to_s.rb +0 -23
- data/benchmarks/speedup_calculator.rb +0 -83
- data/benchmarks/system_metadata.rb +0 -81
- data/benchmarks/templates/benchmarks.md.erb +0 -221
- data/benchmarks/yjit_tests.rb +0 -141
- data/scripts/fuzzer/run.rb +0 -828
- data/scripts/fuzzer/worker.rb +0 -99
- data/scripts/generate_benchmarks_md.rb +0 -155
|
@@ -0,0 +1,1236 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Pure Ruby CSS parser - Parser class
|
|
4
|
+
#
|
|
5
|
+
# IMPORTANT: This code is intentionally written in a non-idiomatic style.
|
|
6
|
+
# - Performance comes first - mirrors the C implementation
|
|
7
|
+
# - Character-by-character parsing (NO REGEXP)
|
|
8
|
+
# - Minimal abstraction, lots of state mutation
|
|
9
|
+
# - Optimized for speed, not readability
|
|
10
|
+
#
|
|
11
|
+
# Do NOT refactor to "clean Ruby" without benchmarking - you will make it slower.
|
|
12
|
+
#
|
|
13
|
+
# Example: RuboCop suggests using `.positive?` instead of `> 0`, but benchmarking
|
|
14
|
+
# shows `> 0` is 1.26x faster (see benchmark_positive.rb). These micro-optimizations
|
|
15
|
+
# matter in a hot parsing loop.
|
|
16
|
+
|
|
17
|
+
module Cataract
|
|
18
|
+
# Pure Ruby CSS parser - char-by-char, NO REGEXP
|
|
19
|
+
class Parser
|
|
20
|
+
# Maximum parse depth (prevent infinite recursion).
# Compared against @depth before the parser descends into a nested block or at-rule.
MAX_PARSE_DEPTH = 10

# Maximum media queries (prevent symbol table exhaustion).
# Media query strings are converted to Symbols while parsing, hence the cap.
MAX_MEDIA_QUERIES = 1000

# Maximum property name/value lengths.
# NOTE(review): presumably byte lengths; not referenced in the part of the parser
# reviewed here - confirm where (and whether) they are enforced.
MAX_PROPERTY_NAME_LENGTH = 256
MAX_PROPERTY_VALUE_LENGTH = 32_768

# At-rule names (without the leading '@') that parse_at_rule treats as
# conditional group rules: their block content is parsed as ordinary rules.
AT_RULE_TYPES = %w[supports layer container scope].freeze

# Read-only access to the (frozen) input, the current byte offset and the input byte length.
attr_reader :css, :pos, :len
|
|
33
|
+
|
|
34
|
+
# Cut +length+ bytes out of the input, starting at byte offset +start+, and label
# the result with +encoding+ (no transcoding - the bytes are left untouched).
#
# Per CSS spec, charset detection happens at byte-stream level before parsing, so
# all parsing operations treat content as UTF-8. Forcing the encoding here keeps
# later String calls from raising ArgumentError on broken/invalid input.
#
# @param start [Integer] byte offset into the input
# @param length [Integer] number of bytes to extract
# @param encoding [String] encoding label for the result
#   (default 'UTF-8'; callers pass 'US-ASCII' for property names)
# @return [String] a new String holding the requested bytes
def byteslice_encoded(start, length, encoding: 'UTF-8')
  raw_bytes = @css.byteslice(start, length)
  raw_bytes.force_encoding(encoding)
end
|
|
42
|
+
|
|
43
|
+
# Case-insensitive (ASCII only) byte comparison.
#
# Checks whether the bytes of +str+ beginning at byte offset +pos+ equal the bytes
# of +pattern+, treating A-Z and a-z as equal. Works purely on bytes, so it is safe
# to call even when +pos+ falls inside a multi-byte UTF-8 character.
#
# @param str [String] string to inspect
# @param pos [Integer] byte offset in +str+ where the comparison starts
# @param pattern [String] ASCII pattern to look for
# @return [Boolean] true when every pattern byte matches, false otherwise
#   (including when +str+ is too short to contain the pattern at +pos+)
def match_ascii_ci?(str, pos, pattern)
  width = pattern.bytesize
  return false if str.bytesize < pos + width

  offset = 0
  until offset == width
    actual = str.getbyte(pos + offset)
    wanted = pattern.getbyte(offset)

    # Fold ASCII uppercase onto lowercase on both sides; all other bytes compare verbatim
    actual += BYTE_CASE_DIFF if actual >= BYTE_UPPER_A && actual <= BYTE_UPPER_Z
    wanted += BYTE_CASE_DIFF if wanted >= BYTE_UPPER_A && wanted <= BYTE_UPPER_Z

    return false unless actual == wanted

    offset += 1
  end

  true
end
|
|
67
|
+
|
|
68
|
+
# Set up a parser over a private, frozen copy of +css_string+.
#
# @param css_string [String] CSS source text
# @param parent_media_sym [Symbol, nil] media query context handed down by an
#   enclosing parser (nil at top level)
# @param depth [Integer] recursion depth handed down by an enclosing parser
def initialize(css_string, parent_media_sym: nil, depth: 0)
  # Input and cursor (all offsets are byte offsets)
  @css = css_string.dup.freeze
  @len = @css.bytesize
  @pos = 0

  # Context inherited from the parent parser
  @parent_media_sym = parent_media_sym
  @depth = depth              # Current recursion depth

  # Counters
  @rule_id_counter = 0        # Next rule ID (0-indexed)
  @media_query_count = 0      # Safety limit

  # Accumulated results
  @rules = []                 # Flat array of Rule structs
  @_media_index = {}          # Symbol => Array of rule IDs
  @_has_nesting = false       # Set to true if any nested rules found
  @charset = nil              # @charset declaration
end
|
|
83
|
+
|
|
84
|
+
# Parse the whole input and return the collected results.
#
# Walks the input byte by byte. Anything starting with '@' is handed to
# parse_at_rule; everything else is read as `selector { block }`. Comma-separated
# selector lists are split so that each individual selector becomes its own Rule
# with its own rule ID.
#
# @return [Hash] with the keys :rules, :_media_index, :charset and :_has_nesting
def parse
  # Skip @import statements at the beginning (they're handled by ImportResolver)
  # Per CSS spec, @import must come before all rules (except @charset)
  skip_imports

  # Main parsing loop - char-by-char, NO REGEXP
  until eof?
    skip_ws_and_comments
    break if eof?

    # Peek at next byte to determine what to parse
    byte = peek_byte

    # Check for at-rules (@media, @charset, etc)
    if byte == BYTE_AT
      parse_at_rule
      next
    end

    # Must be a selector-based rule
    selector = parse_selector

    # parse_selector may return nil; in that case (or for an empty selector) nothing
    # is emitted and scanning simply continues from the current position
    next if selector.nil? || selector.empty?

    # Find the block boundaries
    decl_start = @pos # Should be right after the {
    decl_end = find_matching_brace(decl_start)

    # Check if block has nested selectors
    if has_nested_selectors?(decl_start, decl_end)
      # NESTED PATH: Parse mixed declarations + nested rules
      # Split comma-separated selectors and parse each one
      # (the same block is re-parsed once per selector so nested rules get resolved
      # against each parent selector individually)
      selectors = selector.split(',')

      selectors.each do |individual_selector|
        individual_selector.strip!
        next if individual_selector.empty?

        # Get rule ID for this selector
        current_rule_id = @rule_id_counter
        @rule_id_counter += 1

        # Reserve parent's position in rules array (ensures parent comes before nested)
        parent_position = @rules.length
        @rules << nil # Placeholder

        # Parse mixed block (declarations + nested selectors)
        # parse_mixed_block appends the nested rules to @rules itself and returns
        # only the parent's own declarations
        @depth += 1
        parent_declarations = parse_mixed_block(decl_start, decl_end,
                                                individual_selector, current_rule_id, @parent_media_sym)
        @depth -= 1

        # Create parent rule and replace placeholder
        rule = Rule.new(
          current_rule_id,
          individual_selector,
          parent_declarations,
          nil, # specificity
          nil, # parent_rule_id (top-level)
          nil # nesting_style
        )

        @rules[parent_position] = rule
        # Only index the rule when this parser runs inside a media context
        @_media_index[@parent_media_sym] ||= [] if @parent_media_sym
        @_media_index[@parent_media_sym] << current_rule_id if @parent_media_sym
      end

      # Move position past the closing brace
      @pos = decl_end
      @pos += 1 if @pos < @len && @css.getbyte(@pos) == BYTE_RBRACE
    else
      # NON-NESTED PATH: Parse declarations only
      # (parse_declarations also consumes the closing '}')
      @pos = decl_start # Reset to start of block
      declarations = parse_declarations

      # Split comma-separated selectors into individual rules
      selectors = selector.split(',')

      selectors.each do |individual_selector|
        individual_selector.strip!
        next if individual_selector.empty?

        # Create Rule struct
        # NOTE: every rule created from one selector list references the SAME
        # declarations Array object (it is not duplicated per rule)
        rule = Rule.new(
          @rule_id_counter, # id
          individual_selector, # selector
          declarations, # declarations
          nil, # specificity (calculated lazily)
          nil, # parent_rule_id
          nil # nesting_style
        )

        @rules << rule
        @rule_id_counter += 1
      end
    end
  end

  {
    rules: @rules,
    _media_index: @_media_index,
    charset: @charset,
    _has_nesting: @_has_nesting
  }
end
|
|
189
|
+
|
|
190
|
+
private
|
|
191
|
+
|
|
192
|
+
# Has the cursor reached (or run past) the end of the input?
# @return [Boolean]
def eof?
  @len <= @pos
end
|
|
196
|
+
|
|
197
|
+
# Look at the byte under the cursor without moving the cursor.
# @return [Integer, nil] the byte value, or nil once the input is exhausted
def peek_byte
  eof? ? nil : @css.getbyte(@pos)
end
|
|
204
|
+
|
|
205
|
+
# Byte classification helpers: thin wrappers that delegate to the module-level
# helper methods on Cataract (which work with byte values, not characters).

# Whitespace test for a single byte; delegates to Cataract.is_whitespace?.
# NOTE(review): the exact set of bytes counted as whitespace is defined by that
# helper, outside this file.
# @param byte [Integer] byte value (as returned by String#getbyte)
def whitespace?(byte)
  Cataract.is_whitespace?(byte)
end
|
|
209
|
+
|
|
210
|
+
# Letter test for a single byte; delegates to Cataract.letter?.
# NOTE(review): presumably ASCII letters only - confirm in the helper module.
# @param byte [Integer] byte value (as returned by String#getbyte)
def letter?(byte)
  Cataract.letter?(byte)
end
|
|
213
|
+
|
|
214
|
+
# Digit test for a single byte; delegates to Cataract.digit?.
# @param byte [Integer] byte value (as returned by String#getbyte)
def digit?(byte)
  Cataract.digit?(byte)
end
|
|
217
|
+
|
|
218
|
+
# Identifier-character test for a single byte; delegates to Cataract.ident_char?.
# NOTE(review): which bytes count as identifier characters is decided by that
# helper, outside this file.
# @param byte [Integer] byte value (as returned by String#getbyte)
def ident_char?(byte)
  Cataract.ident_char?(byte)
end
|
|
221
|
+
|
|
222
|
+
# Move the cursor forward over a run of whitespace bytes. Stops at the first
# non-whitespace byte or at the end of the input; does nothing if the cursor is
# not on whitespace.
def skip_whitespace
  until eof? || !whitespace?(peek_byte)
    @pos += 1
  end
end
|
|
225
|
+
|
|
226
|
+
# Skip a single /* ... */ comment if the cursor is positioned on its opening "/*".
#
# On return the cursor is just past the closing "*/". If the comment is never
# closed, the rest of the input belongs to the comment and the cursor is moved to
# EOF (the CSS Syntax spec consumes an unterminated comment up to the end of the
# input). Previously the final byte of an unterminated comment was left
# unconsumed and got re-parsed as CSS.
#
# @return [Boolean] true if a comment was consumed, false if the cursor was not
#   on "/*" (the cursor is left untouched in that case)
def skip_comment # rubocop:disable Naming/PredicateMethod
  return false unless peek_byte == BYTE_SLASH && @css.getbyte(@pos + 1) == BYTE_STAR

  @pos += 2 # Skip /*
  while @pos + 1 < @len
    if @css.getbyte(@pos) == BYTE_STAR && @css.getbyte(@pos + 1) == BYTE_SLASH
      @pos += 2 # Skip */
      return true
    end
    @pos += 1
  end

  # No closing */ found: swallow everything that is left, including the last byte
  @pos = @len
  true
end
|
|
239
|
+
|
|
240
|
+
# Skip any mix of whitespace and /* ... */ comments under the cursor.
# Alternates skip_whitespace / skip_comment until a full round leaves the cursor
# where it was, i.e. neither of them could make progress.
def skip_ws_and_comments
  previous = nil
  until previous == @pos
    previous = @pos
    skip_whitespace
    skip_comment
  end
end
|
|
249
|
+
|
|
250
|
+
# Locate the '}' that closes a block whose '{' has already been consumed.
# Translated from C: see ext/cataract/css_parser.c find_matching_brace
#
# Scanning starts at +start_pos+ with one brace considered open; every '{'
# increases and every '}' decreases the count. Braces are counted purely by byte
# value (strings and comments are not treated specially).
#
# @param start_pos [Integer] byte offset of the first byte inside the block
# @return [Integer] byte offset of the matching '}', or the offset where scanning
#   stopped (end of input) when the block is never closed
def find_matching_brace(start_pos)
  open_braces = 1
  cursor = start_pos

  while cursor < @len
    case @css.getbyte(cursor)
    when BYTE_LBRACE
      open_braces += 1
    when BYTE_RBRACE
      open_braces -= 1
      return cursor if open_braces == 0 # Found matching brace, exit immediately
    end
    cursor += 1
  end

  cursor
end
|
|
269
|
+
|
|
270
|
+
# Parse selector (read until '{')
#
# Reads from the cursor up to the next '{', consumes that '{' and returns the
# selector text with surrounding whitespace removed.
#
# @return [String, nil] the trimmed selector text (possibly empty), or nil when
#   the input ends before a '{' is found (the cursor is then at EOF)
def parse_selector
  start_pos = @pos

  # Read until we find '{'
  until eof? || peek_byte == BYTE_LBRACE # Flip to save a 'opt_not' instruction: while !eof? && peek_byte != BYTE_LBRACE
    @pos += 1
  end

  # If we hit EOF without finding '{', return nil
  return nil if eof?

  # Extract selector text
  selector_text = byteslice_encoded(start_pos, @pos - start_pos)

  # Skip the '{'
  @pos += 1 if peek_byte == BYTE_LBRACE

  # Trim whitespace from selector (in-place to avoid allocation).
  # String#strip! returns nil when there was nothing to strip, so its result must
  # not be used as the return value - otherwise "a{...}" would yield nil and the
  # whole rule would be dropped by the caller.
  selector_text.strip!
  selector_text
end
|
|
291
|
+
|
|
292
|
+
# Parse mixed block containing declarations AND nested selectors/at-rules
# Translated from C: see ext/cataract/css_parser.c parse_mixed_block
#
# Scans the byte range [start_pos, end_pos) of the input. Plain declarations are
# collected and returned; nested `@media` blocks and nested selectors are parsed
# recursively and appended to @rules (and @_media_index) as a side effect.
# The instance cursor @pos is not touched; a local cursor is used instead.
#
# @param start_pos [Integer] byte offset of the first byte inside the block
# @param end_pos [Integer] byte offset just past the block content (the closing '}')
# @param parent_selector [String] resolved selector of the rule owning this block
# @param parent_rule_id [Integer, nil] ID stored as parent_rule_id on rules created here
# @param parent_media_sym [Symbol, nil] media query context in effect for this block
# @return [Array] declarations found directly in this block (not those of nested rules)
# @raise [DepthError] when @depth exceeds MAX_PARSE_DEPTH
def parse_mixed_block(start_pos, end_pos, parent_selector, parent_rule_id, parent_media_sym)
  # Check recursion depth to prevent stack overflow
  # (callers increment @depth before calling and decrement it afterwards)
  if @depth > MAX_PARSE_DEPTH
    raise DepthError, "CSS nesting too deep: exceeded maximum depth of #{MAX_PARSE_DEPTH}"
  end

  declarations = []
  pos = start_pos

  while pos < end_pos
    # Skip whitespace
    while pos < end_pos && whitespace?(@css.getbyte(pos))
      pos += 1
    end
    break if pos >= end_pos

    # Skip comments (then restart the loop so following whitespace is skipped too)
    if pos + 1 < end_pos && @css.getbyte(pos) == BYTE_SLASH && @css.getbyte(pos + 1) == BYTE_STAR
      pos += 2
      while pos + 1 < end_pos
        if @css.getbyte(pos) == BYTE_STAR && @css.getbyte(pos + 1) == BYTE_SLASH
          pos += 2
          break
        end
        pos += 1
      end
      next
    end

    # Check if this is a nested @media query
    # The match is an exact, case-sensitive '@media' followed by a whitespace byte.
    # NOTE: `pos + 6 >= end_pos` in the last clause can never be true here because
    # `pos + 6 < end_pos` is already required by the first clause.
    if @css.getbyte(pos) == BYTE_AT && pos + 6 < end_pos &&
       byteslice_encoded(pos, 6) == '@media' &&
       (pos + 6 >= end_pos || whitespace?(@css.getbyte(pos + 6)))
      # Nested @media - parse with parent selector as context
      media_start = pos + 6
      while media_start < end_pos && whitespace?(@css.getbyte(media_start))
        media_start += 1
      end

      # Find opening brace
      media_query_end = media_start
      while media_query_end < end_pos && @css.getbyte(media_query_end) != BYTE_LBRACE
        media_query_end += 1
      end
      # No '{' before the end of the block: give up on the rest of the block
      break if media_query_end >= end_pos

      # Extract media query (trim trailing whitespace)
      media_query_end_trimmed = media_query_end
      while media_query_end_trimmed > media_start && whitespace?(@css.getbyte(media_query_end_trimmed - 1))
        media_query_end_trimmed -= 1
      end
      media_query_str = byteslice_encoded(media_start, media_query_end_trimmed - media_start)
      # Keep media query exactly as written - parentheses are required per CSS spec
      media_query_str.strip!
      media_sym = media_query_str.to_sym

      pos = media_query_end + 1 # Skip {

      # Find matching closing brace
      media_block_start = pos
      media_block_end = find_matching_brace(pos)
      pos = media_block_end
      pos += 1 if pos < end_pos # Skip }

      # Combine media queries: parent + child
      combined_media_sym = combine_media_queries(parent_media_sym, media_sym)

      # Create rule ID for this media rule
      media_rule_id = @rule_id_counter
      @rule_id_counter += 1

      # Parse mixed block recursively
      # (same parent selector, but the combined media query becomes the context)
      @depth += 1
      media_declarations = parse_mixed_block(media_block_start, media_block_end,
                                             parent_selector, media_rule_id, combined_media_sym)
      @depth -= 1

      # Create rule with parent selector and declarations, associated with combined media query
      rule = Rule.new(
        media_rule_id,
        parent_selector,
        media_declarations,
        nil, # specificity
        parent_rule_id,
        nil # nesting_style (nil for @media nesting)
      )

      # Mark that we have nesting
      @_has_nesting = true unless parent_rule_id.nil?

      # Appended after the recursive call, so rules nested inside the @media block
      # precede this rule in @rules
      @rules << rule
      @_media_index[combined_media_sym] ||= []
      @_media_index[combined_media_sym] << media_rule_id

      next
    end

    # Check if this is a nested selector
    # Decided by the first byte only. An '@' that was not matched as '@media' above
    # also ends up here. Anything starting with another byte (e.g. a bare type
    # selector) falls through to the declaration branch below.
    byte = @css.getbyte(pos)
    if byte == BYTE_AMPERSAND || byte == BYTE_DOT || byte == BYTE_HASH ||
       byte == BYTE_LBRACKET || byte == BYTE_COLON || byte == BYTE_ASTERISK ||
       byte == BYTE_GT || byte == BYTE_PLUS || byte == BYTE_TILDE || byte == BYTE_AT
      # Find the opening brace
      nested_sel_start = pos
      while pos < end_pos && @css.getbyte(pos) != BYTE_LBRACE
        pos += 1
      end
      # No '{' before the end of the block: give up on the rest of the block
      break if pos >= end_pos

      nested_sel_end = pos
      # Trim trailing whitespace
      while nested_sel_end > nested_sel_start && whitespace?(@css.getbyte(nested_sel_end - 1))
        nested_sel_end -= 1
      end

      pos += 1 # Skip {

      # Find matching closing brace
      nested_block_start = pos
      nested_block_end = find_matching_brace(pos)
      pos = nested_block_end
      pos += 1 if pos < end_pos # Skip }

      # Extract nested selector and split on commas
      nested_selector_text = byteslice_encoded(nested_sel_start, nested_sel_end - nested_sel_start)
      nested_selectors = nested_selector_text.split(',')

      # The nested block is parsed once per comma-separated selector segment
      nested_selectors.each do |seg|
        seg.strip!
        next if seg.empty?

        # Resolve nested selector
        resolved_selector, nesting_style = resolve_nested_selector(parent_selector, seg)

        # Get rule ID
        rule_id = @rule_id_counter
        @rule_id_counter += 1

        # Recursively parse nested block
        @depth += 1
        nested_declarations = parse_mixed_block(nested_block_start, nested_block_end,
                                                resolved_selector, rule_id, parent_media_sym)
        @depth -= 1

        # Create rule for nested selector
        rule = Rule.new(
          rule_id,
          resolved_selector,
          nested_declarations,
          nil, # specificity
          parent_rule_id,
          nesting_style
        )

        # Mark that we have nesting
        @_has_nesting = true unless parent_rule_id.nil?

        @rules << rule
        @_media_index[parent_media_sym] ||= [] if parent_media_sym
        @_media_index[parent_media_sym] << rule_id if parent_media_sym
      end

      next
    end

    # This is a declaration - parse it
    # Property name runs up to ':'; hitting ';' or '{' first means it is malformed
    prop_start = pos
    while pos < end_pos && @css.getbyte(pos) != BYTE_COLON &&
          @css.getbyte(pos) != BYTE_SEMICOLON && @css.getbyte(pos) != BYTE_LBRACE
      pos += 1
    end

    if pos >= end_pos || @css.getbyte(pos) != BYTE_COLON
      # Malformed - skip to semicolon
      while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON
        pos += 1
      end
      pos += 1 if pos < end_pos
      next
    end

    prop_end = pos
    # Trim trailing whitespace
    while prop_end > prop_start && whitespace?(@css.getbyte(prop_end - 1))
      prop_end -= 1
    end

    # Property names are tagged US-ASCII and lowercased in place
    property = byteslice_encoded(prop_start, prop_end - prop_start, encoding: 'US-ASCII')
    property.downcase!

    pos += 1 # Skip :

    # Skip leading whitespace in value
    while pos < end_pos && whitespace?(@css.getbyte(pos))
      pos += 1
    end

    # Parse value (read until ';' or '}')
    val_start = pos
    while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON && @css.getbyte(pos) != BYTE_RBRACE
      pos += 1
    end
    val_end = pos

    # Trim trailing whitespace from value
    while val_end > val_start && whitespace?(@css.getbyte(val_end - 1))
      val_end -= 1
    end

    value = byteslice_encoded(val_start, val_end - val_start)

    # Check for !important flag
    # Only the exact, case-sensitive suffix '!important' is recognized here
    important = false
    if value.end_with?('!important')
      important = true
      # NOTE: Using rstrip here instead of manual byte loop since !important is rare (not hot path)
      # 10 == '!important'.length
      value = value[0, value.length - 10].rstrip # Remove '!important' and trailing whitespace
    end

    pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON

    # Create declaration
    # Skipped when the property name or the raw (pre-!important) value is empty
    declarations << Declaration.new(property, value, important) if prop_end > prop_start && val_end > val_start
  end

  declarations
end
|
|
522
|
+
|
|
523
|
+
# Parse declaration block (inside { ... })
# Assumes we're already past the opening '{'
#
# Reads `property: value;` pairs starting at the cursor until the closing '}' (which
# is consumed) or the end of the input. Property names are stripped and lowercased;
# values are stripped, and a trailing `!important` (optionally written with spaces or
# tabs after the '!') is removed from the value and reported through the
# declaration's important flag.
#
# @return [Array] Declaration structs in source order
def parse_declarations
  declarations = []

  # Read until we find the closing '}'
  until eof?
    skip_ws_and_comments
    break if eof?

    # Check for closing brace
    if peek_byte == BYTE_RBRACE
      @pos += 1 # consume '}'
      break
    end

    # Parse property name (read until ':')
    property_start = @pos
    until eof?
      byte = peek_byte
      break if byte == BYTE_COLON || byte == BYTE_SEMICOLON || byte == BYTE_RBRACE

      @pos += 1
    end

    # Skip if no colon found (malformed)
    if eof? || peek_byte != BYTE_COLON
      # Try to recover by finding next ; or }
      skip_to_semicolon_or_brace
      next
    end

    property = byteslice_encoded(property_start, @pos - property_start, encoding: 'US-ASCII')
    property.strip!
    property.downcase!
    @pos += 1 # skip ':'

    skip_ws_and_comments

    # Parse value (read until ';' or '}')
    value_start = @pos
    important = false

    until eof?
      byte = peek_byte
      break if byte == BYTE_SEMICOLON || byte == BYTE_RBRACE

      @pos += 1
    end

    value = byteslice_encoded(value_start, @pos - value_start)
    value.strip!

    # Check for !important (byte-by-byte, no regexp)
    # More than 10 bytes are required: '!important' plus at least one byte of value
    if value.bytesize > 10
      # Scan backwards to find !important
      # NOTE: i is a BYTE index throughout - only byte-based accessors
      # (getbyte/byteslice) may be used with it
      i = value.bytesize - 1
      # Skip trailing whitespace
      while i >= 0
        b = value.getbyte(i)
        break unless b == BYTE_SPACE || b == BYTE_TAB

        i -= 1
      end

      # Check for 'important' (9 bytes).
      # Must use byteslice: character indexing (value[a..b]) with a byte index
      # looks at the wrong substring as soon as the value contains a multi-byte
      # character, so the flag was missed for values such as `"é" !important`.
      if i >= 8 && value.byteslice(i - 8, 9) == 'important'
        i -= 9
        # Skip whitespace before 'important'
        while i >= 0
          b = value.getbyte(i)
          break unless b == BYTE_SPACE || b == BYTE_TAB

          i -= 1
        end
        # Check for '!'
        if i >= 0 && value.getbyte(i) == BYTE_BANG
          important = true
          # Remove everything from '!' onwards (use byteslice and strip in-place)
          value = value.byteslice(0, i)
          value.strip!
        end
      end
    end

    # Skip semicolon if present
    @pos += 1 if peek_byte == BYTE_SEMICOLON

    # Create Declaration struct
    declarations << Declaration.new(property, value, important)
  end

  declarations
end
|
|
617
|
+
|
|
618
|
+
# Parse at-rule (@media, @supports, @charset, @keyframes, @font-face, etc)
|
|
619
|
+
# Translated from C: see ext/cataract/css_parser.c lines 962-1128
|
|
620
|
+
def parse_at_rule
|
|
621
|
+
at_rule_start = @pos # Points to '@'
|
|
622
|
+
@pos += 1 # skip '@'
|
|
623
|
+
|
|
624
|
+
# Find end of at-rule name (stop at whitespace or opening brace)
|
|
625
|
+
name_start = @pos
|
|
626
|
+
until eof?
|
|
627
|
+
byte = peek_byte
|
|
628
|
+
break if whitespace?(byte) || byte == BYTE_LBRACE
|
|
629
|
+
|
|
630
|
+
@pos += 1
|
|
631
|
+
end
|
|
632
|
+
|
|
633
|
+
at_rule_name = byteslice_encoded(name_start, @pos - name_start)
|
|
634
|
+
|
|
635
|
+
# Handle @charset specially - it's just @charset "value";
|
|
636
|
+
if at_rule_name == 'charset'
|
|
637
|
+
skip_ws_and_comments
|
|
638
|
+
# Read until semicolon
|
|
639
|
+
value_start = @pos
|
|
640
|
+
while !eof? && peek_byte != BYTE_SEMICOLON
|
|
641
|
+
@pos += 1
|
|
642
|
+
end
|
|
643
|
+
|
|
644
|
+
charset_value = byteslice_encoded(value_start, @pos - value_start)
|
|
645
|
+
charset_value.strip!
|
|
646
|
+
# Remove quotes (byte-by-byte)
|
|
647
|
+
result = String.new
|
|
648
|
+
i = 0
|
|
649
|
+
len = charset_value.bytesize
|
|
650
|
+
while i < len
|
|
651
|
+
byte = charset_value.getbyte(i)
|
|
652
|
+
result << charset_value[i] unless byte == BYTE_DQUOTE || byte == BYTE_SQUOTE
|
|
653
|
+
i += 1
|
|
654
|
+
end
|
|
655
|
+
@charset = result
|
|
656
|
+
|
|
657
|
+
@pos += 1 if peek_byte == BYTE_SEMICOLON # consume semicolon
|
|
658
|
+
return
|
|
659
|
+
end
|
|
660
|
+
|
|
661
|
+
# Handle conditional group at-rules: @supports, @layer, @container, @scope
|
|
662
|
+
# These behave like @media but don't affect media context
|
|
663
|
+
if AT_RULE_TYPES.include?(at_rule_name)
|
|
664
|
+
skip_ws_and_comments
|
|
665
|
+
|
|
666
|
+
# Skip to opening brace
|
|
667
|
+
while !eof? && peek_byte != BYTE_LBRACE
|
|
668
|
+
@pos += 1
|
|
669
|
+
end
|
|
670
|
+
|
|
671
|
+
return if eof? || peek_byte != BYTE_LBRACE
|
|
672
|
+
|
|
673
|
+
@pos += 1 # skip '{'
|
|
674
|
+
|
|
675
|
+
# Find matching closing brace
|
|
676
|
+
block_start = @pos
|
|
677
|
+
block_end = find_matching_brace(@pos)
|
|
678
|
+
|
|
679
|
+
# Check depth before recursing
|
|
680
|
+
if @depth + 1 > MAX_PARSE_DEPTH
|
|
681
|
+
raise DepthError, "CSS nesting too deep: exceeded maximum depth of #{MAX_PARSE_DEPTH}"
|
|
682
|
+
end
|
|
683
|
+
|
|
684
|
+
# Recursively parse block content (preserve parent media context)
|
|
685
|
+
nested_parser = Parser.new(
|
|
686
|
+
byteslice_encoded(block_start, block_end - block_start),
|
|
687
|
+
parent_media_sym: @parent_media_sym, depth: @depth + 1
|
|
688
|
+
)
|
|
689
|
+
|
|
690
|
+
nested_result = nested_parser.parse
|
|
691
|
+
|
|
692
|
+
# Merge nested media_index into ours
|
|
693
|
+
nested_result[:_media_index].each do |media, rule_ids|
|
|
694
|
+
@_media_index[media] ||= []
|
|
695
|
+
# Use each + << instead of concat + map (1.20x faster for small arrays)
|
|
696
|
+
rule_ids.each { |rid| @_media_index[media] << (@rule_id_counter + rid) }
|
|
697
|
+
end
|
|
698
|
+
|
|
699
|
+
# Add nested rules to main rules array
|
|
700
|
+
nested_result[:rules].each do |rule|
|
|
701
|
+
rule.id = @rule_id_counter
|
|
702
|
+
@rule_id_counter += 1
|
|
703
|
+
@rules << rule
|
|
704
|
+
end
|
|
705
|
+
|
|
706
|
+
# Move position past the closing brace
|
|
707
|
+
@pos = block_end
|
|
708
|
+
@pos += 1 if @pos < @len && @css.getbyte(@pos) == BYTE_RBRACE
|
|
709
|
+
|
|
710
|
+
return
|
|
711
|
+
end
|
|
712
|
+
|
|
713
|
+
# Handle @media specially - parse content and track in media_index
|
|
714
|
+
if at_rule_name == 'media'
|
|
715
|
+
skip_ws_and_comments
|
|
716
|
+
|
|
717
|
+
# Find media query (up to opening brace)
|
|
718
|
+
mq_start = @pos
|
|
719
|
+
while !eof? && peek_byte != BYTE_LBRACE
|
|
720
|
+
@pos += 1
|
|
721
|
+
end
|
|
722
|
+
|
|
723
|
+
return if eof? || peek_byte != BYTE_LBRACE
|
|
724
|
+
|
|
725
|
+
mq_end = @pos
|
|
726
|
+
# Trim trailing whitespace
|
|
727
|
+
while mq_end > mq_start && whitespace?(@css.getbyte(mq_end - 1))
|
|
728
|
+
mq_end -= 1
|
|
729
|
+
end
|
|
730
|
+
|
|
731
|
+
child_media_string = byteslice_encoded(mq_start, mq_end - mq_start)
|
|
732
|
+
# Keep media query exactly as written - parentheses are required per CSS spec
|
|
733
|
+
child_media_string.strip!
|
|
734
|
+
child_media_sym = child_media_string.to_sym
|
|
735
|
+
|
|
736
|
+
# Combine with parent media context
|
|
737
|
+
combined_media_sym = combine_media_queries(@parent_media_sym, child_media_sym)
|
|
738
|
+
|
|
739
|
+
# Check media query limit
|
|
740
|
+
unless @_media_index.key?(combined_media_sym)
|
|
741
|
+
@media_query_count += 1
|
|
742
|
+
if @media_query_count > MAX_MEDIA_QUERIES
|
|
743
|
+
raise SizeError, "Too many media queries: exceeded maximum of #{MAX_MEDIA_QUERIES}"
|
|
744
|
+
end
|
|
745
|
+
end
|
|
746
|
+
|
|
747
|
+
@pos += 1 # skip '{'
|
|
748
|
+
|
|
749
|
+
# Find matching closing brace
|
|
750
|
+
block_start = @pos
|
|
751
|
+
block_end = find_matching_brace(@pos)
|
|
752
|
+
|
|
753
|
+
# Check depth before recursing
|
|
754
|
+
if @depth + 1 > MAX_PARSE_DEPTH
|
|
755
|
+
raise DepthError, "CSS nesting too deep: exceeded maximum depth of #{MAX_PARSE_DEPTH}"
|
|
756
|
+
end
|
|
757
|
+
|
|
758
|
+
# Parse the content with the combined media context
|
|
759
|
+
nested_parser = Parser.new(
|
|
760
|
+
byteslice_encoded(block_start, block_end - block_start),
|
|
761
|
+
parent_media_sym: combined_media_sym,
|
|
762
|
+
depth: @depth + 1
|
|
763
|
+
)
|
|
764
|
+
|
|
765
|
+
nested_result = nested_parser.parse
|
|
766
|
+
|
|
767
|
+
# Merge nested media_index into ours (for nested @media)
|
|
768
|
+
nested_result[:_media_index].each do |media, rule_ids|
|
|
769
|
+
@_media_index[media] ||= []
|
|
770
|
+
# Use each + << instead of concat + map (1.20x faster for small arrays)
|
|
771
|
+
rule_ids.each { |rid| @_media_index[media] << (@rule_id_counter + rid) }
|
|
772
|
+
end
|
|
773
|
+
|
|
774
|
+
# Add nested rules to main rules array and update media_index
|
|
775
|
+
nested_result[:rules].each do |rule|
|
|
776
|
+
rule.id = @rule_id_counter
|
|
777
|
+
|
|
778
|
+
# Extract media types and add to each first (if different from full query)
|
|
779
|
+
# We add these BEFORE the full query so that when iterating the media_index hash,
|
|
780
|
+
# the full query comes last and takes precedence during serialization
|
|
781
|
+
media_types = Cataract.parse_media_types(combined_media_sym)
|
|
782
|
+
media_types.each do |media_type|
|
|
783
|
+
# Only add if different from combined_media_sym to avoid duplication
|
|
784
|
+
if media_type != combined_media_sym
|
|
785
|
+
@_media_index[media_type] ||= []
|
|
786
|
+
@_media_index[media_type] << @rule_id_counter
|
|
787
|
+
end
|
|
788
|
+
end
|
|
789
|
+
|
|
790
|
+
# Add to full query symbol (after media types for insertion order)
|
|
791
|
+
@_media_index[combined_media_sym] ||= []
|
|
792
|
+
@_media_index[combined_media_sym] << @rule_id_counter
|
|
793
|
+
|
|
794
|
+
@rule_id_counter += 1
|
|
795
|
+
@rules << rule
|
|
796
|
+
end
|
|
797
|
+
|
|
798
|
+
# Move position past the closing brace
|
|
799
|
+
@pos = block_end
|
|
800
|
+
@pos += 1 if @pos < @len && @css.getbyte(@pos) == BYTE_RBRACE
|
|
801
|
+
|
|
802
|
+
return
|
|
803
|
+
end
|
|
804
|
+
|
|
805
|
+
# Check for @keyframes (contains <rule-list>)
|
|
806
|
+
is_keyframes = at_rule_name == 'keyframes' ||
|
|
807
|
+
at_rule_name == '-webkit-keyframes' ||
|
|
808
|
+
at_rule_name == '-moz-keyframes'
|
|
809
|
+
|
|
810
|
+
if is_keyframes
|
|
811
|
+
# Build full selector string: "@keyframes fade"
|
|
812
|
+
selector_start = at_rule_start # Points to '@'
|
|
813
|
+
|
|
814
|
+
# Skip to opening brace
|
|
815
|
+
while !eof? && peek_byte != BYTE_LBRACE
|
|
816
|
+
@pos += 1
|
|
817
|
+
end
|
|
818
|
+
|
|
819
|
+
return if eof? || peek_byte != BYTE_LBRACE
|
|
820
|
+
|
|
821
|
+
selector_end = @pos
|
|
822
|
+
# Trim trailing whitespace
|
|
823
|
+
while selector_end > selector_start && whitespace?(@css.getbyte(selector_end - 1))
|
|
824
|
+
selector_end -= 1
|
|
825
|
+
end
|
|
826
|
+
selector = byteslice_encoded(selector_start, selector_end - selector_start)
|
|
827
|
+
|
|
828
|
+
@pos += 1 # skip '{'
|
|
829
|
+
|
|
830
|
+
# Find matching closing brace
|
|
831
|
+
block_start = @pos
|
|
832
|
+
block_end = find_matching_brace(@pos)
|
|
833
|
+
|
|
834
|
+
# Check depth before recursing
|
|
835
|
+
if @depth + 1 > MAX_PARSE_DEPTH
|
|
836
|
+
raise DepthError, "CSS nesting too deep: exceeded maximum depth of #{MAX_PARSE_DEPTH}"
|
|
837
|
+
end
|
|
838
|
+
|
|
839
|
+
# Parse keyframe blocks as rules (0%/from/to etc)
|
|
840
|
+
# Create a nested parser context
|
|
841
|
+
nested_parser = Parser.new(byteslice_encoded(block_start, block_end - block_start), depth: @depth + 1)
|
|
842
|
+
nested_result = nested_parser.parse
|
|
843
|
+
content = nested_result[:rules]
|
|
844
|
+
|
|
845
|
+
# Move position past the closing brace
|
|
846
|
+
@pos = block_end
|
|
847
|
+
# The closing brace should be at block_end
|
|
848
|
+
@pos += 1 if @pos < @len && @css.getbyte(@pos) == BYTE_RBRACE
|
|
849
|
+
|
|
850
|
+
# Get rule ID and increment
|
|
851
|
+
rule_id = @rule_id_counter
|
|
852
|
+
@rule_id_counter += 1
|
|
853
|
+
|
|
854
|
+
# Create AtRule with nested rules
|
|
855
|
+
at_rule = AtRule.new(rule_id, selector, content, nil)
|
|
856
|
+
@rules << at_rule
|
|
857
|
+
|
|
858
|
+
return
|
|
859
|
+
end
|
|
860
|
+
|
|
861
|
+
# Check for @font-face (contains <declaration-list>)
|
|
862
|
+
if at_rule_name == 'font-face'
|
|
863
|
+
# Build selector string: "@font-face"
|
|
864
|
+
selector_start = at_rule_start # Points to '@'
|
|
865
|
+
|
|
866
|
+
# Skip to opening brace
|
|
867
|
+
while !eof? && peek_byte != BYTE_LBRACE
|
|
868
|
+
@pos += 1
|
|
869
|
+
end
|
|
870
|
+
|
|
871
|
+
return if eof? || peek_byte != BYTE_LBRACE
|
|
872
|
+
|
|
873
|
+
selector_end = @pos
|
|
874
|
+
# Trim trailing whitespace
|
|
875
|
+
while selector_end > selector_start && whitespace?(@css.getbyte(selector_end - 1))
|
|
876
|
+
selector_end -= 1
|
|
877
|
+
end
|
|
878
|
+
selector = byteslice_encoded(selector_start, selector_end - selector_start)
|
|
879
|
+
|
|
880
|
+
@pos += 1 # skip '{'
|
|
881
|
+
|
|
882
|
+
# Find matching closing brace
|
|
883
|
+
decl_start = @pos
|
|
884
|
+
decl_end = find_matching_brace(@pos)
|
|
885
|
+
|
|
886
|
+
# Parse declarations
|
|
887
|
+
content = parse_declarations_block(decl_start, decl_end)
|
|
888
|
+
|
|
889
|
+
# Move position past the closing brace
|
|
890
|
+
@pos = decl_end
|
|
891
|
+
# The closing brace should be at decl_end
|
|
892
|
+
@pos += 1 if @pos < @len && @css.getbyte(@pos) == BYTE_RBRACE
|
|
893
|
+
|
|
894
|
+
# Get rule ID and increment
|
|
895
|
+
rule_id = @rule_id_counter
|
|
896
|
+
@rule_id_counter += 1
|
|
897
|
+
|
|
898
|
+
# Create AtRule with declarations
|
|
899
|
+
at_rule = AtRule.new(rule_id, selector, content, nil)
|
|
900
|
+
@rules << at_rule
|
|
901
|
+
|
|
902
|
+
return
|
|
903
|
+
end
|
|
904
|
+
|
|
905
|
+
# Unknown at-rule (@property, @page, @counter-style, etc.)
|
|
906
|
+
# Treat as a regular selector-based rule with declarations
|
|
907
|
+
selector_start = at_rule_start # Points to '@'
|
|
908
|
+
|
|
909
|
+
# Skip to opening brace
|
|
910
|
+
until eof? || peek_byte == BYTE_LBRACE # Save a not_opt instruction: while !eof? && peek_byte != BYTE_LBRACE
|
|
911
|
+
@pos += 1
|
|
912
|
+
end
|
|
913
|
+
|
|
914
|
+
return if eof? || peek_byte != BYTE_LBRACE
|
|
915
|
+
|
|
916
|
+
selector_end = @pos
|
|
917
|
+
# Trim trailing whitespace
|
|
918
|
+
while selector_end > selector_start && whitespace?(@css.getbyte(selector_end - 1))
|
|
919
|
+
selector_end -= 1
|
|
920
|
+
end
|
|
921
|
+
selector = byteslice_encoded(selector_start, selector_end - selector_start)
|
|
922
|
+
|
|
923
|
+
@pos += 1 # skip '{'
|
|
924
|
+
|
|
925
|
+
# Parse declarations
|
|
926
|
+
declarations = parse_declarations
|
|
927
|
+
|
|
928
|
+
# Create Rule with declarations
|
|
929
|
+
rule = Rule.new(
|
|
930
|
+
@rule_id_counter, # id
|
|
931
|
+
selector, # selector (e.g., "@property --main-color")
|
|
932
|
+
declarations, # declarations
|
|
933
|
+
nil, # specificity
|
|
934
|
+
nil, # parent_rule_id
|
|
935
|
+
nil # nesting_style
|
|
936
|
+
)
|
|
937
|
+
|
|
938
|
+
@rules << rule
|
|
939
|
+
@rule_id_counter += 1
|
|
940
|
+
end
|
|
941
|
+
|
|
942
|
+
# Check if block contains nested selectors vs just declarations
|
|
943
|
+
# Translated from C: see ext/cataract/css_parser.c has_nested_selectors
|
|
944
|
+
def has_nested_selectors?(start_pos, end_pos)
  # Heuristic scan of @css over the byte range [start_pos, end_pos).
  # Returns true when an entry starts with '@', or starts with a selector-like
  # byte and reaches '{' before any ';' or newline; returns false otherwise.
  cursor = start_pos

  until cursor >= end_pos
    # Leading whitespace never decides anything
    cursor += 1 while cursor < end_pos && whitespace?(@css.getbyte(cursor))
    break if cursor >= end_pos

    lead = @css.getbyte(cursor)

    # Step over a /* ... */ comment and re-examine whatever follows it
    if lead == BYTE_SLASH && cursor + 1 < end_pos && @css.getbyte(cursor + 1) == BYTE_STAR
      cursor += 2
      while cursor + 1 < end_pos
        closes = @css.getbyte(cursor) == BYTE_STAR && @css.getbyte(cursor + 1) == BYTE_SLASH
        cursor += closes ? 2 : 1
        break if closes
      end
      next
    end

    case lead
    when BYTE_AT
      # Nested at-rule (@media, @supports, ...)
      return true
    when BYTE_AMPERSAND, BYTE_DOT, BYTE_HASH, BYTE_LBRACKET, BYTE_COLON,
         BYTE_ASTERISK, BYTE_GT, BYTE_PLUS, BYTE_TILDE
      # Selector-like start: it is a nested rule only if '{' is reached
      # before the entry ends at ';' or a newline
      probe = cursor + 1
      while probe < end_pos
        seen = @css.getbyte(probe)
        return true if seen == BYTE_LBRACE
        break if seen == BYTE_SEMICOLON || seen == BYTE_NEWLINE

        probe += 1
      end
    end

    # Not conclusive: advance past the next ';' or newline
    while cursor < end_pos
      here = @css.getbyte(cursor)
      break if here == BYTE_SEMICOLON || here == BYTE_NEWLINE

      cursor += 1
    end
    cursor += 1 if cursor < end_pos
  end

  false
end
|
|
993
|
+
|
|
994
|
+
# Resolve nested selector against parent
|
|
995
|
+
# Translated from C: see ext/cataract/css_parser.c resolve_nested_selector
|
|
996
|
+
# Examples:
|
|
997
|
+
# resolve_nested_selector(".parent", "& .child") => [".parent .child", 1] (explicit)
|
|
998
|
+
# resolve_nested_selector(".parent", "&:hover") => [".parent:hover", 1] (explicit)
|
|
999
|
+
# resolve_nested_selector(".parent", "&.active") => [".parent.active", 1] (explicit)
|
|
1000
|
+
# resolve_nested_selector(".parent", ".child") => [".parent .child", 0] (implicit)
|
|
1001
|
+
# resolve_nested_selector(".parent", "> .child") => [".parent > .child", 0] (implicit combinator)
|
|
1002
|
+
#
|
|
1003
|
+
# Returns: [resolved_selector, nesting_style]
|
|
1004
|
+
# nesting_style: 0 = NESTING_STYLE_IMPLICIT, 1 = NESTING_STYLE_EXPLICIT
|
|
1005
|
+
def resolve_nested_selector(parent_selector, nested_selector)
  # Resolves +nested_selector+ against +parent_selector+ and returns
  # [resolved_selector, nesting_style].
  #
  # * If the nested selector contains '&' (explicit nesting), every '&' is
  #   replaced by the parent selector. When the nested selector starts with a
  #   combinator ('+', '>' or '~'), the parent and a space are prepended first.
  # * Otherwise (implicit nesting) the result is the parent, a space, and the
  #   nested selector with leading ASCII whitespace removed.
  #
  # The result is built from whole byte slices so multi-byte characters are
  # never split, and it carries the nested selector's encoding instead of the
  # ASCII-8BIT encoding of a bare String.new.
  len = nested_selector.bytesize

  # Byte-level search for '&'
  has_ampersand = false
  i = 0
  while i < len
    if nested_selector.getbyte(i) == BYTE_AMPERSAND
      has_ampersand = true
      break
    end
    i += 1
  end

  # Offset of the first byte that is not ASCII whitespace.
  # A manual loop is kept here (instead of lstrip) because only these four
  # bytes need to be considered.
  start_pos = 0
  while start_pos < len
    byte = nested_selector.getbyte(start_pos)
    break unless byte == BYTE_SPACE || byte == BYTE_TAB || byte == BYTE_NEWLINE || byte == BYTE_CR

    start_pos += 1
  end

  result = String.new(encoding: nested_selector.encoding)

  unless has_ampersand
    # Implicit nesting - prepend parent with a single space
    result << parent_selector
    result << ' '
    result << nested_selector.byteslice(start_pos, len - start_pos)
    return [result, NESTING_STYLE_IMPLICIT]
  end

  # Explicit nesting. A leading combinator makes this a relative selector,
  # so the parent goes in front as well.
  # Example: "+ .bar + &" => ".foo + .bar + .foo"
  first_byte = nested_selector.getbyte(start_pos)
  if first_byte == BYTE_PLUS || first_byte == BYTE_GT || first_byte == BYTE_TILDE
    result << parent_selector
    result << ' '
  end

  # Copy the text between ampersands verbatim and substitute the parent for
  # each '&'. '&' is a single ASCII byte, so every slice boundary falls
  # between characters.
  segment_start = 0
  i = 0
  while i < len
    if nested_selector.getbyte(i) == BYTE_AMPERSAND
      result << nested_selector.byteslice(segment_start, i - segment_start) if i > segment_start
      result << parent_selector
      segment_start = i + 1
    end
    i += 1
  end
  result << nested_selector.byteslice(segment_start, len - segment_start) if segment_start < len

  [result, NESTING_STYLE_EXPLICIT]
end
|
|
1086
|
+
|
|
1087
|
+
# Combine parent and child media queries
|
|
1088
|
+
# Translated from C: see ext/cataract/css_parser.c combine_media_queries
|
|
1089
|
+
# Examples:
|
|
1090
|
+
# parent="screen", child="min-width: 500px" => "screen and (min-width: 500px)"
|
|
1091
|
+
# parent=nil, child="print" => "print"
|
|
1092
|
+
def combine_media_queries(parent, child)
  # With only one side present, that side is returned untouched.
  return child if parent.nil?
  return parent if child.nil?

  inner = child.to_s

  # A child containing ':' is treated as a condition and gets parentheses,
  # unless it already starts with '(' and ends with ')'.
  already_wrapped = inner.start_with?('(') && inner.end_with?(')')
  inner = "(#{inner})" if inner.include?(':') && !already_wrapped

  # Result is a Symbol of the form "parent and child"
  "#{parent} and #{inner}".to_sym
end
|
|
1116
|
+
|
|
1117
|
+
# Skip to next semicolon or closing brace (error recovery)
|
|
1118
|
+
def skip_to_semicolon_or_brace
  # Advance @pos until end of input, a ';' or a '}' is reached
  loop do
    break if eof?

    current = peek_byte
    break if current == BYTE_SEMICOLON || current == BYTE_RBRACE

    @pos += 1
  end

  # A ';' is consumed; a '}' is left in place
  @pos += 1 if peek_byte == BYTE_SEMICOLON
end
|
|
1125
|
+
|
|
1126
|
+
# Skip @import statements at the beginning of CSS
|
|
1127
|
+
# Per CSS spec, @import must come before all rules (except @charset)
|
|
1128
|
+
def skip_imports
  # Moves @pos past any leading run of whitespace, /* */ comments and
  # @import statements, stopping at the first byte of anything else.
  loop do
    @pos += 1 while !eof? && whitespace?(peek_byte)
    break if eof?

    # Comment: jump past the closing "*/" when there is one
    if @css.getbyte(@pos) == BYTE_SLASH && @pos + 1 < @len && @css.getbyte(@pos + 1) == BYTE_STAR
      @pos += 2
      while @pos + 1 < @len
        closes = @css.getbyte(@pos) == BYTE_STAR && @css.getbyte(@pos + 1) == BYTE_SLASH
        @pos += closes ? 2 : 1
        break if closes
      end
      next
    end

    # Anything that is not "@import" (compared case-insensitively) ends the scan
    after_keyword = @pos + 7
    break unless after_keyword <= @len &&
                 @css.getbyte(@pos) == BYTE_AT &&
                 match_ascii_ci?(@css, @pos + 1, 'import')

    # The keyword must end the input or be followed by whitespace or a quote
    if after_keyword < @len
      delimiter = @css.getbyte(after_keyword)
      break unless whitespace?(delimiter) || delimiter == BYTE_SQUOTE || delimiter == BYTE_DQUOTE
    end

    # Drop the statement up to and including its ';'
    @pos += 1 until eof? || peek_byte == BYTE_SEMICOLON
    @pos += 1 unless eof?
  end
end
|
|
1166
|
+
|
|
1167
|
+
# Parse a block of declarations given start/end positions
|
|
1168
|
+
# Used for @font-face and other at-rules
|
|
1169
|
+
# Translated from C: see ext/cataract/css_parser.c parse_declarations
|
|
1170
|
+
def parse_declarations_block(start_pos, end_pos)
  # Parses the bytes of @css in [start_pos, end_pos) as a list of
  # "property: value;" entries and returns an Array of Declaration objects
  # (the third argument, the important flag, is always false here).
  #
  # Property names are lowercased. Entries without a ':' are skipped up to
  # the next ';'. A value ends at the first ';' or '}' that is outside
  # quotes and parentheses, so values such as
  # url(data:font/woff2;base64,...) are kept whole.
  declarations = []
  pos = start_pos

  while pos < end_pos
    # Skip whitespace
    pos += 1 while pos < end_pos && whitespace?(@css.getbyte(pos))
    break if pos >= end_pos

    # Parse property name (read until ':')
    prop_start = pos
    while pos < end_pos && @css.getbyte(pos) != BYTE_COLON &&
          @css.getbyte(pos) != BYTE_SEMICOLON && @css.getbyte(pos) != BYTE_RBRACE
      pos += 1
    end

    # Skip if no colon found (malformed)
    if pos >= end_pos || @css.getbyte(pos) != BYTE_COLON
      # Try to recover by finding next semicolon
      pos += 1 while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON
      pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON
      next
    end

    # Trim trailing whitespace from property
    prop_end = pos
    prop_end -= 1 while prop_end > prop_start && whitespace?(@css.getbyte(prop_end - 1))

    property = byteslice_encoded(prop_start, prop_end - prop_start, encoding: 'US-ASCII')
    property.downcase!

    pos += 1 # Skip ':'

    # Skip leading whitespace in value
    pos += 1 while pos < end_pos && whitespace?(@css.getbyte(pos))

    # Parse value (read until a top-level ';' or '}')
    val_start = pos
    pos = find_declaration_value_end(pos, end_pos)

    # Trim trailing whitespace from value
    val_end = pos
    val_end -= 1 while val_end > val_start && whitespace?(@css.getbyte(val_end - 1))

    value = byteslice_encoded(val_start, val_end - val_start)

    pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON

    # Create Declaration struct (at-rules don't use !important)
    declarations << Declaration.new(property, value, false)
  end

  declarations
end

# Returns the offset of the first ';' or '}' at or after +pos+ that is not
# inside a quoted string or parentheses, or +end_pos+ if there is none.
def find_declaration_value_end(pos, end_pos)
  backslash = 0x5C # '\'
  lparen = 0x28    # '('
  rparen = 0x29    # ')'

  paren_depth = 0
  quote = nil # byte of the currently open quote, nil outside a string

  while pos < end_pos
    byte = @css.getbyte(pos)

    if quote
      if byte == backslash
        pos += 1 # the escaped byte can never close the string
      elsif byte == quote || byte == BYTE_NEWLINE
        # An unescaped newline ends a (malformed) string, which keeps one
        # stray quote from swallowing the rest of the block
        quote = nil
      end
    elsif byte == BYTE_SQUOTE || byte == BYTE_DQUOTE
      quote = byte
    elsif byte == lparen
      paren_depth += 1
    elsif byte == rparen
      paren_depth -= 1 if paren_depth.positive?
    elsif paren_depth.zero? && (byte == BYTE_SEMICOLON || byte == BYTE_RBRACE)
      break
    end

    pos += 1
  end

  # A trailing backslash may have stepped one byte past the range
  pos > end_pos ? end_pos : pos
end
|
|
1235
|
+
end
|
|
1236
|
+
end
|