cataract 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci-manual-rubies.yml +44 -0
  3. data/.overcommit.yml +1 -1
  4. data/.rubocop.yml +96 -4
  5. data/.rubocop_todo.yml +186 -0
  6. data/BENCHMARKS.md +62 -141
  7. data/CHANGELOG.md +20 -0
  8. data/RAGEL_MIGRATION.md +2 -2
  9. data/README.md +37 -4
  10. data/Rakefile +72 -32
  11. data/cataract.gemspec +4 -1
  12. data/ext/cataract/cataract.c +59 -50
  13. data/ext/cataract/cataract.h +5 -3
  14. data/ext/cataract/css_parser.c +173 -65
  15. data/ext/cataract/extconf.rb +2 -2
  16. data/ext/cataract/{merge.c → flatten.c} +526 -468
  17. data/ext/cataract/shorthand_expander.c +164 -115
  18. data/lib/cataract/at_rule.rb +8 -9
  19. data/lib/cataract/declaration.rb +18 -0
  20. data/lib/cataract/import_resolver.rb +63 -43
  21. data/lib/cataract/import_statement.rb +49 -0
  22. data/lib/cataract/pure/byte_constants.rb +69 -0
  23. data/lib/cataract/pure/flatten.rb +1145 -0
  24. data/lib/cataract/pure/helpers.rb +35 -0
  25. data/lib/cataract/pure/imports.rb +268 -0
  26. data/lib/cataract/pure/parser.rb +1340 -0
  27. data/lib/cataract/pure/serializer.rb +590 -0
  28. data/lib/cataract/pure/specificity.rb +206 -0
  29. data/lib/cataract/pure.rb +153 -0
  30. data/lib/cataract/rule.rb +69 -15
  31. data/lib/cataract/stylesheet.rb +356 -49
  32. data/lib/cataract/version.rb +1 -1
  33. data/lib/cataract.rb +43 -26
  34. metadata +14 -26
  35. data/benchmarks/benchmark_harness.rb +0 -193
  36. data/benchmarks/benchmark_merging.rb +0 -121
  37. data/benchmarks/benchmark_optimization_comparison.rb +0 -168
  38. data/benchmarks/benchmark_parsing.rb +0 -153
  39. data/benchmarks/benchmark_ragel_removal.rb +0 -56
  40. data/benchmarks/benchmark_runner.rb +0 -70
  41. data/benchmarks/benchmark_serialization.rb +0 -180
  42. data/benchmarks/benchmark_shorthand.rb +0 -109
  43. data/benchmarks/benchmark_shorthand_expansion.rb +0 -176
  44. data/benchmarks/benchmark_specificity.rb +0 -124
  45. data/benchmarks/benchmark_string_allocation.rb +0 -151
  46. data/benchmarks/benchmark_stylesheet_to_s.rb +0 -62
  47. data/benchmarks/benchmark_to_s_cached.rb +0 -55
  48. data/benchmarks/benchmark_value_splitter.rb +0 -54
  49. data/benchmarks/benchmark_yjit.rb +0 -158
  50. data/benchmarks/benchmark_yjit_workers.rb +0 -61
  51. data/benchmarks/profile_to_s.rb +0 -23
  52. data/benchmarks/speedup_calculator.rb +0 -83
  53. data/benchmarks/system_metadata.rb +0 -81
  54. data/benchmarks/templates/benchmarks.md.erb +0 -221
  55. data/benchmarks/yjit_tests.rb +0 -141
  56. data/scripts/fuzzer/run.rb +0 -828
  57. data/scripts/fuzzer/worker.rb +0 -99
  58. data/scripts/generate_benchmarks_md.rb +0 -155
@@ -0,0 +1,1340 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Pure Ruby CSS parser - Parser class
4
+ #
5
+ # IMPORTANT: This code is intentionally written in a non-idiomatic style.
6
+ # - Performance comes first - mirrors the C implementation
7
+ # - Character-by-character parsing (NO REGEXP)
8
+ # - Minimal abstraction, lots of state mutation
9
+ # - Optimized for speed, not readability
10
+ #
11
+ # Do NOT refactor to "clean Ruby" without benchmarking - you will make it slower.
12
+ #
13
+ # Example: RuboCop suggests using `.positive?` instead of `> 0`, but benchmarking
14
+ # shows `> 0` is 1.26x faster (see benchmark_positive.rb). These micro-optimizations
15
+ # matter in a hot parsing loop.
16
+
17
+ module Cataract
18
+ # Pure Ruby CSS parser - char-by-char, NO REGEXP
19
+ class Parser
20
+ # Maximum parse depth (prevent infinite recursion)
21
+ MAX_PARSE_DEPTH = 10
22
+
23
+ # Maximum media queries (prevent symbol table exhaustion)
24
+ MAX_MEDIA_QUERIES = 1000
25
+
26
+ # Maximum property name/value lengths
27
+ MAX_PROPERTY_NAME_LENGTH = 256
28
+ MAX_PROPERTY_VALUE_LENGTH = 32_768
29
+
30
+ AT_RULE_TYPES = %w[supports layer container scope].freeze
31
+
32
+ attr_reader :css, :pos, :len
33
+
34
+ # Extract substring and force specified encoding
35
+ # Per CSS spec, charset detection happens at byte-stream level before parsing.
36
+ # All parsing operations treat content as UTF-8 (spec requires fallback to UTF-8).
37
+ # This prevents ArgumentError on broken/invalid encodings when calling string methods.
38
+ # Optional encoding parameter (default: 'UTF-8', use 'US-ASCII' for property names)
39
def byteslice_encoded(start, length, encoding: 'UTF-8')
  # byteslice works on raw byte offsets, so the slice may start or stop in the
  # middle of a multi-byte sequence. The bytes are only relabelled with the
  # requested encoding here, never transcoded or validated.
  raw = @css.byteslice(start, length)
  raw.force_encoding(encoding)
end
42
+
43
+ # Helper: Case-insensitive ASCII byte comparison
44
+ # Compares bytes at given position with ASCII pattern (case-insensitive)
45
+ # Safe to use even if position is in middle of multi-byte UTF-8 characters
46
+ # Returns true if match, false otherwise
47
def match_ascii_ci?(str, pos, pattern)
  needed = pattern.bytesize
  # The pattern cannot fit in what is left of str.
  return false if pos + needed > str.bytesize

  offset = 0
  while offset < needed
    actual = str.getbyte(pos + offset)
    expected = pattern.getbyte(offset)

    # Fold ASCII A-Z onto a-z on both sides; every other byte (including the
    # bytes of multi-byte UTF-8 sequences) is compared verbatim.
    actual += BYTE_CASE_DIFF if actual >= BYTE_UPPER_A && actual <= BYTE_UPPER_Z
    expected += BYTE_CASE_DIFF if expected >= BYTE_UPPER_A && expected <= BYTE_UPPER_Z

    return false unless actual == expected

    offset += 1
  end

  true
end
67
+
68
def initialize(css_string, parent_media_sym: nil, depth: 0)
  # Work on a private frozen copy so later changes to the caller's string
  # cannot shift the byte offsets tracked below.
  @css = css_string.dup.freeze
  @len = @css.bytesize # all positions are byte offsets, not character offsets
  @pos = 0

  # Context handed down by an enclosing parser (nil / 0 at the top level)
  @parent_media_sym = parent_media_sym
  @depth = depth

  # Results accumulated while parsing and returned by #parse
  @rules = []           # flat array of Rule / AtRule structs
  @_media_index = {}    # Symbol => Array of rule IDs
  @imports = []         # ImportStatement structs
  @charset = nil        # value of an @charset declaration, if any
  @_has_nesting = false # flipped to true once a nested rule is recorded

  # Counters
  @rule_id_counter = 0   # next rule ID to hand out (0-indexed)
  @media_query_count = 0 # compared against MAX_MEDIA_QUERIES
end
84
+
85
def parse
  # Top-level driver: walks the input byte-by-byte (no Regexp) and dispatches
  # each construct to an at-rule handler or a selector-based rule handler.
  # @import handling lives in parse_at_rule.
  until eof?
    skip_ws_and_comments
    break if eof?

    # '@' starts an at-rule (@media, @charset, ...); anything else is
    # treated as a selector list.
    if peek_byte == BYTE_AT
      parse_at_rule
      next
    end

    selector_list = parse_selector
    next if selector_list.nil? || selector_list.empty?

    # parse_selector leaves @pos on the first byte after '{'
    body_start = @pos
    body_end = find_matching_brace(body_start)

    unless has_nested_selectors?(body_start, body_end)
      # FLAT BLOCK: the declarations are parsed once and the same array is
      # handed to every comma-separated selector.
      @pos = body_start
      shared_declarations = parse_declarations

      selector_list.split(',').each do |single|
        single.strip!
        next if single.empty?

        @rules << Rule.new(
          @rule_id_counter,    # id
          single,              # selector
          shared_declarations, # declarations
          nil,                 # specificity (calculated lazily)
          nil,                 # parent_rule_id
          nil                  # nesting_style
        )
        @rule_id_counter += 1
      end
      next
    end

    # NESTED BLOCK: the body mixes declarations with nested rules, so it is
    # re-parsed once per comma-separated selector.
    selector_list.split(',').each do |single|
      single.strip!
      next if single.empty?

      rule_id = @rule_id_counter
      @rule_id_counter += 1

      # Reserve the parent's slot first so it precedes its nested rules in
      # @rules; the nil placeholder is replaced once the body is parsed.
      slot = @rules.length
      @rules << nil

      @depth += 1
      own_declarations = parse_mixed_block(body_start, body_end, single, rule_id, @parent_media_sym)
      @depth -= 1

      @rules[slot] = Rule.new(
        rule_id,
        single,
        own_declarations,
        nil, # specificity
        nil, # parent_rule_id (top-level)
        nil  # nesting_style
      )
      (@_media_index[@parent_media_sym] ||= []) << rule_id if @parent_media_sym
    end

    # Continue right after the block's closing brace
    @pos = body_end
    @pos += 1 if @pos < @len && @css.getbyte(@pos) == BYTE_RBRACE
  end

  {
    rules: @rules,
    _media_index: @_media_index,
    imports: @imports,
    charset: @charset,
    _has_nesting: @_has_nesting
  }
end
190
+
191
+ private
192
+
193
+ # Check if we're at end of input
194
def eof?
  # @pos is a byte offset; anything at or beyond @len is past the input.
  @len <= @pos
end
197
+
198
+ # Peek current byte without advancing
199
+ # @return [Integer, nil] Byte value or nil if EOF
200
def peek_byte
  # Same bounds test as eof?, written inline; the cursor is never moved.
  @pos < @len ? @css.getbyte(@pos) : nil
end
205
+
206
+ # Delegate to module-level helper methods (now work with bytes)
207
def whitespace?(byte)
  # Instance-level shorthand for the module helper (which carries an is_ prefix).
  Cataract.is_whitespace?(byte)
end
210
+
211
def letter?(byte)
  # Instance-level shorthand for the module helper of the same name.
  Cataract.letter?(byte)
end
214
+
215
def digit?(byte)
  # Instance-level shorthand for the module helper of the same name.
  Cataract.digit?(byte)
end
218
+
219
def ident_char?(byte)
  # Instance-level shorthand for the module helper of the same name.
  Cataract.ident_char?(byte)
end
222
+
223
def skip_whitespace
  # Bounds check and byte read are written inline (same tests as eof? and
  # peek_byte); the cursor stops on the first non-whitespace byte or at EOF.
  while @pos < @len && whitespace?(@css.getbyte(@pos))
    @pos += 1
  end
end
226
+
227
def skip_comment # rubocop:disable Naming/PredicateMethod
  # Returns false (cursor untouched) unless the cursor sits on "/*".
  # Returns true after consuming the comment. getbyte returns nil past the end
  # of the input, so no explicit EOF check is needed here.
  return false unless @css.getbyte(@pos) == BYTE_SLASH && @css.getbyte(@pos + 1) == BYTE_STAR

  @pos += 2 # Skip /*
  while @pos + 1 < @len
    if @css.getbyte(@pos) == BYTE_STAR && @css.getbyte(@pos + 1) == BYTE_SLASH
      @pos += 2 # Skip */
      return true
    end
    @pos += 1
  end

  # Unterminated comment: it extends to the end of the input. Without this the
  # cursor would rest on the comment's final byte, which callers would then
  # read as the start of a selector or declaration.
  @pos = @len
  true
end
240
+
241
+ # Skip whitespace and comments
242
def skip_ws_and_comments
  # Alternate between whitespace and comment skipping until a full round
  # leaves the cursor where it was. The sentinel can never equal a real
  # position, so the body always runs at least once.
  mark = -1
  until mark == @pos
    mark = @pos
    skip_whitespace
    skip_comment
  end
end
250
+
251
+ # Find matching closing brace
252
+ # Translated from C: see ext/cataract/css_parser.c find_matching_brace
253
def find_matching_brace(start_pos)
  # start_pos is expected to be just inside an already-opened block, hence the
  # initial count of one. Braces are counted as raw bytes; nothing here looks
  # inside strings or comments.
  open_blocks = 1
  cursor = start_pos

  while cursor < @len
    byte = @css.getbyte(cursor)
    if byte == BYTE_LBRACE
      open_blocks += 1
    elsif byte == BYTE_RBRACE
      open_blocks -= 1
      # Balanced again: this is the brace that closes the starting block.
      return cursor if open_blocks == 0
    end
    cursor += 1
  end

  # Ran off the end without balancing; the result is then >= @len.
  cursor
end
270
+
271
+ # Parse selector (read until '{')
272
def parse_selector
  # Reads from the cursor up to the next '{', consumes the brace, and returns
  # the text before it with surrounding whitespace removed.
  # Returns nil (cursor left at EOF) when no '{' follows.
  start_pos = @pos

  # Same bounds test as eof?/peek_byte, written inline for the hot loop
  @pos += 1 while @pos < @len && @css.getbyte(@pos) != BYTE_LBRACE

  # Hit EOF without finding '{'
  return nil if @pos >= @len

  selector_text = byteslice_encoded(start_pos, @pos - start_pos)

  # The loop above can only stop short of EOF on '{', so consume it.
  @pos += 1

  # strip! trims in place (no extra allocation) but returns nil when there was
  # nothing to trim, so its result must not be used as the return value:
  # "body{...}" would otherwise be reported as "no selector".
  selector_text.strip!
  selector_text
end
292
+
293
+ # Parse mixed block containing declarations AND nested selectors/at-rules
294
+ # Translated from C: see ext/cataract/css_parser.c parse_mixed_block
295
+ # Returns: Array of declarations (only the declarations, not nested rules)
296
def parse_mixed_block(start_pos, end_pos, parent_selector, parent_rule_id, parent_media_sym)
  # Walks the byte range [start_pos, end_pos) of a block that may mix plain
  # declarations with nested rules and nested @media blocks. Nested rules are
  # appended to @rules as a side effect; only this block's own declarations
  # are returned.
  #
  # Callers bump @depth before recursing, so runaway nesting stops here.
  if @depth > MAX_PARSE_DEPTH
    raise DepthError, "CSS nesting too deep: exceeded maximum depth of #{MAX_PARSE_DEPTH}"
  end

  own_declarations = []
  cursor = start_pos

  while cursor < end_pos
    # Leading whitespace
    cursor += 1 while cursor < end_pos && whitespace?(@css.getbyte(cursor))
    break if cursor >= end_pos

    lead = @css.getbyte(cursor)

    # Comment: skip through the closing */ (or stop one byte before end_pos
    # when it is never closed)
    if lead == BYTE_SLASH && cursor + 1 < end_pos && @css.getbyte(cursor + 1) == BYTE_STAR
      cursor += 2
      while cursor + 1 < end_pos
        if @css.getbyte(cursor) == BYTE_STAR && @css.getbyte(cursor + 1) == BYTE_SLASH
          cursor += 2
          break
        end
        cursor += 1
      end
      next
    end

    # Nested "@media <query> {": exact lowercase match followed by whitespace.
    # The bounds test guarantees the byte after "@media" lies inside the block.
    if lead == BYTE_AT && cursor + 6 < end_pos &&
       byteslice_encoded(cursor, 6) == '@media' &&
       whitespace?(@css.getbyte(cursor + 6))
      query_start = cursor + 6
      query_start += 1 while query_start < end_pos && whitespace?(@css.getbyte(query_start))

      # Locate the '{' that opens the media block
      brace_at = query_start
      brace_at += 1 while brace_at < end_pos && @css.getbyte(brace_at) != BYTE_LBRACE
      break if brace_at >= end_pos

      # Query text without trailing whitespace
      query_end = brace_at
      query_end -= 1 while query_end > query_start && whitespace?(@css.getbyte(query_end - 1))
      query_text = byteslice_encoded(query_start, query_end - query_start)
      # The query is kept exactly as written (parentheses included)
      query_text.strip!
      child_media_sym = query_text.to_sym

      # Body of the media block, then step over its closing brace
      inner_start = brace_at + 1
      inner_end = find_matching_brace(inner_start)
      cursor = inner_end
      cursor += 1 if cursor < end_pos

      # Parent query + child query
      merged_media_sym = combine_media_queries(parent_media_sym, child_media_sym)

      # The ID is taken before recursing, so this rule's ID is lower than the
      # IDs of anything nested inside it
      media_rule_id = @rule_id_counter
      @rule_id_counter += 1

      @depth += 1
      inner_declarations = parse_mixed_block(inner_start, inner_end,
                                             parent_selector, media_rule_id, merged_media_sym)
      @depth -= 1

      # Same selector as the parent, filed under the combined media query
      media_rule = Rule.new(
        media_rule_id,
        parent_selector,
        inner_declarations,
        nil, # specificity
        parent_rule_id,
        nil  # nesting_style (nil for @media nesting)
      )

      @_has_nesting = true unless parent_rule_id.nil?

      @rules << media_rule
      (@_media_index[merged_media_sym] ||= []) << media_rule_id

      next
    end

    # Nested selector: recognised purely by its first byte
    if lead == BYTE_AMPERSAND || lead == BYTE_DOT || lead == BYTE_HASH ||
       lead == BYTE_LBRACKET || lead == BYTE_COLON || lead == BYTE_ASTERISK ||
       lead == BYTE_GT || lead == BYTE_PLUS || lead == BYTE_TILDE || lead == BYTE_AT
      sel_start = cursor
      cursor += 1 while cursor < end_pos && @css.getbyte(cursor) != BYTE_LBRACE
      break if cursor >= end_pos

      # Selector text without trailing whitespace
      sel_end = cursor
      sel_end -= 1 while sel_end > sel_start && whitespace?(@css.getbyte(sel_end - 1))

      cursor += 1 # step over '{'

      # Body of the nested rule, then step over its closing brace
      inner_start = cursor
      inner_end = find_matching_brace(cursor)
      cursor = inner_end
      cursor += 1 if cursor < end_pos

      # Each comma-separated selector becomes its own rule; the nested body
      # is re-parsed for every one of them
      byteslice_encoded(sel_start, sel_end - sel_start).split(',').each do |segment|
        segment.strip!
        next if segment.empty?

        resolved_selector, nesting_style = resolve_nested_selector(parent_selector, segment)

        rule_id = @rule_id_counter
        @rule_id_counter += 1

        @depth += 1
        inner_declarations = parse_mixed_block(inner_start, inner_end,
                                               resolved_selector, rule_id, parent_media_sym)
        @depth -= 1

        nested_rule = Rule.new(
          rule_id,
          resolved_selector,
          inner_declarations,
          nil, # specificity
          parent_rule_id,
          nesting_style
        )

        @_has_nesting = true unless parent_rule_id.nil?

        @rules << nested_rule
        (@_media_index[parent_media_sym] ||= []) << rule_id if parent_media_sym
      end

      next
    end

    # Otherwise: a declaration. Scan the property name first.
    prop_start = cursor
    while cursor < end_pos
      byte = @css.getbyte(cursor)
      break if byte == BYTE_COLON || byte == BYTE_SEMICOLON || byte == BYTE_LBRACE

      cursor += 1
    end

    if cursor >= end_pos || @css.getbyte(cursor) != BYTE_COLON
      # Malformed: drop everything through the next semicolon
      cursor += 1 while cursor < end_pos && @css.getbyte(cursor) != BYTE_SEMICOLON
      cursor += 1 if cursor < end_pos
      next
    end

    # Property name without trailing whitespace
    prop_end = cursor
    prop_end -= 1 while prop_end > prop_start && whitespace?(@css.getbyte(prop_end - 1))

    property = byteslice_encoded(prop_start, prop_end - prop_start, encoding: 'US-ASCII')
    property.downcase!

    cursor += 1 # step over ':'

    # Whitespace between ':' and the value
    cursor += 1 while cursor < end_pos && whitespace?(@css.getbyte(cursor))

    # Value runs to the next ';' or '}'
    val_start = cursor
    while cursor < end_pos
      byte = @css.getbyte(cursor)
      break if byte == BYTE_SEMICOLON || byte == BYTE_RBRACE

      cursor += 1
    end

    # Value without trailing whitespace
    val_end = cursor
    val_end -= 1 while val_end > val_start && whitespace?(@css.getbyte(val_end - 1))

    value = byteslice_encoded(val_start, val_end - val_start)

    # !important flag (exact suffix match). rstrip is fine here: the flag is
    # rare, so this is not on the hot path.
    important = false
    if value.end_with?('!important')
      important = true
      value = value[0, value.length - 10].rstrip # drop '!important' and trailing whitespace
    end

    cursor += 1 if cursor < end_pos && @css.getbyte(cursor) == BYTE_SEMICOLON

    # Declarations with an empty name or an empty value are dropped
    own_declarations << Declaration.new(property, value, important) if prop_end > prop_start && val_end > val_start
  end

  own_declarations
end
523
+
524
+ # Parse declaration block (inside { ... })
525
+ # Assumes we're already past the opening '{'
526
def parse_declarations
  # Parses "property: value;" pairs starting at @pos, which must already be
  # past the opening '{'. Stops after consuming the closing '}' or at EOF.
  # Returns an Array of Declaration structs (property, value, important).
  declarations = []

  until eof?
    skip_ws_and_comments
    break if eof?

    # '}' closes the block: consume it and stop
    if peek_byte == BYTE_RBRACE
      @pos += 1
      break
    end

    # Property name (read until ':'; ';' or '}' means the entry is malformed)
    property_start = @pos
    until eof?
      byte = peek_byte
      break if byte == BYTE_COLON || byte == BYTE_SEMICOLON || byte == BYTE_RBRACE

      @pos += 1
    end

    # No colon found: recover at the next ';' or '}'
    if eof? || peek_byte != BYTE_COLON
      skip_to_semicolon_or_brace
      next
    end

    property = byteslice_encoded(property_start, @pos - property_start, encoding: 'US-ASCII')
    property.strip!
    property.downcase!
    @pos += 1 # skip ':'

    skip_ws_and_comments

    # Value (read until ';' or '}')
    value_start = @pos
    important = false

    until eof?
      byte = peek_byte
      break if byte == BYTE_SEMICOLON || byte == BYTE_RBRACE

      @pos += 1
    end

    value = byteslice_encoded(value_start, @pos - value_start)
    value.strip!

    # Look for a trailing "!important" (whitespace allowed around "!"),
    # scanning backwards byte-by-byte with no regexp.
    if value.bytesize > 10
      i = value.bytesize - 1
      # Skip trailing whitespace
      while i >= 0
        b = value.getbyte(i)
        break unless b == BYTE_SPACE || b == BYTE_TAB

        i -= 1
      end

      # i is a BYTE offset, so the 9-byte keyword must be cut out with
      # byteslice. Character indexing (value[a..b]) lands on the wrong text as
      # soon as the value contains a multi-byte character.
      if i >= 8 && value.byteslice(i - 8, 9) == 'important'
        i -= 9
        # Skip whitespace before 'important'
        while i >= 0
          b = value.getbyte(i)
          break unless b == BYTE_SPACE || b == BYTE_TAB

          i -= 1
        end
        # Check for '!'
        if i >= 0 && value.getbyte(i) == BYTE_BANG
          important = true
          # Keep only what precedes '!'
          value = value.byteslice(0, i)
          value.strip!
        end
      end
    end

    # Skip semicolon if present
    @pos += 1 if peek_byte == BYTE_SEMICOLON

    declarations << Declaration.new(property, value, important)
  end

  declarations
end
618
+
619
+ # Parse at-rule (@media, @supports, @charset, @keyframes, @font-face, etc)
620
+ # Translated from C: see ext/cataract/css_parser.c lines 962-1128
621
def parse_at_rule
  # Entry point for everything that starts with '@'. Reads the at-rule name
  # and hands off to a dedicated handler. The handlers run once per at-rule,
  # not once per byte, so the extra method calls stay off the hot path.
  at_rule_start = @pos # Points to '@'
  @pos += 1            # skip '@'

  # The name ends at the first whitespace byte or '{'
  name_start = @pos
  until eof?
    byte = peek_byte
    break if whitespace?(byte) || byte == BYTE_LBRACE

    @pos += 1
  end

  at_rule_name = byteslice_encoded(name_start, @pos - name_start)

  # @charset "value";
  if at_rule_name == 'charset'
    at_rule_consume_charset
    return
  end

  # @import must come before all rules (except @charset)
  if at_rule_name == 'import'
    at_rule_handle_import
    return
  end

  # Conditional group rules: @supports, @layer, @container, @scope.
  # Parsed like @media but without changing the media context.
  if AT_RULE_TYPES.include?(at_rule_name)
    at_rule_parse_conditional_group
    return
  end

  if at_rule_name == 'media'
    at_rule_parse_media
    return
  end

  # @keyframes (and vendor-prefixed forms) contain a rule list
  if at_rule_name == 'keyframes' ||
     at_rule_name == '-webkit-keyframes' ||
     at_rule_name == '-moz-keyframes'
    at_rule_parse_keyframes(at_rule_start)
    return
  end

  # @font-face contains a declaration list
  if at_rule_name == 'font-face'
    at_rule_parse_font_face(at_rule_start)
    return
  end

  # Unknown at-rule (@property, @page, @counter-style, etc.)
  at_rule_parse_generic(at_rule_start)
end

# Stores the value of `@charset "value";` (quotes removed) in @charset and
# consumes the terminating semicolon.
def at_rule_consume_charset
  skip_ws_and_comments
  value_start = @pos
  @pos += 1 while !eof? && peek_byte != BYTE_SEMICOLON

  charset_value = byteslice_encoded(value_start, @pos - value_start)
  charset_value.strip!

  # Drop every quote byte. i is a BYTE offset, so the text between quotes is
  # copied with byteslice; indexing characters with it (charset_value[i])
  # picks the wrong character, or nil, once a multi-byte character appears.
  # Quote bytes are plain ASCII, so each copied run holds whole characters.
  unquoted = String.new
  run_start = 0
  i = 0
  len = charset_value.bytesize
  while i < len
    byte = charset_value.getbyte(i)
    if byte == BYTE_DQUOTE || byte == BYTE_SQUOTE
      unquoted << charset_value.byteslice(run_start, i - run_start) if i > run_start
      run_start = i + 1
    end
    i += 1
  end
  unquoted << charset_value.byteslice(run_start, len - run_start) if len > run_start
  @charset = unquoted

  @pos += 1 if peek_byte == BYTE_SEMICOLON # consume semicolon
end

# Parses an @import, or warns and skips it when a rule has already been seen.
def at_rule_handle_import
  if @rules.size > 0
    warn 'CSS @import ignored: @import must appear before all rules (found import after rules)'
    # Skip through the terminating semicolon
    @pos += 1 while !eof? && peek_byte != BYTE_SEMICOLON
    @pos += 1 if peek_byte == BYTE_SEMICOLON
    return
  end

  parse_import_statement
end

# Moves @pos forward to the next '{'. Returns false when EOF comes first.
def at_rule_seek_block_open
  @pos += 1 until eof? || peek_byte == BYTE_LBRACE
  !eof?
end

# Returns the text from at_rule_start up to the next '{' (trailing whitespace
# removed) and consumes the '{'. Returns nil when no '{' follows.
def at_rule_prelude_text(at_rule_start)
  return nil unless at_rule_seek_block_open

  text_end = @pos
  text_end -= 1 while text_end > at_rule_start && whitespace?(@css.getbyte(text_end - 1))
  text = byteslice_encoded(at_rule_start, text_end - at_rule_start)

  @pos += 1 # skip '{'
  text
end

# Raises DepthError when a nested parser would exceed MAX_PARSE_DEPTH.
def at_rule_guard_depth
  return unless @depth + 1 > MAX_PARSE_DEPTH

  raise DepthError, "CSS nesting too deep: exceeded maximum depth of #{MAX_PARSE_DEPTH}"
end

# Copies a nested parser's media index into ours, shifting its rule IDs by
# the current rule counter (call this before renumbering the nested rules).
def at_rule_merge_media_index(nested_index)
  nested_index.each do |media, rule_ids|
    bucket = (@_media_index[media] ||= [])
    rule_ids.each { |rid| bucket << (@rule_id_counter + rid) }
  end
end

# Places @pos just after the closing brace found at block_end.
def at_rule_finish_block(block_end)
  @pos = block_end
  @pos += 1 if @pos < @len && @css.getbyte(@pos) == BYTE_RBRACE
end

# @supports / @layer / @container / @scope: parses the block with a nested
# parser and appends its rules, keeping the current media context.
def at_rule_parse_conditional_group
  skip_ws_and_comments
  return unless at_rule_seek_block_open

  @pos += 1 # skip '{'

  block_start = @pos
  block_end = find_matching_brace(@pos)

  at_rule_guard_depth

  nested_result = Parser.new(
    byteslice_encoded(block_start, block_end - block_start),
    parent_media_sym: @parent_media_sym, depth: @depth + 1
  ).parse

  at_rule_merge_media_index(nested_result[:_media_index])

  # Renumber the nested rules into this parser's ID space
  nested_result[:rules].each do |rule|
    rule.id = @rule_id_counter
    @rule_id_counter += 1
    @rules << rule
  end

  at_rule_finish_block(block_end)
end

# @media: parses the block under the combined media query and records every
# resulting rule in the media index.
def at_rule_parse_media
  skip_ws_and_comments

  # Media query text runs up to the opening brace
  mq_start = @pos
  return unless at_rule_seek_block_open

  mq_end = @pos
  mq_end -= 1 while mq_end > mq_start && whitespace?(@css.getbyte(mq_end - 1))

  child_media_string = byteslice_encoded(mq_start, mq_end - mq_start)
  # Kept exactly as written (parentheses included)
  child_media_string.strip!
  child_media_sym = child_media_string.to_sym

  combined_media_sym = combine_media_queries(@parent_media_sym, child_media_sym)

  # Each previously unseen query counts against MAX_MEDIA_QUERIES
  unless @_media_index.key?(combined_media_sym)
    @media_query_count += 1
    if @media_query_count > MAX_MEDIA_QUERIES
      raise SizeError, "Too many media queries: exceeded maximum of #{MAX_MEDIA_QUERIES}"
    end
  end

  @pos += 1 # skip '{'

  block_start = @pos
  block_end = find_matching_brace(@pos)

  at_rule_guard_depth

  nested_result = Parser.new(
    byteslice_encoded(block_start, block_end - block_start),
    parent_media_sym: combined_media_sym,
    depth: @depth + 1
  ).parse

  at_rule_merge_media_index(nested_result[:_media_index])

  # The media types depend only on the query, so they are computed once per
  # block (and not at all when the block produced no rules).
  media_types = nil
  nested_result[:rules].each do |rule|
    rule.id = @rule_id_counter

    # Individual media types go in BEFORE the full query, so that in hash
    # insertion order the full query comes last.
    media_types ||= Cataract.parse_media_types(combined_media_sym)
    media_types.each do |media_type|
      # Skip the entry that equals the full query to avoid duplication
      if media_type != combined_media_sym
        (@_media_index[media_type] ||= []) << @rule_id_counter
      end
    end

    (@_media_index[combined_media_sym] ||= []) << @rule_id_counter

    @rule_id_counter += 1
    @rules << rule
  end

  at_rule_finish_block(block_end)
end

# @keyframes: parses the frame blocks (0%/from/to ...) with a nested parser
# and stores them as the content of a single AtRule.
def at_rule_parse_keyframes(at_rule_start)
  selector = at_rule_prelude_text(at_rule_start) # e.g. "@keyframes fade"
  return if selector.nil?

  block_start = @pos
  block_end = find_matching_brace(@pos)

  at_rule_guard_depth

  content = Parser.new(byteslice_encoded(block_start, block_end - block_start), depth: @depth + 1).parse[:rules]

  at_rule_finish_block(block_end)

  rule_id = @rule_id_counter
  @rule_id_counter += 1

  @rules << AtRule.new(rule_id, selector, content, nil)
end

# @font-face: stores the block's declarations as the content of an AtRule.
def at_rule_parse_font_face(at_rule_start)
  selector = at_rule_prelude_text(at_rule_start) # "@font-face"
  return if selector.nil?

  decl_start = @pos
  decl_end = find_matching_brace(@pos)

  content = parse_declarations_block(decl_start, decl_end)

  at_rule_finish_block(decl_end)

  rule_id = @rule_id_counter
  @rule_id_counter += 1

  @rules << AtRule.new(rule_id, selector, content, nil)
end

# Any other at-rule: treated as a regular rule whose selector is the at-rule
# prelude (e.g. "@property --main-color").
def at_rule_parse_generic(at_rule_start)
  selector = at_rule_prelude_text(at_rule_start)
  return if selector.nil?

  declarations = parse_declarations

  rule = Rule.new(
    @rule_id_counter, # id
    selector,         # selector
    declarations,     # declarations
    nil,              # specificity
    nil,              # parent_rule_id
    nil               # nesting_style
  )

  @rules << rule
  @rule_id_counter += 1
end
959
+
960
+ # Check if block contains nested selectors vs just declarations
961
+ # Translated from C: see ext/cataract/css_parser.c has_nested_selectors
962
def has_nested_selectors?(start_pos, end_pos)
  # Heuristic scan of @css[start_pos...end_pos]: answers true as soon as an
  # item looks like a nested rule (selector-ish first byte followed by '{'
  # before any ';' or newline) or starts with '@'; otherwise false.
  cursor = start_pos

  until cursor >= end_pos
    # Leading whitespace carries no information.
    cursor += 1 while cursor < end_pos && whitespace?(@css.getbyte(cursor))
    return false if cursor >= end_pos

    # Step over a /* ... */ comment, then restart the scan right after it.
    if cursor + 1 < end_pos && @css.getbyte(cursor) == BYTE_SLASH && @css.getbyte(cursor + 1) == BYTE_STAR
      cursor += 2
      while cursor + 1 < end_pos
        closes = @css.getbyte(cursor) == BYTE_STAR && @css.getbyte(cursor + 1) == BYTE_SLASH
        cursor += closes ? 2 : 1
        break if closes
      end
      next
    end

    case @css.getbyte(cursor)
    when BYTE_AT
      # Any at-keyword inside the block counts as nested content.
      return true
    when BYTE_AMPERSAND, BYTE_DOT, BYTE_HASH, BYTE_LBRACKET, BYTE_COLON,
         BYTE_ASTERISK, BYTE_GT, BYTE_PLUS, BYTE_TILDE
      # Selector-like start: probe ahead on the same line for an opening brace.
      probe = cursor + 1
      while probe < end_pos
        ahead = @css.getbyte(probe)
        break if ahead == BYTE_LBRACE || ahead == BYTE_SEMICOLON || ahead == BYTE_NEWLINE

        probe += 1
      end
      return true if probe < end_pos && @css.getbyte(probe) == BYTE_LBRACE
    end

    # Not nested: discard the rest of this item (up to ';' or end of line)
    # and step over the terminator itself.
    until cursor >= end_pos
      here = @css.getbyte(cursor)
      break if here == BYTE_SEMICOLON || here == BYTE_NEWLINE

      cursor += 1
    end
    cursor += 1 if cursor < end_pos
  end

  false
end
1011
+
1012
+ # Resolve nested selector against parent
1013
+ # Translated from C: see ext/cataract/css_parser.c resolve_nested_selector
1014
+ # Examples:
1015
+ # resolve_nested_selector(".parent", "& .child") => [".parent .child", 1] (explicit)
1016
+ # resolve_nested_selector(".parent", "&:hover") => [".parent:hover", 1] (explicit)
1017
+ # resolve_nested_selector(".parent", "&.active") => [".parent.active", 1] (explicit)
1018
+ # resolve_nested_selector(".parent", ".child") => [".parent .child", 0] (implicit)
1019
+ # resolve_nested_selector(".parent", "> .child") => [".parent > .child", 0] (implicit combinator)
1020
+ #
1021
+ # Returns: [resolved_selector, nesting_style]
1022
+ # nesting_style: 0 = NESTING_STYLE_IMPLICIT, 1 = NESTING_STYLE_EXPLICIT
1023
def resolve_nested_selector(parent_selector, nested_selector)
  # Resolves a nested selector against its parent.
  #
  # parent_selector - selector text of the enclosing rule
  # nested_selector - selector text as written inside the parent block
  #
  # Returns [resolved_selector, nesting_style] where nesting_style is
  # NESTING_STYLE_EXPLICIT when the nested selector contains '&' and
  # NESTING_STYLE_IMPLICIT otherwise.
  len = nested_selector.bytesize

  # Index of the first byte that is not ASCII whitespace. Both branches need
  # it, so it is computed once. A manual byte loop is kept (instead of lstrip)
  # to match the byte-level approach used throughout this parser.
  start_pos = 0
  while start_pos < len
    byte = nested_selector.getbyte(start_pos)
    break unless byte == BYTE_SPACE || byte == BYTE_TAB || byte == BYTE_NEWLINE || byte == BYTE_CR

    start_pos += 1
  end

  # Byte-level search for '&'.
  has_ampersand = false
  i = 0
  while i < len
    if nested_selector.getbyte(i) == BYTE_AMPERSAND
      has_ampersand = true
      break
    end
    i += 1
  end

  result = String.new

  unless has_ampersand
    # Implicit nesting: "<parent> <nested without leading whitespace>".
    result << parent_selector
    result << ' '
    result << nested_selector.byteslice(start_pos, len - start_pos)
    return [result, NESTING_STYLE_IMPLICIT]
  end

  # Explicit nesting. A selector that starts with a combinator is relative,
  # so the parent is prepended as well as substituted:
  #   "+ .bar + &" with parent ".foo" => ".foo + .bar + .foo"
  if start_pos < len
    first_byte = nested_selector.getbyte(start_pos)
    if first_byte == BYTE_PLUS || first_byte == BYTE_GT || first_byte == BYTE_TILDE
      result << parent_selector
      result << ' '
    end
  end

  # Substitute every '&' with the parent selector. The text between two '&'
  # bytes is appended as one slice rather than byte-by-byte via Integer#chr:
  # chr yields single-byte fragments that are not valid pieces of a multibyte
  # character, which either raised Encoding::CompatibilityError or produced a
  # wrongly encoded selector for non-ASCII input. '&' is a single ASCII byte,
  # so slicing at its positions never cuts a UTF-8 character in half.
  segment_start = 0
  i = 0
  while i < len
    if nested_selector.getbyte(i) == BYTE_AMPERSAND
      result << nested_selector.byteslice(segment_start, i - segment_start) if i > segment_start
      result << parent_selector
      segment_start = i + 1
    end
    i += 1
  end
  result << nested_selector.byteslice(segment_start, len - segment_start) if segment_start < len

  [result, NESTING_STYLE_EXPLICIT]
end
1104
+
1105
+ # Combine parent and child media queries
1106
+ # Translated from C: see ext/cataract/css_parser.c combine_media_queries
1107
+ # Examples:
1108
+ # parent="screen", child="min-width: 500px" => "screen and (min-width: 500px)"
1109
+ # parent=nil, child="print" => "print"
1110
def combine_media_queries(parent, child)
  # With only one side present that side is returned untouched; with both,
  # the result is the symbol "<parent> and <child>", where a child that looks
  # like a bare condition (contains ':') is parenthesised unless it is
  # already wrapped in parentheses.
  if parent.nil?
    child
  elsif child.nil?
    parent
  else
    condition = child.to_s
    already_wrapped = condition.start_with?('(') && condition.end_with?(')')
    condition = "(#{condition})" if condition.include?(':') && !already_wrapped

    :"#{parent} and #{condition}"
  end
end
1134
+
1135
+ # Skip to next semicolon or closing brace (error recovery)
1136
def skip_to_semicolon_or_brace
  # Error recovery: advance until the cursor sits on ';' or '}' (or input is
  # exhausted). `until` is used rather than a negated `while`, as elsewhere
  # in this parser.
  @pos += 1 until eof? || peek_byte == BYTE_SEMICOLON || peek_byte == BYTE_RBRACE

  # A terminating ';' is consumed; a '}' is left in place.
  return unless peek_byte == BYTE_SEMICOLON

  @pos += 1
end
1143
+
1144
+ # Parse an @import statement
1145
+ # @import "url" [media-query];
1146
+ # @import url("url") [media-query];
1147
def parse_import_statement
  # Parses the remainder of an @import statement starting at @pos and, on
  # success, appends an ImportStatement (resolved: false) to @imports using
  # the next rule id.
  skip_ws_and_comments

  # The address may be wrapped in url( ... ).
  wrapped_in_url = @pos + 4 <= @len && match_ascii_ci?(@css, @pos, 'url(')
  if wrapped_in_url
    @pos += 4
    skip_ws_and_comments
  end

  # Only quoted addresses are accepted; anything else (including an unquoted
  # url(...) argument) is treated as invalid and discarded through the ';'.
  opening_quote = peek_byte
  if eof? || !(opening_quote == BYTE_DQUOTE || opening_quote == BYTE_SQUOTE)
    @pos += 1 until eof? || peek_byte == BYTE_SEMICOLON
    @pos += 1 unless eof?
    return
  end

  @pos += 1 # step over the opening quote
  url_start = @pos

  # Advance to the matching quote; a backslash shields the byte after it.
  until eof? || peek_byte == opening_quote
    escaped = peek_byte == BYTE_BACKSLASH && @pos + 1 < @len
    @pos += escaped ? 2 : 1
  end

  # Unterminated string: nothing is recorded.
  return if eof?

  url = byteslice_encoded(url_start, @pos - url_start)
  @pos += 1 # step over the closing quote

  if wrapped_in_url
    skip_ws_and_comments
    @pos += 1 if peek_byte == BYTE_RPAREN
  end

  skip_ws_and_comments

  # Whatever precedes the ';' is kept as the media query, minus trailing
  # whitespace.
  media = nil
  unless eof? || peek_byte == BYTE_SEMICOLON
    media_start = @pos
    @pos += 1 until eof? || peek_byte == BYTE_SEMICOLON

    media_end = @pos
    media_end -= 1 while media_end > media_start && whitespace?(@css.getbyte(media_end - 1))

    media = byteslice_encoded(media_start, media_end - media_start).to_sym if media_end > media_start
  end

  @pos += 1 if peek_byte == BYTE_SEMICOLON

  @imports << ImportStatement.new(@rule_id_counter, url, media, false)
  @rule_id_counter += 1
end
1229
+
1230
+ # Skip @import statements at the beginning of CSS (DEPRECATED - now parsed)
1231
+ # Per CSS spec, @import must come before all rules (except @charset)
1232
def skip_imports
  # Advances @pos past any leading run of whitespace, comments and @import
  # statements, stopping at the first byte that is none of those.
  until eof?
    @pos += 1 until eof? || !whitespace?(peek_byte)
    return if eof?

    # /* ... */ comment: jump just past its terminator and rescan.
    if @pos + 1 < @len && @css.getbyte(@pos) == BYTE_SLASH && @css.getbyte(@pos + 1) == BYTE_STAR
      @pos += 2
      while @pos + 1 < @len
        terminator = @css.getbyte(@pos) == BYTE_STAR && @css.getbyte(@pos + 1) == BYTE_SLASH
        @pos += terminator ? 2 : 1
        break if terminator
      end
      next
    end

    # Anything other than "@import" (compared case-insensitively) ends the run.
    at_import = @pos + 7 <= @len && @css.getbyte(@pos) == BYTE_AT && match_ascii_ci?(@css, @pos + 1, 'import')
    return unless at_import

    # The keyword must be followed by whitespace, a quote, or end of input,
    # so that e.g. "@importfoo" is not mistaken for an import.
    after_keyword = @pos + 7
    delimited = after_keyword >= @len ||
                whitespace?(@css.getbyte(after_keyword)) ||
                @css.getbyte(after_keyword) == BYTE_SQUOTE ||
                @css.getbyte(after_keyword) == BYTE_DQUOTE
    return unless delimited

    # Drop the statement up to and including its ';'.
    @pos += 1 until eof? || peek_byte == BYTE_SEMICOLON
    @pos += 1 unless eof?
  end
end
1270
+
1271
+ # Parse a block of declarations given start/end positions
1272
+ # Used for @font-face and other at-rules
1273
+ # Translated from C: see ext/cataract/css_parser.c parse_declarations
1274
def parse_declarations_block(start_pos, end_pos)
  # Parses the "property: value;" pairs found in @css[start_pos...end_pos].
  #
  # start_pos - byte offset of the first byte inside the block
  # end_pos   - byte offset just past the last byte to consider
  #
  # Returns an Array of Declaration objects (important flag always false).
  # Property names are lower-cased; items without a ':' are skipped.
  declarations = []
  pos = start_pos
  lparen = 0x28 # '(' - spelled as a literal because only BYTE_RPAREN is referenced in this part of the file

  while pos < end_pos
    # Skip whitespace between declarations.
    pos += 1 while pos < end_pos && whitespace?(@css.getbyte(pos))
    break if pos >= end_pos

    # Property name: everything up to ':' (or a terminator if malformed).
    prop_start = pos
    while pos < end_pos
      byte = @css.getbyte(pos)
      break if byte == BYTE_COLON || byte == BYTE_SEMICOLON || byte == BYTE_RBRACE

      pos += 1
    end

    # No ':' found: malformed item, recover at the next ';'.
    if pos >= end_pos || @css.getbyte(pos) != BYTE_COLON
      pos += 1 while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON
      pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON
      next
    end

    prop_end = pos
    prop_end -= 1 while prop_end > prop_start && whitespace?(@css.getbyte(prop_end - 1))

    property = byteslice_encoded(prop_start, prop_end - prop_start, encoding: 'US-ASCII')
    property.downcase!

    pos += 1 # Skip ':'

    # Skip leading whitespace in value.
    pos += 1 while pos < end_pos && whitespace?(@css.getbyte(pos))

    # Value: runs to the next ';' or '}' that is outside any quoted string
    # and outside any parentheses. Without that qualification a value such as
    #   src: url(data:font/woff;base64,AAAA) format("woff")
    # was cut off at the ';' inside the data URI and the remainder was then
    # thrown away as a malformed declaration.
    val_start = pos
    paren_depth = 0
    open_quote = nil
    while pos < end_pos
      byte = @css.getbyte(pos)
      if open_quote
        if byte == BYTE_BACKSLASH && pos + 1 < end_pos
          pos += 1 # the escaped byte cannot close the string
        elsif byte == open_quote
          open_quote = nil
        end
      elsif byte == BYTE_DQUOTE || byte == BYTE_SQUOTE
        open_quote = byte
      elsif byte == lparen
        paren_depth += 1
      elsif byte == BYTE_RPAREN
        paren_depth -= 1 if paren_depth > 0
      elsif paren_depth.zero? && (byte == BYTE_SEMICOLON || byte == BYTE_RBRACE)
        break
      end
      pos += 1
    end
    val_end = pos

    # Trim trailing whitespace from value.
    val_end -= 1 while val_end > val_start && whitespace?(@css.getbyte(val_end - 1))

    value = byteslice_encoded(val_start, val_end - val_start)

    pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON

    # Create Declaration struct (at-rules don't use !important)
    declarations << Declaration.new(property, value, false)
  end

  declarations
end
1339
+ end
1340
+ end