cataract 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/.rubocop.yml +2 -0
- data/BENCHMARKS.md +41 -38
- data/CHANGELOG.md +13 -0
- data/README.md +9 -3
- data/ext/cataract/cataract.c +273 -92
- data/ext/cataract/cataract.h +4 -3
- data/ext/cataract/css_parser.c +125 -11
- data/ext/cataract/flatten.c +271 -16
- data/lib/cataract/declaration.rb +19 -0
- data/lib/cataract/pure/flatten.rb +103 -8
- data/lib/cataract/pure/parser.rb +203 -139
- data/lib/cataract/pure/serializer.rb +217 -115
- data/lib/cataract/pure.rb +4 -2
- data/lib/cataract/rule.rb +39 -3
- data/lib/cataract/stylesheet.rb +137 -14
- data/lib/cataract/stylesheet_scope.rb +11 -4
- data/lib/cataract/version.rb +1 -1
- metadata +1 -1
|
@@ -152,16 +152,27 @@ module Cataract
|
|
|
152
152
|
end
|
|
153
153
|
|
|
154
154
|
# Expand shorthands in regular rules only (AtRules don't have declarations)
|
|
155
|
+
# NOTE: Using manual each + concat instead of .flat_map for performance.
|
|
156
|
+
# The concise form (.flat_map) is ~5-10% slower depending on number of shorthands to expand.
|
|
155
157
|
regular_rules.each do |rule|
|
|
156
|
-
|
|
158
|
+
expanded = []
|
|
159
|
+
rule.declarations.each do |decl|
|
|
160
|
+
expanded.concat(_expand_shorthand(decl))
|
|
161
|
+
end
|
|
162
|
+
rule.declarations.replace(expanded)
|
|
157
163
|
end
|
|
158
164
|
|
|
159
165
|
merged_rules = []
|
|
160
166
|
|
|
161
167
|
# Always group by selector and preserve original selectors
|
|
162
168
|
# (Nesting is flattened during parsing, so we just merge by resolved selector)
|
|
163
|
-
|
|
164
|
-
|
|
169
|
+
# NOTE: Using manual each instead of .group_by to avoid intermediate hash allocation.
|
|
170
|
+
# The concise form (.group_by) is ~10-25% slower depending on selector uniqueness.
|
|
171
|
+
by_selector = {}
|
|
172
|
+
regular_rules.each do |rule|
|
|
173
|
+
(by_selector[rule.selector] ||= []) << rule
|
|
174
|
+
end
|
|
175
|
+
by_selector.each do |selector, rules|
|
|
165
176
|
merged_rule = flatten_rules_for_selector(selector, rules)
|
|
166
177
|
merged_rules << merged_rule if merged_rule
|
|
167
178
|
end
|
|
@@ -169,16 +180,28 @@ module Cataract
|
|
|
169
180
|
# Recreate shorthands where possible
|
|
170
181
|
merged_rules.each { |rule| recreate_shorthands!(rule) }
|
|
171
182
|
|
|
183
|
+
# Assign new IDs before checking divergence (so we can build correct selector_lists hash)
|
|
184
|
+
merged_rules.each_with_index { |rule, i| rule.id = i }
|
|
185
|
+
|
|
186
|
+
# Handle selector list divergence: remove rules from selector lists if declarations no longer match
|
|
187
|
+
# This makes selector_list_id authoritative - if set, declarations MUST be identical
|
|
188
|
+
# Only process if selector_lists is enabled in the stylesheet's parser options
|
|
189
|
+
selector_lists = {}
|
|
190
|
+
parser_options = stylesheet.instance_variable_get(:@parser_options) || {}
|
|
191
|
+
if parser_options[:selector_lists]
|
|
192
|
+
update_selector_lists_for_divergence!(merged_rules, selector_lists)
|
|
193
|
+
end
|
|
194
|
+
|
|
172
195
|
# Add passthrough AtRules to output
|
|
173
196
|
merged_rules.concat(at_rules)
|
|
174
197
|
|
|
175
198
|
# Create result stylesheet
|
|
176
199
|
if mutate
|
|
177
200
|
stylesheet.instance_variable_set(:@rules, merged_rules)
|
|
178
|
-
# Update rule IDs
|
|
179
|
-
merged_rules.each_with_index { |rule, i| rule.id = i }
|
|
180
201
|
# Clear media index (no media rules after merge flattens everything)
|
|
181
202
|
stylesheet.instance_variable_set(:@media_index, {})
|
|
203
|
+
# Update selector lists with divergence tracking
|
|
204
|
+
stylesheet.instance_variable_set(:@_selector_lists, selector_lists)
|
|
182
205
|
stylesheet
|
|
183
206
|
else
|
|
184
207
|
# Create new Stylesheet with merged rules
|
|
@@ -186,8 +209,7 @@ module Cataract
|
|
|
186
209
|
result.instance_variable_set(:@rules, merged_rules)
|
|
187
210
|
result.instance_variable_set(:@media_index, {})
|
|
188
211
|
result.instance_variable_set(:@charset, stylesheet.charset)
|
|
189
|
-
|
|
190
|
-
merged_rules.each_with_index { |rule, i| rule.id = i }
|
|
212
|
+
result.instance_variable_set(:@_selector_lists, selector_lists)
|
|
191
213
|
result
|
|
192
214
|
end
|
|
193
215
|
end
|
|
@@ -267,6 +289,11 @@ module Cataract
|
|
|
267
289
|
|
|
268
290
|
return nil if declarations.empty?
|
|
269
291
|
|
|
292
|
+
# Preserve selector_list_id if all rules in group share the same one
|
|
293
|
+
selector_list_ids = rules.filter_map(&:selector_list_id)
|
|
294
|
+
selector_list_ids.uniq!
|
|
295
|
+
selector_list_id = selector_list_ids.size == 1 ? selector_list_ids.first : nil
|
|
296
|
+
|
|
270
297
|
# Create merged rule
|
|
271
298
|
Rule.new(
|
|
272
299
|
0, # ID will be updated later
|
|
@@ -274,7 +301,8 @@ module Cataract
|
|
|
274
301
|
declarations,
|
|
275
302
|
rules.first.specificity, # Use first rule's specificity
|
|
276
303
|
nil, # No parent after flattening
|
|
277
|
-
nil
|
|
304
|
+
nil, # No nesting style after flattening
|
|
305
|
+
selector_list_id # Preserve if all rules share same ID
|
|
278
306
|
)
|
|
279
307
|
end
|
|
280
308
|
|
|
@@ -1141,5 +1169,72 @@ module Cataract
|
|
|
1141
1169
|
rule.declarations.reject! { |d| LIST_STYLE_PROPERTIES.include?(d.property) }
|
|
1142
1170
|
rule.declarations << Declaration.new(PROP_LIST_STYLE, shorthand_value, important)
|
|
1143
1171
|
end
|
|
1172
|
+
|
|
1173
|
+
# Update selector lists to remove diverged rules
#
# After flattening/cascade, rules that were in the same selector list may have
# different declarations. This method builds the selector_lists hash with only
# rules that still match, and clears selector_list_id for diverged rules.
#
# Invariant established on exit: a rule keeps a non-nil selector_list_id only
# if that id is a key of +selector_lists+ and the rule's id is listed under it.
# Lists that end up with fewer than two members (because the other members
# diverged, or because only one rule carried the id to begin with) are
# dissolved: no hash entry is written and the remaining rule's id is cleared.
#
# @param merged_rules [Array<Rule>] Flattened rules (with new IDs assigned)
# @param selector_lists [Hash] Empty hash to populate with list_id => Array of rule IDs
def self.update_selector_lists_for_divergence!(merged_rules, selector_lists)
  # Group merged rules by selector_list_id (skip rules with no list)
  # Note: Using manual each loop instead of .select{}.group_by for performance.
  # The more concise form (.select + .group_by) is ~50-60% slower due to intermediate array allocation.
  rules_by_list = {}
  merged_rules.each do |r|
    next unless r.selector_list_id

    (rules_by_list[r.selector_list_id] ||= []) << r
  end

  # For each selector list, check if declarations still match
  rules_by_list.each do |list_id, rules_in_list|
    # First rule is the reference every other member is compared against
    reference_rule = rules_in_list.first
    reference_decls = reference_rule.declarations

    # Find rules that still match (have identical declarations)
    matching_rules = [reference_rule]

    # For a single-member list this slice is empty, so the loop is a no-op
    rules_in_list[1..].each do |rule|
      if declarations_equal?(reference_decls, rule.declarations)
        matching_rules << rule
      else
        # Clear selector_list_id for diverged rule
        rule.selector_list_id = nil
      end
    end

    # Only keep the selector list if at least 2 rules still match
    if matching_rules.size >= 2
      # Build selector_lists hash with NEW rule IDs
      selector_lists[list_id] = matching_rules.map(&:id)
    else
      # Dissolve the list: a lone rule must not point at a list id that has
      # no entry in selector_lists (covers both "everyone else diverged" and
      # "this was the only rule carrying the id").
      matching_rules.each { |r| r.selector_list_id = nil }
    end
  end
end
|
|
1223
|
+
|
|
1224
|
+
# Check if two declaration arrays are identical
#
# Comparison is positional: the arrays must have the same length and the
# declarations at each index must agree on property, value and important.
# The same declarations in a different order are therefore NOT equal.
#
# Implemented as an index loop rather than zip + all? so no intermediate
# pair arrays are allocated while comparing.
#
# @param decls1 [Array<Declaration>]
# @param decls2 [Array<Declaration>]
# @return [Boolean]
def self.declarations_equal?(decls1, decls2)
  count = decls1.size
  return false if count != decls2.size

  # Compare each declaration (property, value, important must all match)
  i = 0
  while i < count
    d1 = decls1[i]
    d2 = decls2[i]
    return false unless d1.property == d2.property &&
                        d1.value == d2.value &&
                        d1.important == d2.important

    i += 1
  end

  true
end
|
|
1144
1239
|
end
|
|
1145
1240
|
end
|
data/lib/cataract/pure/parser.rb
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# Do NOT refactor to "clean Ruby" without benchmarking - you will make it slower.
|
|
12
12
|
#
|
|
13
13
|
# Example: RuboCop suggests using `.positive?` instead of `> 0`, but benchmarking
|
|
14
|
-
# shows `> 0` is 1.26x faster
|
|
14
|
+
# shows `> 0` is 1.26x faster. These micro-optimizations
|
|
15
15
|
# matter in a hot parsing loop.
|
|
16
16
|
|
|
17
17
|
module Cataract
|
|
@@ -65,15 +65,25 @@ module Cataract
|
|
|
65
65
|
true
|
|
66
66
|
end
|
|
67
67
|
|
|
68
|
-
def initialize(css_string, parent_media_sym: nil, depth: 0)
|
|
68
|
+
def initialize(css_string, parser_options: {}, parent_media_sym: nil, depth: 0)
|
|
69
69
|
@css = css_string.dup.freeze
|
|
70
70
|
@pos = 0
|
|
71
71
|
@len = @css.bytesize
|
|
72
72
|
@parent_media_sym = parent_media_sym
|
|
73
73
|
|
|
74
|
+
# Parser options with defaults
|
|
75
|
+
@parser_options = {
|
|
76
|
+
selector_lists: true
|
|
77
|
+
}.merge(parser_options)
|
|
78
|
+
|
|
79
|
+
# Extract selector_lists option to ivar to avoid repeated hash lookups in hot path
|
|
80
|
+
@selector_lists_enabled = @parser_options[:selector_lists]
|
|
81
|
+
|
|
74
82
|
# Parser state
|
|
75
83
|
@rules = [] # Flat array of Rule structs
|
|
76
84
|
@_media_index = {} # Symbol => Array of rule IDs
|
|
85
|
+
@_selector_lists = {} # Hash: list_id => Array of rule IDs
|
|
86
|
+
@_next_selector_list_id = 0 # Counter for selector list IDs
|
|
77
87
|
@imports = [] # Array of ImportStatement structs
|
|
78
88
|
@rule_id_counter = 0 # Next rule ID (0-indexed)
|
|
79
89
|
@media_query_count = 0 # Safety limit
|
|
@@ -103,7 +113,9 @@ module Cataract
|
|
|
103
113
|
# Must be a selector-based rule
|
|
104
114
|
selector = parse_selector
|
|
105
115
|
|
|
106
|
-
|
|
116
|
+
if selector.nil? || selector.empty?
|
|
117
|
+
next
|
|
118
|
+
end
|
|
107
119
|
|
|
108
120
|
# Find the block boundaries
|
|
109
121
|
decl_start = @pos # Should be right after the {
|
|
@@ -159,22 +171,46 @@ module Cataract
|
|
|
159
171
|
# Split comma-separated selectors into individual rules
|
|
160
172
|
selectors = selector.split(',')
|
|
161
173
|
|
|
174
|
+
# Determine if we should track this as a selector list
|
|
175
|
+
# Check boolean first to potentially avoid size() call via short-circuit evaluation
|
|
176
|
+
list_id = nil
|
|
177
|
+
if @selector_lists_enabled && selectors.size > 1
|
|
178
|
+
list_id = @_next_selector_list_id
|
|
179
|
+
@_next_selector_list_id += 1
|
|
180
|
+
@_selector_lists[list_id] = []
|
|
181
|
+
end
|
|
182
|
+
|
|
162
183
|
selectors.each do |individual_selector|
|
|
163
184
|
individual_selector.strip!
|
|
164
185
|
next if individual_selector.empty?
|
|
165
186
|
|
|
166
|
-
|
|
187
|
+
rule_id = @rule_id_counter
|
|
188
|
+
|
|
189
|
+
# Dup declarations for each rule in a selector list to avoid shared state
|
|
190
|
+
# (principle of least surprise - modifying one rule shouldn't affect others)
|
|
191
|
+
# Must deep dup: both the array and the Declaration objects inside
|
|
192
|
+
rule_declarations = if list_id
|
|
193
|
+
declarations.map { |d| Declaration.new(d.property, d.value, d.important) }
|
|
194
|
+
else
|
|
195
|
+
declarations
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
# Create Rule struct (with selector_list_id as 7th parameter)
|
|
167
199
|
rule = Rule.new(
|
|
168
|
-
|
|
200
|
+
rule_id, # id
|
|
169
201
|
individual_selector, # selector
|
|
170
|
-
|
|
202
|
+
rule_declarations, # declarations
|
|
171
203
|
nil, # specificity (calculated lazily)
|
|
172
204
|
nil, # parent_rule_id
|
|
173
|
-
nil
|
|
205
|
+
nil, # nesting_style
|
|
206
|
+
list_id # selector_list_id
|
|
174
207
|
)
|
|
175
208
|
|
|
176
209
|
@rules << rule
|
|
177
210
|
@rule_id_counter += 1
|
|
211
|
+
|
|
212
|
+
# Track in selector list if applicable
|
|
213
|
+
@_selector_lists[list_id] << rule_id if list_id
|
|
178
214
|
end
|
|
179
215
|
end
|
|
180
216
|
end
|
|
@@ -182,6 +218,7 @@ module Cataract
|
|
|
182
218
|
{
|
|
183
219
|
rules: @rules,
|
|
184
220
|
_media_index: @_media_index,
|
|
221
|
+
_selector_lists: @_selector_lists,
|
|
185
222
|
imports: @imports,
|
|
186
223
|
charset: @charset,
|
|
187
224
|
_has_nesting: @_has_nesting
|
|
@@ -238,17 +275,108 @@ module Cataract
|
|
|
238
275
|
true
|
|
239
276
|
end
|
|
240
277
|
|
|
241
|
-
# Skip whitespace and comments
|
|
278
|
+
# Skip whitespace and comments until no more progress can be made
|
|
279
|
+
#
|
|
280
|
+
# Optimization: Using `begin...end until` instead of `loop + break` reduces VM overhead:
|
|
281
|
+
# - loop + break: 29 instructions with catch table for break/redo/next, uses throw/send
|
|
282
|
+
# - begin...end until: 24 instructions, simple jump-based loop, no catch table
|
|
283
|
+
# Benchmark shows 15-51% speedup depending on YJIT
|
|
242
284
|
def skip_ws_and_comments
|
|
243
|
-
|
|
285
|
+
begin
|
|
244
286
|
old_pos = @pos
|
|
245
287
|
skip_whitespace
|
|
246
288
|
skip_comment
|
|
247
|
-
|
|
289
|
+
end until @pos == old_pos # No progress made # rubocop:disable Lint/Loop
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
# Parse a single CSS declaration (property: value)
#
# Performance-critical helper that parses one declaration.
# Shared by parse_mixed_block, parse_declarations, and parse_declarations_block.
#
# Scanning starts exactly at +pos+; leading whitespace is not skipped here,
# so callers are expected to have positioned +pos+ on the first property byte.
#
# Returns nil for the declaration when the input is malformed (no ':' before
# the next ';' / '}' / +end_pos+), when the property or value is empty, or
# when the value consists of nothing but the '!important' flag.
#
# @param pos [Integer] Current position in CSS string
# @param end_pos [Integer] End position (boundary for parsing)
# @param parse_important [Boolean] Whether to parse !important flag (false for at-rules)
# @return [Array(Declaration|nil, Integer)] Tuple of [declaration, new_position]
def parse_single_declaration(pos, end_pos, parse_important)
  # Parse property name (scan until ':'; ';' or '}' means there is no property here)
  prop_start = pos
  while pos < end_pos && @css.getbyte(pos) != BYTE_COLON &&
        @css.getbyte(pos) != BYTE_SEMICOLON && @css.getbyte(pos) != BYTE_RBRACE
    pos += 1
  end

  # Skip if malformed (no colon found)
  if pos >= end_pos || @css.getbyte(pos) != BYTE_COLON
    # Error recovery: skip to next semicolon
    while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON
      pos += 1
    end
    pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON
    return [nil, pos]
  end

  # Trim trailing whitespace from property
  prop_end = pos
  while prop_end > prop_start && whitespace?(@css.getbyte(prop_end - 1))
    prop_end -= 1
  end

  # Extract and normalize property name
  property = byteslice_encoded(prop_start, prop_end - prop_start)
  # Custom properties (--foo) are case-sensitive and can contain Unicode
  # Regular properties are ASCII-only and case-insensitive
  unless property.bytesize >= 2 && property.getbyte(0) == BYTE_HYPHEN && property.getbyte(1) == BYTE_HYPHEN
    property.force_encoding('US-ASCII')
    property.downcase!
  end

  pos += 1 # Skip ':'

  # Skip leading whitespace in value
  while pos < end_pos && whitespace?(@css.getbyte(pos))
    pos += 1
  end

  # Parse value (scan until ';' or '}')
  val_start = pos
  while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON && @css.getbyte(pos) != BYTE_RBRACE
    pos += 1
  end
  val_end = pos

  # Trim trailing whitespace from value
  while val_end > val_start && whitespace?(@css.getbyte(val_end - 1))
    val_end -= 1
  end

  value = byteslice_encoded(val_start, val_end - val_start)

  # Parse !important flag if requested
  # NOTE(review): only the exact, lowercase suffix '!important' is recognised
  # here; '! important' or '!IMPORTANT' are left in the value untouched.
  important = false
  if parse_important && value.end_with?('!important')
    important = true
    # Remove '!important' (10 characters) and trailing whitespace
    value = value[0, value.length - 10].rstrip
  end

  # Skip semicolon if present
  pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON

  # Return nil if empty declaration. The value.empty? check covers input such
  # as "color: !important", where the byte range was non-empty but nothing is
  # left once the flag has been stripped.
  return [nil, pos] if prop_end <= prop_start || val_end <= val_start || value.empty?

  [Declaration.new(property, value, important), pos]
end
|
|
250
371
|
|
|
251
372
|
# Find matching closing brace
|
|
373
|
+
#
|
|
374
|
+
# Performance notes (benchmarked on bootstrap.css with 2,400 braces):
|
|
375
|
+
# - Using `return` instead of `break` avoids catch table overhead (~2% faster)
|
|
376
|
+
# - Checking RBRACE before LBRACE is faster because closing braces are
|
|
377
|
+
# encountered more frequently when searching forward from an opening brace
|
|
378
|
+
# - Combined optimizations: baseline 666ms → optimized 652ms (2% improvement)
|
|
379
|
+
#
|
|
252
380
|
# Translated from C: see ext/cataract/css_parser.c find_matching_brace
|
|
253
381
|
def find_matching_brace(start_pos)
|
|
254
382
|
depth = 1
|
|
@@ -256,11 +384,11 @@ module Cataract
|
|
|
256
384
|
|
|
257
385
|
while pos < @len
|
|
258
386
|
byte = @css.getbyte(pos)
|
|
259
|
-
if byte ==
|
|
260
|
-
depth += 1
|
|
261
|
-
elsif byte == BYTE_RBRACE
|
|
387
|
+
if byte == BYTE_RBRACE
|
|
262
388
|
depth -= 1
|
|
263
|
-
|
|
389
|
+
return pos if depth == 0
|
|
390
|
+
elsif byte == BYTE_LBRACE
|
|
391
|
+
depth += 1
|
|
264
392
|
end
|
|
265
393
|
pos += 1
|
|
266
394
|
end
|
|
@@ -288,6 +416,7 @@ module Cataract
|
|
|
288
416
|
|
|
289
417
|
# Trim whitespace from selector (in-place to avoid allocation)
|
|
290
418
|
selector_text.strip!
|
|
419
|
+
selector_text
|
|
291
420
|
end
|
|
292
421
|
|
|
293
422
|
# Parse mixed block containing declarations AND nested selectors/at-rules
|
|
@@ -458,64 +587,9 @@ module Cataract
|
|
|
458
587
|
next
|
|
459
588
|
end
|
|
460
589
|
|
|
461
|
-
# This is a declaration - parse it
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
@css.getbyte(pos) != BYTE_SEMICOLON && @css.getbyte(pos) != BYTE_LBRACE
|
|
465
|
-
pos += 1
|
|
466
|
-
end
|
|
467
|
-
|
|
468
|
-
if pos >= end_pos || @css.getbyte(pos) != BYTE_COLON
|
|
469
|
-
# Malformed - skip to semicolon
|
|
470
|
-
while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON
|
|
471
|
-
pos += 1
|
|
472
|
-
end
|
|
473
|
-
pos += 1 if pos < end_pos
|
|
474
|
-
next
|
|
475
|
-
end
|
|
476
|
-
|
|
477
|
-
prop_end = pos
|
|
478
|
-
# Trim trailing whitespace
|
|
479
|
-
while prop_end > prop_start && whitespace?(@css.getbyte(prop_end - 1))
|
|
480
|
-
prop_end -= 1
|
|
481
|
-
end
|
|
482
|
-
|
|
483
|
-
property = byteslice_encoded(prop_start, prop_end - prop_start, encoding: 'US-ASCII')
|
|
484
|
-
property.downcase!
|
|
485
|
-
|
|
486
|
-
pos += 1 # Skip :
|
|
487
|
-
|
|
488
|
-
# Skip leading whitespace in value
|
|
489
|
-
while pos < end_pos && whitespace?(@css.getbyte(pos))
|
|
490
|
-
pos += 1
|
|
491
|
-
end
|
|
492
|
-
|
|
493
|
-
# Parse value (read until ';' or '}')
|
|
494
|
-
val_start = pos
|
|
495
|
-
while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON && @css.getbyte(pos) != BYTE_RBRACE
|
|
496
|
-
pos += 1
|
|
497
|
-
end
|
|
498
|
-
val_end = pos
|
|
499
|
-
|
|
500
|
-
# Trim trailing whitespace from value
|
|
501
|
-
while val_end > val_start && whitespace?(@css.getbyte(val_end - 1))
|
|
502
|
-
val_end -= 1
|
|
503
|
-
end
|
|
504
|
-
|
|
505
|
-
value = byteslice_encoded(val_start, val_end - val_start)
|
|
506
|
-
|
|
507
|
-
# Check for !important flag
|
|
508
|
-
important = false
|
|
509
|
-
if value.end_with?('!important')
|
|
510
|
-
important = true
|
|
511
|
-
# NOTE: Using rstrip here instead of manual byte loop since !important is rare (not hot path)
|
|
512
|
-
value = value[0, value.length - 10].rstrip # Remove '!important' and trailing whitespace
|
|
513
|
-
end
|
|
514
|
-
|
|
515
|
-
pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON
|
|
516
|
-
|
|
517
|
-
# Create declaration
|
|
518
|
-
declarations << Declaration.new(property, value, important) if prop_end > prop_start && val_end > val_start
|
|
590
|
+
# This is a declaration - parse it using shared helper
|
|
591
|
+
decl, pos = parse_single_declaration(pos, end_pos, true)
|
|
592
|
+
declarations << decl if decl
|
|
519
593
|
end
|
|
520
594
|
|
|
521
595
|
declarations
|
|
@@ -553,9 +627,16 @@ module Cataract
|
|
|
553
627
|
next
|
|
554
628
|
end
|
|
555
629
|
|
|
556
|
-
property
|
|
630
|
+
# Extract property name - use UTF-8 encoding to support custom properties with Unicode
|
|
631
|
+
property = byteslice_encoded(property_start, @pos - property_start)
|
|
557
632
|
property.strip!
|
|
558
|
-
|
|
633
|
+
# Custom properties (--foo) are case-sensitive and can contain Unicode
|
|
634
|
+
# Regular properties are ASCII-only and case-insensitive
|
|
635
|
+
unless property.bytesize >= 2 && property.getbyte(0) == BYTE_HYPHEN && property.getbyte(1) == BYTE_HYPHEN
|
|
636
|
+
# Regular property: force ASCII encoding and downcase
|
|
637
|
+
property.force_encoding('US-ASCII')
|
|
638
|
+
property.downcase!
|
|
639
|
+
end
|
|
559
640
|
@pos += 1 # skip ':'
|
|
560
641
|
|
|
561
642
|
skip_ws_and_comments
|
|
@@ -587,7 +668,7 @@ module Cataract
|
|
|
587
668
|
end
|
|
588
669
|
|
|
589
670
|
# Check for 'important' (9 chars)
|
|
590
|
-
if i >= 8 && value[(i - 8)
|
|
671
|
+
if i >= 8 && value[(i - 8), 9] == 'important'
|
|
591
672
|
i -= 9
|
|
592
673
|
# Skip whitespace before 'important'
|
|
593
674
|
while i >= 0
|
|
@@ -644,16 +725,8 @@ module Cataract
|
|
|
644
725
|
|
|
645
726
|
charset_value = byteslice_encoded(value_start, @pos - value_start)
|
|
646
727
|
charset_value.strip!
|
|
647
|
-
# Remove quotes
|
|
648
|
-
|
|
649
|
-
i = 0
|
|
650
|
-
len = charset_value.bytesize
|
|
651
|
-
while i < len
|
|
652
|
-
byte = charset_value.getbyte(i)
|
|
653
|
-
result << charset_value[i] unless byte == BYTE_DQUOTE || byte == BYTE_SQUOTE
|
|
654
|
-
i += 1
|
|
655
|
-
end
|
|
656
|
-
@charset = result
|
|
728
|
+
# Remove quotes
|
|
729
|
+
@charset = charset_value.delete('"\'')
|
|
657
730
|
|
|
658
731
|
@pos += 1 if peek_byte == BYTE_SEMICOLON # consume semicolon
|
|
659
732
|
return
|
|
@@ -702,11 +775,24 @@ module Cataract
|
|
|
702
775
|
# Recursively parse block content (preserve parent media context)
|
|
703
776
|
nested_parser = Parser.new(
|
|
704
777
|
byteslice_encoded(block_start, block_end - block_start),
|
|
705
|
-
|
|
778
|
+
parser_options: @parser_options,
|
|
779
|
+
parent_media_sym: @parent_media_sym,
|
|
780
|
+
depth: @depth + 1
|
|
706
781
|
)
|
|
707
782
|
|
|
708
783
|
nested_result = nested_parser.parse
|
|
709
784
|
|
|
785
|
+
# Merge nested selector_lists with offsetted IDs
|
|
786
|
+
list_id_offset = @_next_selector_list_id
|
|
787
|
+
if nested_result[:_selector_lists] && !nested_result[:_selector_lists].empty?
|
|
788
|
+
nested_result[:_selector_lists].each do |list_id, rule_ids|
|
|
789
|
+
new_list_id = list_id + list_id_offset
|
|
790
|
+
offsetted_rule_ids = rule_ids.map { |rid| rid + @rule_id_counter }
|
|
791
|
+
@_selector_lists[new_list_id] = offsetted_rule_ids
|
|
792
|
+
end
|
|
793
|
+
@_next_selector_list_id = list_id_offset + nested_result[:_selector_lists].size
|
|
794
|
+
end
|
|
795
|
+
|
|
710
796
|
# Merge nested media_index into ours
|
|
711
797
|
nested_result[:_media_index].each do |media, rule_ids|
|
|
712
798
|
@_media_index[media] ||= []
|
|
@@ -717,6 +803,10 @@ module Cataract
|
|
|
717
803
|
# Add nested rules to main rules array
|
|
718
804
|
nested_result[:rules].each do |rule|
|
|
719
805
|
rule.id = @rule_id_counter
|
|
806
|
+
# Update selector_list_id if applicable
|
|
807
|
+
if rule.is_a?(Rule) && rule.selector_list_id
|
|
808
|
+
rule.selector_list_id += list_id_offset
|
|
809
|
+
end
|
|
720
810
|
@rule_id_counter += 1
|
|
721
811
|
@rules << rule
|
|
722
812
|
end
|
|
@@ -776,12 +866,24 @@ module Cataract
|
|
|
776
866
|
# Parse the content with the combined media context
|
|
777
867
|
nested_parser = Parser.new(
|
|
778
868
|
byteslice_encoded(block_start, block_end - block_start),
|
|
869
|
+
parser_options: @parser_options,
|
|
779
870
|
parent_media_sym: combined_media_sym,
|
|
780
871
|
depth: @depth + 1
|
|
781
872
|
)
|
|
782
873
|
|
|
783
874
|
nested_result = nested_parser.parse
|
|
784
875
|
|
|
876
|
+
# Merge nested selector_lists with offsetted IDs
|
|
877
|
+
list_id_offset = @_next_selector_list_id
|
|
878
|
+
if nested_result[:_selector_lists] && !nested_result[:_selector_lists].empty?
|
|
879
|
+
nested_result[:_selector_lists].each do |list_id, rule_ids|
|
|
880
|
+
new_list_id = list_id + list_id_offset
|
|
881
|
+
offsetted_rule_ids = rule_ids.map { |rid| rid + @rule_id_counter }
|
|
882
|
+
@_selector_lists[new_list_id] = offsetted_rule_ids
|
|
883
|
+
end
|
|
884
|
+
@_next_selector_list_id = list_id_offset + nested_result[:_selector_lists].size
|
|
885
|
+
end
|
|
886
|
+
|
|
785
887
|
# Merge nested media_index into ours (for nested @media)
|
|
786
888
|
nested_result[:_media_index].each do |media, rule_ids|
|
|
787
889
|
@_media_index[media] ||= []
|
|
@@ -792,6 +894,10 @@ module Cataract
|
|
|
792
894
|
# Add nested rules to main rules array and update media_index
|
|
793
895
|
nested_result[:rules].each do |rule|
|
|
794
896
|
rule.id = @rule_id_counter
|
|
897
|
+
# Update selector_list_id if applicable
|
|
898
|
+
if rule.is_a?(Rule) && rule.selector_list_id
|
|
899
|
+
rule.selector_list_id += list_id_offset
|
|
900
|
+
end
|
|
795
901
|
|
|
796
902
|
# Extract media types and add to each first (if different from full query)
|
|
797
903
|
# We add these BEFORE the full query so that when iterating the media_index hash,
|
|
@@ -856,7 +962,11 @@ module Cataract
|
|
|
856
962
|
|
|
857
963
|
# Parse keyframe blocks as rules (0%/from/to etc)
|
|
858
964
|
# Create a nested parser context
|
|
859
|
-
nested_parser = Parser.new(
|
|
965
|
+
nested_parser = Parser.new(
|
|
966
|
+
byteslice_encoded(block_start, block_end - block_start),
|
|
967
|
+
parser_options: @parser_options,
|
|
968
|
+
depth: @depth + 1
|
|
969
|
+
)
|
|
860
970
|
nested_result = nested_parser.parse
|
|
861
971
|
content = nested_result[:rules]
|
|
862
972
|
|
|
@@ -1096,7 +1206,7 @@ module Cataract
|
|
|
1096
1206
|
result = String.new
|
|
1097
1207
|
result << parent_selector
|
|
1098
1208
|
result << ' '
|
|
1099
|
-
result << nested_selector.byteslice(start_pos
|
|
1209
|
+
result << nested_selector.byteslice(start_pos, nested_selector.bytesize - start_pos)
|
|
1100
1210
|
|
|
1101
1211
|
[result, nesting_style]
|
|
1102
1212
|
end
|
|
@@ -1120,7 +1230,8 @@ module Cataract
|
|
|
1120
1230
|
# If child is a condition (contains ':'), wrap it in parentheses
|
|
1121
1231
|
combined += if child_str.include?(':')
|
|
1122
1232
|
# Add parens if not already present
|
|
1123
|
-
|
|
1233
|
+
len = child_str.bytesize
|
|
1234
|
+
if len > 1 && child_str.getbyte(0) == BYTE_LPAREN && child_str.getbyte(len - 1) == BYTE_RPAREN
|
|
1124
1235
|
child_str
|
|
1125
1236
|
else
|
|
1126
1237
|
"(#{child_str})"
|
|
@@ -1282,56 +1393,9 @@ module Cataract
|
|
|
1282
1393
|
end
|
|
1283
1394
|
break if pos >= end_pos
|
|
1284
1395
|
|
|
1285
|
-
# Parse
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
pos += 1
|
|
1289
|
-
end
|
|
1290
|
-
|
|
1291
|
-
# Skip if no colon found (malformed)
|
|
1292
|
-
if pos >= end_pos || @css.getbyte(pos) != BYTE_COLON
|
|
1293
|
-
# Try to recover by finding next semicolon
|
|
1294
|
-
while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON
|
|
1295
|
-
pos += 1
|
|
1296
|
-
end
|
|
1297
|
-
pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON
|
|
1298
|
-
next
|
|
1299
|
-
end
|
|
1300
|
-
|
|
1301
|
-
prop_end = pos
|
|
1302
|
-
# Trim trailing whitespace from property
|
|
1303
|
-
while prop_end > prop_start && whitespace?(@css.getbyte(prop_end - 1))
|
|
1304
|
-
prop_end -= 1
|
|
1305
|
-
end
|
|
1306
|
-
|
|
1307
|
-
property = byteslice_encoded(prop_start, prop_end - prop_start, encoding: 'US-ASCII')
|
|
1308
|
-
property.downcase!
|
|
1309
|
-
|
|
1310
|
-
pos += 1 # Skip ':'
|
|
1311
|
-
|
|
1312
|
-
# Skip leading whitespace in value
|
|
1313
|
-
while pos < end_pos && whitespace?(@css.getbyte(pos))
|
|
1314
|
-
pos += 1
|
|
1315
|
-
end
|
|
1316
|
-
|
|
1317
|
-
# Parse value (read until ';' or '}')
|
|
1318
|
-
val_start = pos
|
|
1319
|
-
while pos < end_pos && @css.getbyte(pos) != BYTE_SEMICOLON && @css.getbyte(pos) != BYTE_RBRACE
|
|
1320
|
-
pos += 1
|
|
1321
|
-
end
|
|
1322
|
-
val_end = pos
|
|
1323
|
-
|
|
1324
|
-
# Trim trailing whitespace from value
|
|
1325
|
-
while val_end > val_start && whitespace?(@css.getbyte(val_end - 1))
|
|
1326
|
-
val_end -= 1
|
|
1327
|
-
end
|
|
1328
|
-
|
|
1329
|
-
value = byteslice_encoded(val_start, val_end - val_start)
|
|
1330
|
-
|
|
1331
|
-
pos += 1 if pos < end_pos && @css.getbyte(pos) == BYTE_SEMICOLON
|
|
1332
|
-
|
|
1333
|
-
# Create Declaration struct (at-rules don't use !important)
|
|
1334
|
-
declarations << Declaration.new(property, value, false)
|
|
1396
|
+
# Parse declaration using shared helper (at-rules don't use !important)
|
|
1397
|
+
decl, pos = parse_single_declaration(pos, end_pos, false)
|
|
1398
|
+
declarations << decl if decl
|
|
1335
1399
|
end
|
|
1336
1400
|
|
|
1337
1401
|
declarations
|