cataract 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci-manual-rubies.yml +27 -0
- data/.overcommit.yml +1 -1
- data/.rubocop.yml +62 -0
- data/.rubocop_todo.yml +186 -0
- data/BENCHMARKS.md +60 -139
- data/CHANGELOG.md +14 -0
- data/README.md +30 -2
- data/Rakefile +49 -22
- data/cataract.gemspec +4 -1
- data/ext/cataract/cataract.c +47 -47
- data/ext/cataract/css_parser.c +17 -33
- data/ext/cataract/merge.c +58 -2
- data/lib/cataract/at_rule.rb +8 -9
- data/lib/cataract/declaration.rb +18 -0
- data/lib/cataract/import_resolver.rb +3 -4
- data/lib/cataract/pure/byte_constants.rb +69 -0
- data/lib/cataract/pure/helpers.rb +35 -0
- data/lib/cataract/pure/imports.rb +255 -0
- data/lib/cataract/pure/merge.rb +1146 -0
- data/lib/cataract/pure/parser.rb +1236 -0
- data/lib/cataract/pure/serializer.rb +590 -0
- data/lib/cataract/pure/specificity.rb +206 -0
- data/lib/cataract/pure.rb +130 -0
- data/lib/cataract/rule.rb +22 -13
- data/lib/cataract/stylesheet.rb +14 -9
- data/lib/cataract/version.rb +1 -1
- data/lib/cataract.rb +18 -5
- metadata +12 -25
- data/benchmarks/benchmark_harness.rb +0 -193
- data/benchmarks/benchmark_merging.rb +0 -121
- data/benchmarks/benchmark_optimization_comparison.rb +0 -168
- data/benchmarks/benchmark_parsing.rb +0 -153
- data/benchmarks/benchmark_ragel_removal.rb +0 -56
- data/benchmarks/benchmark_runner.rb +0 -70
- data/benchmarks/benchmark_serialization.rb +0 -180
- data/benchmarks/benchmark_shorthand.rb +0 -109
- data/benchmarks/benchmark_shorthand_expansion.rb +0 -176
- data/benchmarks/benchmark_specificity.rb +0 -124
- data/benchmarks/benchmark_string_allocation.rb +0 -151
- data/benchmarks/benchmark_stylesheet_to_s.rb +0 -62
- data/benchmarks/benchmark_to_s_cached.rb +0 -55
- data/benchmarks/benchmark_value_splitter.rb +0 -54
- data/benchmarks/benchmark_yjit.rb +0 -158
- data/benchmarks/benchmark_yjit_workers.rb +0 -61
- data/benchmarks/profile_to_s.rb +0 -23
- data/benchmarks/speedup_calculator.rb +0 -83
- data/benchmarks/system_metadata.rb +0 -81
- data/benchmarks/templates/benchmarks.md.erb +0 -221
- data/benchmarks/yjit_tests.rb +0 -141
- data/scripts/fuzzer/run.rb +0 -828
- data/scripts/fuzzer/worker.rb +0 -99
- data/scripts/generate_benchmarks_md.rb +0 -155
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Pure Ruby CSS parser - Helper methods
|
|
4
|
+
# NO REGEXP ALLOWED - char-by-char parsing only
|
|
5
|
+
|
|
6
|
+
module Cataract
  # Tell whether a byte is one of the four whitespace bytes the parser
  # recognises: BYTE_SPACE, BYTE_TAB, BYTE_NEWLINE or BYTE_CR.
  # @param byte [Integer] Byte value from String#getbyte
  # @return [Boolean] true if whitespace
  def self.is_whitespace?(byte)
    case byte
    when BYTE_SPACE, BYTE_TAB, BYTE_NEWLINE, BYTE_CR then true
    else false
    end
  end

  # Tell whether a byte lies in the lowercase (BYTE_LOWER_A..BYTE_LOWER_Z)
  # or uppercase (BYTE_UPPER_A..BYTE_UPPER_Z) letter range.
  # @param byte [Integer] Byte value from String#getbyte
  # @return [Boolean] true if letter
  def self.letter?(byte)
    return true if byte.between?(BYTE_LOWER_A, BYTE_LOWER_Z)

    byte.between?(BYTE_UPPER_A, BYTE_UPPER_Z)
  end

  # Tell whether a byte lies in the digit range BYTE_DIGIT_0..BYTE_DIGIT_9.
  # @param byte [Integer] Byte value from String#getbyte
  # @return [Boolean] true if digit
  def self.digit?(byte)
    byte.between?(BYTE_DIGIT_0, BYTE_DIGIT_9)
  end

  # Tell whether a byte may appear in a CSS identifier as this parser
  # defines it: a letter, a digit, a hyphen or an underscore.
  # @param byte [Integer] Byte value from String#getbyte
  # @return [Boolean] true if valid identifier character
  def self.ident_char?(byte)
    return true if letter?(byte) || digit?(byte)

    byte == BYTE_HYPHEN || byte == BYTE_UNDERSCORE
  end
end
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Pure Ruby CSS parser - Import extraction
|
|
4
|
+
# NO REGEXP ALLOWED - char-by-char parsing only
|
|
5
|
+
|
|
6
|
+
module Cataract
|
|
7
|
+
# Helper: compare the bytes of +str+ starting at byte offset +pos+ with an
# ASCII +pattern+, ignoring the case of A-Z / a-z.
#
# The comparison is done byte by byte, so +pos+ may point anywhere in the
# string, including the middle of a multi-byte UTF-8 character.
#
# @param str [String] text to look into
# @param pos [Integer] byte offset in +str+ where the comparison starts
# @param pattern [String] ASCII text to look for
# @return [Boolean] true if every pattern byte matches, false otherwise
#   (including when fewer than pattern.bytesize bytes remain after +pos+)
def self.match_ascii_ci?(str, pos, pattern)
  width = pattern.bytesize
  return false if pos + width > str.bytesize

  width.times do |offset|
    lhs = str.getbyte(pos + offset)
    rhs = pattern.getbyte(offset)

    # Fold uppercase ASCII letters onto their lowercase counterparts
    lhs += BYTE_CASE_DIFF if lhs.between?(BYTE_UPPER_A, BYTE_UPPER_Z)
    rhs += BYTE_CASE_DIFF if rhs.between?(BYTE_UPPER_A, BYTE_UPPER_Z)

    return false unless lhs == rhs
  end

  true
end
|
|
31
|
+
|
|
32
|
+
# Extract @import statements from CSS
#
# The scan works purely on byte offsets (String#getbyte, #bytesize and
# #byteslice), so results stay correct when the stylesheet contains
# multi-byte UTF-8 characters before or inside an @import.
#
# Accepted forms, matched case-insensitively, each optionally followed by a
# media query running up to the next semicolon:
#   @import "file.css";
#   @import 'file.css';
#   @import url("file.css");
#   @import url('file.css');
# An @import whose URL does not start with a quote is treated as invalid and
# skipped up to the next semicolon. Scanning stops at a URL string that is
# never closed.
#
# @param css_string [String] CSS to scan for @imports
# @return [Array<Hash>] Array of import hashes with
#   :url        - text between the quotes (escape sequences left as written)
#   :media      - media query text with surrounding whitespace removed, or nil
#   :full_match - the statement from "@import" through the semicolon (if any)
def self.extract_imports(css_string)
  imports = []

  i = 0
  # Byte count, not character count: every index below comes from getbyte
  len = css_string.bytesize

  while i < len
    # Skip whitespace and comments
    while i < len
      byte = css_string.getbyte(i)
      if is_whitespace?(byte)
        i += 1
      elsif byte == BYTE_SLASH && i + 1 < len && css_string.getbyte(i + 1) == BYTE_STAR
        # Skip /* */ comment
        i += 2
        while i + 1 < len && !(css_string.getbyte(i) == BYTE_STAR && css_string.getbyte(i + 1) == BYTE_SLASH)
          i += 1
        end
        i += 2 if i + 1 < len # Skip */
      else
        break
      end
    end

    break if i >= len

    # Anything that is not "@import" is stepped over one byte at a time
    unless match_ascii_ci?(css_string, i, '@import')
      i += 1
      next
    end

    import_start = i
    i += 7 # bytesize of "@import"

    # Skip whitespace after @import
    i += 1 while i < len && is_whitespace?(css_string.getbyte(i))

    # Check for optional url( (case-insensitive byte comparison)
    has_url_function = false
    if match_ascii_ci?(css_string, i, 'url(')
      has_url_function = true
      i += 4 # bytesize of "url("
      i += 1 while i < len && is_whitespace?(css_string.getbyte(i))
    end

    # Find opening quote
    byte = css_string.getbyte(i) if i < len
    if i >= len || (byte != BYTE_DQUOTE && byte != BYTE_SQUOTE)
      # Invalid @import, skip to next semicolon
      i += 1 while i < len && css_string.getbyte(i) != BYTE_SEMICOLON
      i += 1 if i < len # Skip semicolon
      next
    end

    quote_char = byte
    i += 1 # Skip opening quote

    url_start = i

    # Find closing quote; a backslash hides the byte that follows it so an
    # escaped quote does not end the URL
    while i < len && css_string.getbyte(i) != quote_char
      if css_string.getbyte(i) == BYTE_BACKSLASH && i + 1 < len
        i += 2 # Skip escaped character
      else
        i += 1
      end
    end

    break if i >= len # Unterminated string

    url_end = i
    i += 1 # Skip closing quote

    # Skip closing paren if we had url(
    if has_url_function
      i += 1 while i < len && is_whitespace?(css_string.getbyte(i))
      i += 1 if i < len && css_string.getbyte(i) == BYTE_RPAREN
    end

    # Skip whitespace before optional media query or semicolon
    i += 1 while i < len && is_whitespace?(css_string.getbyte(i))

    # Check for optional media query (everything until semicolon)
    media_start = nil
    media_end = nil

    if i < len && css_string.getbyte(i) != BYTE_SEMICOLON
      media_start = i

      # Find semicolon
      i += 1 while i < len && css_string.getbyte(i) != BYTE_SEMICOLON

      media_end = i

      # Trim trailing whitespace from media query
      media_end -= 1 while media_end > media_start && is_whitespace?(css_string.getbyte(media_end - 1))
    end

    # Skip semicolon
    i += 1 if i < len && css_string.getbyte(i) == BYTE_SEMICOLON

    import_end = i

    # Build result hash. byteslice (not String#[]) because all offsets are
    # byte offsets; every cut falls next to an ASCII byte, so the slices
    # begin and end on character boundaries.
    url = css_string.byteslice(url_start...url_end)
    media = css_string.byteslice(media_start...media_end) if media_start && media_end > media_start
    full_match = css_string.byteslice(import_start...import_end)

    imports << { url: url, media: media, full_match: full_match }
  end

  imports
end
|
|
165
|
+
|
|
166
|
+
# Parse media query symbol into array of media types
#
# The query is split on top-level commas. Within each comma-separated
# segment only the first word is examined: it is reported as a media type
# unless it is one of the keywords and/or/not/only or is directly followed
# by ':'. Parenthesised conditions such as "(min-width: 768px)" are skipped,
# including nested parentheses.
#
# All scanning uses byte offsets (String#getbyte, #bytesize, #byteslice), so
# non-ASCII characters in the query do not shift the extracted words.
#
# NOTE(review): because only the first word of a segment is inspected, a
# segment that starts with a keyword (e.g. "only screen") contributes no
# media type. Confirm this is intended before relying on it.
#
# @param media_query_sym [Symbol] Media query as symbol (e.g., :screen, :"print, screen")
# @return [Array<Symbol>] Array of individual media types
#
# @example
#   parse_media_types(:screen) #=> [:screen]
#   parse_media_types(:"print, screen") #=> [:print, :screen]
#   parse_media_types(:"screen and (min-width: 768px)") #=> [:screen]
def self.parse_media_types(media_query_sym)
  query = media_query_sym.to_s
  types = []

  i = 0
  # Byte count, not character count: every index below comes from getbyte
  len = query.bytesize

  kwords = %w[and or not only]

  while i < len
    # Skip whitespace
    i += 1 while i < len && is_whitespace?(query.getbyte(i))
    break if i >= len

    # Check for opening paren - skip conditions like "(min-width: 768px)"
    if query.getbyte(i) == BYTE_LPAREN
      # Skip to matching closing paren
      paren_depth = 1
      i += 1
      while i < len && paren_depth > 0
        byte = query.getbyte(i)
        if byte == BYTE_LPAREN
          paren_depth += 1
        elsif byte == BYTE_RPAREN
          paren_depth -= 1
        end
        i += 1
      end
      next
    end

    # Find end of word (media type or keyword)
    word_start = i
    while i < len
      byte = query.getbyte(i)
      break if is_whitespace?(byte) || byte == BYTE_COMMA || byte == BYTE_LPAREN || byte == BYTE_COLON

      i += 1
    end

    if i > word_start
      # byteslice because word_start and i are byte offsets
      word = query.byteslice(word_start...i)

      # Check if this is a media feature (followed by ':')
      is_media_feature = (i < len && query.getbyte(i) == BYTE_COLON)

      # Check if it's a keyword (and, or, not, only)
      is_keyword = kwords.include?(word)

      # Anything else is a media type - add it as symbol
      types << word.to_sym if !is_keyword && !is_media_feature
    end

    # Skip to comma or end, stepping over parenthesised conditions so a
    # comma inside parentheses does not end the segment
    while i < len && query.getbyte(i) != BYTE_COMMA
      if query.getbyte(i) == BYTE_LPAREN
        # Skip condition
        paren_depth = 1
        i += 1
        while i < len && paren_depth > 0
          byte = query.getbyte(i)
          if byte == BYTE_LPAREN
            paren_depth += 1
          elsif byte == BYTE_RPAREN
            paren_depth -= 1
          end
          i += 1
        end
      else
        i += 1
      end
    end

    i += 1 if i < len && query.getbyte(i) == BYTE_COMMA # Skip comma
  end

  types
end
|
|
255
|
+
end
|