cataract 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci-manual-rubies.yml +44 -0
  3. data/.overcommit.yml +1 -1
  4. data/.rubocop.yml +96 -4
  5. data/.rubocop_todo.yml +186 -0
  6. data/BENCHMARKS.md +62 -141
  7. data/CHANGELOG.md +20 -0
  8. data/RAGEL_MIGRATION.md +2 -2
  9. data/README.md +37 -4
  10. data/Rakefile +72 -32
  11. data/cataract.gemspec +4 -1
  12. data/ext/cataract/cataract.c +59 -50
  13. data/ext/cataract/cataract.h +5 -3
  14. data/ext/cataract/css_parser.c +173 -65
  15. data/ext/cataract/extconf.rb +2 -2
  16. data/ext/cataract/{merge.c → flatten.c} +526 -468
  17. data/ext/cataract/shorthand_expander.c +164 -115
  18. data/lib/cataract/at_rule.rb +8 -9
  19. data/lib/cataract/declaration.rb +18 -0
  20. data/lib/cataract/import_resolver.rb +63 -43
  21. data/lib/cataract/import_statement.rb +49 -0
  22. data/lib/cataract/pure/byte_constants.rb +69 -0
  23. data/lib/cataract/pure/flatten.rb +1145 -0
  24. data/lib/cataract/pure/helpers.rb +35 -0
  25. data/lib/cataract/pure/imports.rb +268 -0
  26. data/lib/cataract/pure/parser.rb +1340 -0
  27. data/lib/cataract/pure/serializer.rb +590 -0
  28. data/lib/cataract/pure/specificity.rb +206 -0
  29. data/lib/cataract/pure.rb +153 -0
  30. data/lib/cataract/rule.rb +69 -15
  31. data/lib/cataract/stylesheet.rb +356 -49
  32. data/lib/cataract/version.rb +1 -1
  33. data/lib/cataract.rb +43 -26
  34. metadata +14 -26
  35. data/benchmarks/benchmark_harness.rb +0 -193
  36. data/benchmarks/benchmark_merging.rb +0 -121
  37. data/benchmarks/benchmark_optimization_comparison.rb +0 -168
  38. data/benchmarks/benchmark_parsing.rb +0 -153
  39. data/benchmarks/benchmark_ragel_removal.rb +0 -56
  40. data/benchmarks/benchmark_runner.rb +0 -70
  41. data/benchmarks/benchmark_serialization.rb +0 -180
  42. data/benchmarks/benchmark_shorthand.rb +0 -109
  43. data/benchmarks/benchmark_shorthand_expansion.rb +0 -176
  44. data/benchmarks/benchmark_specificity.rb +0 -124
  45. data/benchmarks/benchmark_string_allocation.rb +0 -151
  46. data/benchmarks/benchmark_stylesheet_to_s.rb +0 -62
  47. data/benchmarks/benchmark_to_s_cached.rb +0 -55
  48. data/benchmarks/benchmark_value_splitter.rb +0 -54
  49. data/benchmarks/benchmark_yjit.rb +0 -158
  50. data/benchmarks/benchmark_yjit_workers.rb +0 -61
  51. data/benchmarks/profile_to_s.rb +0 -23
  52. data/benchmarks/speedup_calculator.rb +0 -83
  53. data/benchmarks/system_metadata.rb +0 -81
  54. data/benchmarks/templates/benchmarks.md.erb +0 -221
  55. data/benchmarks/yjit_tests.rb +0 -141
  56. data/scripts/fuzzer/run.rb +0 -828
  57. data/scripts/fuzzer/worker.rb +0 -99
  58. data/scripts/generate_benchmarks_md.rb +0 -155
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Pure Ruby CSS parser - Helper methods
4
+ # NO REGEXP ALLOWED - char-by-char parsing only
5
+
6
module Cataract
  # Check if a byte is whitespace (space, tab, newline, CR).
  #
  # NOTE(review): form feed (0x0C) is not matched here; CSS treats it as a
  # newline during input preprocessing - confirm whether callers normalise it.
  #
  # @param byte [Integer, nil] Byte value from String#getbyte (nil past the end)
  # @return [Boolean] true if whitespace, false otherwise (including nil)
  def self.is_whitespace?(byte)
    byte == BYTE_SPACE || byte == BYTE_TAB || byte == BYTE_NEWLINE || byte == BYTE_CR
  end

  # Check if byte is an ASCII letter (a-z, A-Z).
  #
  # @param byte [Integer, nil] Byte value from String#getbyte (nil past the end)
  # @return [Boolean] true if letter, false otherwise (including nil)
  def self.letter?(byte)
    # String#getbyte returns nil past the end of the string; answer false
    # instead of raising NoMethodError, matching is_whitespace?.
    return false unless byte

    (byte >= BYTE_LOWER_A && byte <= BYTE_LOWER_Z) ||
      (byte >= BYTE_UPPER_A && byte <= BYTE_UPPER_Z)
  end

  # Check if byte is an ASCII digit (0-9).
  #
  # @param byte [Integer, nil] Byte value from String#getbyte (nil past the end)
  # @return [Boolean] true if digit, false otherwise (including nil)
  def self.digit?(byte)
    # nil (end of input) is never a digit.
    return false unless byte

    byte >= BYTE_DIGIT_0 && byte <= BYTE_DIGIT_9
  end

  # Check if byte is alphanumeric, hyphen, or underscore (CSS identifier char).
  #
  # @param byte [Integer, nil] Byte value from String#getbyte (nil past the end)
  # @return [Boolean] true if valid identifier character, false otherwise (including nil)
  def self.ident_char?(byte)
    # Guard locally so this predicate is nil-safe on its own.
    return false unless byte

    letter?(byte) || digit?(byte) || byte == BYTE_HYPHEN || byte == BYTE_UNDERSCORE
  end
end
@@ -0,0 +1,268 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Pure Ruby CSS parser - Import extraction
4
+ # NO REGEXP ALLOWED - char-by-char parsing only
5
+
6
+ module Cataract
7
# Case-insensitive ASCII comparison of `pattern` against the bytes of `str`
# starting at byte offset `pos`.
#
# Works purely on bytes, so `pos` may point anywhere in the string, including
# the middle of a multi-byte UTF-8 character. Only A-Z is case-folded.
#
# @param str [String] String being scanned
# @param pos [Integer] Byte offset in `str` where the comparison starts
# @param pattern [String] ASCII text to look for
# @return [Boolean] true when every pattern byte matches, false otherwise
def self.match_ascii_ci?(str, pos, pattern)
  needed = pattern.bytesize
  # Not enough bytes left in str to hold the whole pattern
  return false if str.bytesize < pos + needed

  offset = 0
  until offset >= needed
    have = str.getbyte(pos + offset)
    want = pattern.getbyte(offset)

    # Fold ASCII upper case to lower case on both sides before comparing
    have += BYTE_CASE_DIFF if have >= BYTE_UPPER_A && have <= BYTE_UPPER_Z
    want += BYTE_CASE_DIFF if want >= BYTE_UPPER_A && want <= BYTE_UPPER_Z

    return false unless have == want

    offset += 1
  end

  true
end
31
+
32
# Extract the leading @import statements from a CSS string.
#
# Scans from the start of the input, skipping whitespace, /* ... */ comments
# and @charset rules, and stops at the first other construct (per the CSS
# spec @import must come first; only @charset may precede it).
#
# Only quoted targets are recognised, optionally wrapped in url(...). An
# @import whose target is not quoted (e.g. url(foo.css)) is skipped up to its
# semicolon and not reported. An unterminated quoted URL ends the scan.
#
# Every position used here is a BYTE offset (String#getbyte), so lengths and
# slices use bytesize/byteslice; character-based length/[] would drift as
# soon as the input contains a multi-byte UTF-8 character.
#
# @param css_string [String] CSS to scan for @imports
# @return [Array<Hash>] One hash per import with:
#   :url        [String] text between the quotes, escapes left as written
#   :media      [String, nil] text between the URL and the semicolon, or nil
#   :full_match [String] the whole statement from "@import" through ";"
def self.extract_imports(css_string)
  imports = []

  i = 0
  len = css_string.bytesize

  while i < len
    # Skip whitespace and comments
    while i < len
      byte = css_string.getbyte(i)
      if is_whitespace?(byte)
        i += 1
      elsif byte == BYTE_SLASH && i + 1 < len && css_string.getbyte(i + 1) == BYTE_STAR
        # Skip /* */ comment
        i += 2
        i += 1 while i + 1 < len && !(css_string.getbyte(i) == BYTE_STAR && css_string.getbyte(i + 1) == BYTE_SLASH)
        i += 2 if i + 1 < len # Skip */ (not present when the comment is unterminated)
      else
        break
      end
    end

    break if i >= len

    # Check for @import (case-insensitive byte comparison)
    if match_ascii_ci?(css_string, i, '@import')
      import_start = i
      i += 7 # bytesize of "@import"

      # Skip whitespace after @import
      i += 1 while i < len && is_whitespace?(css_string.getbyte(i))

      # Check for optional url( (case-insensitive byte comparison)
      has_url_function = match_ascii_ci?(css_string, i, 'url(')
      if has_url_function
        i += 4 # bytesize of "url("
        i += 1 while i < len && is_whitespace?(css_string.getbyte(i))
      end

      # The target must start with a single or double quote
      quote_char = i < len ? css_string.getbyte(i) : nil
      if quote_char != BYTE_DQUOTE && quote_char != BYTE_SQUOTE
        # Invalid @import, skip to next semicolon
        i += 1 while i < len && css_string.getbyte(i) != BYTE_SEMICOLON
        i += 1 if i < len # Skip semicolon
        next
      end

      i += 1 # Skip opening quote
      url_start = i

      # Find closing quote (a backslash escapes the following byte)
      while i < len && css_string.getbyte(i) != quote_char
        i += css_string.getbyte(i) == BYTE_BACKSLASH && i + 1 < len ? 2 : 1
      end

      break if i >= len # Unterminated string

      url_end = i
      i += 1 # Skip closing quote

      # Skip closing paren if we had url(
      if has_url_function
        i += 1 while i < len && is_whitespace?(css_string.getbyte(i))
        i += 1 if i < len && css_string.getbyte(i) == BYTE_RPAREN
      end

      # Skip whitespace before optional media query or semicolon
      i += 1 while i < len && is_whitespace?(css_string.getbyte(i))

      # Optional media query: everything up to the semicolon
      media_start = nil
      media_end = nil

      if i < len && css_string.getbyte(i) != BYTE_SEMICOLON
        media_start = i

        i += 1 while i < len && css_string.getbyte(i) != BYTE_SEMICOLON

        media_end = i

        # Trim trailing whitespace from media query
        media_end -= 1 while media_end > media_start && is_whitespace?(css_string.getbyte(media_end - 1))
      end

      # Skip semicolon
      i += 1 if i < len && css_string.getbyte(i) == BYTE_SEMICOLON

      import_end = i

      # Slice by bytes: all offsets above are byte offsets
      url = css_string.byteslice(url_start...url_end)
      media = media_start && media_end > media_start ? css_string.byteslice(media_start...media_end) : nil
      full_match = css_string.byteslice(import_start...import_end)

      imports << { url: url, media: media, full_match: full_match }
    elsif match_ascii_ci?(css_string, i, '@charset')
      # Skip @charset if present - it can come before @import
      i += 1 while i < len && css_string.getbyte(i) != BYTE_SEMICOLON
      i += 1 if i < len # Skip semicolon
    else
      # Any other content (rules, other at-rules) ends the import prologue.
      # The skip loop above guarantees the byte here is not whitespace.
      break
    end
  end

  imports
end
178
+
179
# Parse a media query symbol into an array of media types.
#
# The query is split on commas. Within each comma-separated segment, leading
# parenthesised conditions such as "(min-width: 768px)" are skipped and the
# first word is examined: it is recorded as a media type unless it is one of
# the keywords and/or/not/only or is immediately followed by ':' (a media
# feature). The rest of the segment is then skipped up to the next comma.
#
# Positions are byte offsets (String#getbyte), so the length and the word
# slice are taken in bytes to stay correct for non-ASCII input.
#
# NOTE(review): because only the first word of a segment is examined, a
# segment that starts with a keyword (e.g. "only screen") contributes no
# type, and keyword matching is case-sensitive - confirm this is intended.
#
# @param media_query_sym [Symbol] Media query as symbol (e.g., :screen, :"print, screen")
# @return [Array<Symbol>] Array of individual media types
#
# @example
#   parse_media_types(:screen) #=> [:screen]
#   parse_media_types(:"print, screen") #=> [:print, :screen]
def self.parse_media_types(media_query_sym)
  query = media_query_sym.to_s
  types = []

  i = 0
  len = query.bytesize

  while i < len
    # Skip whitespace
    i += 1 while i < len && is_whitespace?(query.getbyte(i))
    break if i >= len

    # Check for opening paren - skip conditions like "(min-width: 768px)"
    if query.getbyte(i) == BYTE_LPAREN
      # Skip to matching closing paren
      paren_depth = 1
      i += 1
      while i < len && paren_depth > 0
        byte = query.getbyte(i)
        if byte == BYTE_LPAREN
          paren_depth += 1
        elsif byte == BYTE_RPAREN
          paren_depth -= 1
        end
        i += 1
      end
      next
    end

    # Find end of word (media type or keyword)
    word_start = i
    while i < len
      byte = query.getbyte(i)
      break if is_whitespace?(byte) || byte == BYTE_COMMA || byte == BYTE_LPAREN || byte == BYTE_COLON

      i += 1
    end

    if i > word_start
      word = query.byteslice(word_start...i)

      # A word directly followed by ':' is a media feature, not a type
      is_media_feature = i < len && query.getbyte(i) == BYTE_COLON

      # Query keywords are never media types
      is_keyword =
        case word
        when 'and', 'or', 'not', 'only' then true
        else false
        end

      # This is a media type - add it as symbol
      types << word.to_sym unless is_keyword || is_media_feature
    end

    # Skip to comma or end, stepping over parenthesised conditions as a unit
    while i < len && query.getbyte(i) != BYTE_COMMA
      if query.getbyte(i) == BYTE_LPAREN
        paren_depth = 1
        i += 1
        while i < len && paren_depth > 0
          byte = query.getbyte(i)
          if byte == BYTE_LPAREN
            paren_depth += 1
          elsif byte == BYTE_RPAREN
            paren_depth -= 1
          end
          i += 1
        end
      else
        i += 1
      end
    end

    i += 1 if i < len && query.getbyte(i) == BYTE_COMMA # Skip comma
  end

  types
end
268
+ end