cataract 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci-manual-rubies.yml +27 -0
  3. data/.overcommit.yml +1 -1
  4. data/.rubocop.yml +62 -0
  5. data/.rubocop_todo.yml +186 -0
  6. data/BENCHMARKS.md +60 -139
  7. data/CHANGELOG.md +10 -0
  8. data/README.md +30 -2
  9. data/Rakefile +49 -22
  10. data/cataract.gemspec +4 -1
  11. data/ext/cataract/cataract.c +47 -47
  12. data/ext/cataract/css_parser.c +17 -33
  13. data/ext/cataract/merge.c +6 -0
  14. data/lib/cataract/at_rule.rb +8 -9
  15. data/lib/cataract/declaration.rb +18 -0
  16. data/lib/cataract/import_resolver.rb +3 -4
  17. data/lib/cataract/pure/byte_constants.rb +69 -0
  18. data/lib/cataract/pure/helpers.rb +35 -0
  19. data/lib/cataract/pure/imports.rb +255 -0
  20. data/lib/cataract/pure/merge.rb +1146 -0
  21. data/lib/cataract/pure/parser.rb +1236 -0
  22. data/lib/cataract/pure/serializer.rb +590 -0
  23. data/lib/cataract/pure/specificity.rb +206 -0
  24. data/lib/cataract/pure.rb +130 -0
  25. data/lib/cataract/rule.rb +22 -13
  26. data/lib/cataract/stylesheet.rb +14 -9
  27. data/lib/cataract/version.rb +1 -1
  28. data/lib/cataract.rb +18 -5
  29. metadata +12 -25
  30. data/benchmarks/benchmark_harness.rb +0 -193
  31. data/benchmarks/benchmark_merging.rb +0 -121
  32. data/benchmarks/benchmark_optimization_comparison.rb +0 -168
  33. data/benchmarks/benchmark_parsing.rb +0 -153
  34. data/benchmarks/benchmark_ragel_removal.rb +0 -56
  35. data/benchmarks/benchmark_runner.rb +0 -70
  36. data/benchmarks/benchmark_serialization.rb +0 -180
  37. data/benchmarks/benchmark_shorthand.rb +0 -109
  38. data/benchmarks/benchmark_shorthand_expansion.rb +0 -176
  39. data/benchmarks/benchmark_specificity.rb +0 -124
  40. data/benchmarks/benchmark_string_allocation.rb +0 -151
  41. data/benchmarks/benchmark_stylesheet_to_s.rb +0 -62
  42. data/benchmarks/benchmark_to_s_cached.rb +0 -55
  43. data/benchmarks/benchmark_value_splitter.rb +0 -54
  44. data/benchmarks/benchmark_yjit.rb +0 -158
  45. data/benchmarks/benchmark_yjit_workers.rb +0 -61
  46. data/benchmarks/profile_to_s.rb +0 -23
  47. data/benchmarks/speedup_calculator.rb +0 -83
  48. data/benchmarks/system_metadata.rb +0 -81
  49. data/benchmarks/templates/benchmarks.md.erb +0 -221
  50. data/benchmarks/yjit_tests.rb +0 -141
  51. data/scripts/fuzzer/run.rb +0 -828
  52. data/scripts/fuzzer/worker.rb +0 -99
  53. data/scripts/generate_benchmarks_md.rb +0 -155
@@ -0,0 +1,255 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Pure Ruby CSS parser - Import extraction
4
+ # NO REGEXP ALLOWED - char-by-char parsing only
5
+
6
module Cataract
  # Case-insensitive ASCII comparison of +pattern+ against the bytes of +str+
  # starting at byte offset +pos+.
  #
  # Works purely on bytes, so it is safe to call even when +pos+ falls inside
  # a multi-byte UTF-8 character: such bytes simply fail to match.
  #
  # @param str [String] string to inspect
  # @param pos [Integer] byte offset into +str+ where the comparison starts
  # @param pattern [String] ASCII pattern to look for
  # @return [Boolean] true if every pattern byte matches (ignoring A-Z/a-z case)
  def self.match_ascii_ci?(str, pos, pattern)
    pattern_len = pattern.bytesize
    # Not enough bytes left in +str+ to hold the whole pattern
    return false if pos + pattern_len > str.bytesize

    i = 0
    while i < pattern_len
      str_byte = str.getbyte(pos + i)
      pat_byte = pattern.getbyte(i)

      # Fold both sides to lowercase (ASCII only: A-Z -> a-z)
      str_byte += BYTE_CASE_DIFF if str_byte >= BYTE_UPPER_A && str_byte <= BYTE_UPPER_Z
      pat_byte += BYTE_CASE_DIFF if pat_byte >= BYTE_UPPER_A && pat_byte <= BYTE_UPPER_Z

      return false if str_byte != pat_byte

      i += 1
    end

    true
  end

  # Extract @import statements from CSS.
  #
  # Scans the input byte-by-byte (no Regexp). Whitespace and /* */ comments
  # are skipped between tokens. Only imports with a quoted URL are recognised,
  # optionally wrapped in url( ... ); an @import without a quoted URL is
  # skipped up to the next semicolon. Scanning stops at an unterminated
  # URL string.
  #
  # All positions are BYTE offsets (they come from getbyte), so the input is
  # measured with bytesize and sliced with byteslice. Using character-based
  # length / [] here would return shifted substrings as soon as the CSS
  # contains a multi-byte character.
  #
  # @param css_string [String] CSS to scan for @imports
  # @return [Array<Hash>] one hash per import with:
  #   - :url        [String] text between the quotes (escapes left untouched)
  #   - :media      [String, nil] trimmed text between the URL and the
  #                 semicolon, or nil when there is none
  #   - :full_match [String] the whole statement, from "@import" through the
  #                 semicolon (or end of input)
  def self.extract_imports(css_string)
    imports = []

    i = 0
    len = css_string.bytesize

    while i < len
      # Skip whitespace and comments
      while i < len
        byte = css_string.getbyte(i)
        if is_whitespace?(byte)
          i += 1
        elsif byte == BYTE_SLASH && i + 1 < len && css_string.getbyte(i + 1) == BYTE_STAR
          # Skip /* */ comment
          i += 2
          while i + 1 < len && !(css_string.getbyte(i) == BYTE_STAR && css_string.getbyte(i + 1) == BYTE_SLASH)
            i += 1
          end
          i += 2 if i + 1 < len # Skip */ (absent when the comment is unterminated)
        else
          break
        end
      end

      break if i >= len

      # Anything that is not "@import" (case-insensitive) is stepped over
      # one byte at a time.
      unless match_ascii_ci?(css_string, i, '@import')
        i += 1
        next
      end

      import_start = i
      i += 7 # bytesize of "@import"

      # Skip whitespace after @import
      i += 1 while i < len && is_whitespace?(css_string.getbyte(i))

      # Optional url( wrapper (case-insensitive)
      has_url_function = false
      if match_ascii_ci?(css_string, i, 'url(')
        has_url_function = true
        i += 4
        i += 1 while i < len && is_whitespace?(css_string.getbyte(i))
      end

      # The URL must start with a quote
      byte = i < len ? css_string.getbyte(i) : nil
      if byte != BYTE_DQUOTE && byte != BYTE_SQUOTE
        # Invalid @import, skip to just past the next semicolon
        i += 1 while i < len && css_string.getbyte(i) != BYTE_SEMICOLON
        i += 1 if i < len
        next
      end

      quote_char = byte
      i += 1 # Skip opening quote

      url_start = i

      # Find the closing quote; a backslash escapes the byte that follows it
      while i < len && css_string.getbyte(i) != quote_char
        if css_string.getbyte(i) == BYTE_BACKSLASH && i + 1 < len
          i += 2
        else
          i += 1
        end
      end

      break if i >= len # Unterminated string

      url_end = i
      i += 1 # Skip closing quote

      # Skip closing paren if we had url(
      if has_url_function
        i += 1 while i < len && is_whitespace?(css_string.getbyte(i))
        i += 1 if i < len && css_string.getbyte(i) == BYTE_RPAREN
      end

      # Skip whitespace before optional media query or semicolon
      i += 1 while i < len && is_whitespace?(css_string.getbyte(i))

      # Optional media query: everything up to the semicolon
      media_start = nil
      media_end = nil

      if i < len && css_string.getbyte(i) != BYTE_SEMICOLON
        media_start = i

        i += 1 while i < len && css_string.getbyte(i) != BYTE_SEMICOLON

        media_end = i

        # Trim trailing whitespace from media query
        media_end -= 1 while media_end > media_start && is_whitespace?(css_string.getbyte(media_end - 1))
      end

      # Skip semicolon
      i += 1 if i < len && css_string.getbyte(i) == BYTE_SEMICOLON

      import_end = i

      # Build result hash (byte offsets -> byteslice)
      url = css_string.byteslice(url_start...url_end)
      media = media_start && media_end > media_start ? css_string.byteslice(media_start...media_end) : nil
      full_match = css_string.byteslice(import_start...import_end)

      imports << { url: url, media: media, full_match: full_match }
    end

    imports
  end

  # Parse media query symbol into array of media types.
  #
  # The query is split on top-level commas. Within each comma-separated part
  # only the FIRST word is examined: it is reported as a media type unless it
  # is one of the keywords and/or/not/only or is immediately followed by ':'
  # (a media feature). Parenthesised conditions such as "(min-width: 768px)"
  # are skipped. Positions are byte offsets, hence bytesize / byteslice.
  #
  # @param media_query_sym [Symbol] Media query as symbol (e.g., :screen, :"print, screen")
  # @return [Array<Symbol>] Array of individual media types
  #
  # @example
  #   parse_media_types(:screen) #=> [:screen]
  #   parse_media_types(:"print, screen") #=> [:print, :screen]
  def self.parse_media_types(media_query_sym)
    query = media_query_sym.to_s
    types = []

    i = 0
    len = query.bytesize

    while i < len
      # Skip whitespace
      i += 1 while i < len && is_whitespace?(query.getbyte(i))
      break if i >= len

      # Leading parenthesised condition: skip to its matching closing paren
      if query.getbyte(i) == BYTE_LPAREN
        paren_depth = 1
        i += 1
        while i < len && paren_depth > 0
          byte = query.getbyte(i)
          if byte == BYTE_LPAREN
            paren_depth += 1
          elsif byte == BYTE_RPAREN
            paren_depth -= 1
          end
          i += 1
        end
        next
      end

      # Find end of word (media type or keyword)
      word_start = i
      byte = query.getbyte(i)
      while i < len && !is_whitespace?(byte) && byte != BYTE_COMMA && byte != BYTE_LPAREN && byte != BYTE_COLON
        i += 1
        byte = query.getbyte(i) if i < len
      end

      if i > word_start
        word = query.byteslice(word_start...i)

        # A word directly followed by ':' is a media feature, not a type
        is_media_feature = (i < len && query.getbyte(i) == BYTE_COLON)

        # Keywords are matched exactly (case-sensitive)
        is_keyword = word == 'and' || word == 'or' || word == 'not' || word == 'only'

        types << word.to_sym if !is_keyword && !is_media_feature
      end

      # Skip the rest of this part, up to the next top-level comma or the end
      while i < len && query.getbyte(i) != BYTE_COMMA
        if query.getbyte(i) == BYTE_LPAREN
          # Skip condition, so commas inside parentheses are not separators
          paren_depth = 1
          i += 1
          while i < len && paren_depth > 0
            byte = query.getbyte(i)
            if byte == BYTE_LPAREN
              paren_depth += 1
            elsif byte == BYTE_RPAREN
              paren_depth -= 1
            end
            i += 1
          end
        else
          i += 1
        end
      end

      i += 1 if i < len && query.getbyte(i) == BYTE_COMMA # Skip comma
    end

    types
  end
end