cataract 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci-manual-rubies.yml +44 -0
  3. data/.overcommit.yml +1 -1
  4. data/.rubocop.yml +96 -4
  5. data/.rubocop_todo.yml +186 -0
  6. data/BENCHMARKS.md +62 -141
  7. data/CHANGELOG.md +20 -0
  8. data/RAGEL_MIGRATION.md +2 -2
  9. data/README.md +37 -4
  10. data/Rakefile +72 -32
  11. data/cataract.gemspec +4 -1
  12. data/ext/cataract/cataract.c +59 -50
  13. data/ext/cataract/cataract.h +5 -3
  14. data/ext/cataract/css_parser.c +173 -65
  15. data/ext/cataract/extconf.rb +2 -2
  16. data/ext/cataract/{merge.c → flatten.c} +526 -468
  17. data/ext/cataract/shorthand_expander.c +164 -115
  18. data/lib/cataract/at_rule.rb +8 -9
  19. data/lib/cataract/declaration.rb +18 -0
  20. data/lib/cataract/import_resolver.rb +63 -43
  21. data/lib/cataract/import_statement.rb +49 -0
  22. data/lib/cataract/pure/byte_constants.rb +69 -0
  23. data/lib/cataract/pure/flatten.rb +1145 -0
  24. data/lib/cataract/pure/helpers.rb +35 -0
  25. data/lib/cataract/pure/imports.rb +268 -0
  26. data/lib/cataract/pure/parser.rb +1340 -0
  27. data/lib/cataract/pure/serializer.rb +590 -0
  28. data/lib/cataract/pure/specificity.rb +206 -0
  29. data/lib/cataract/pure.rb +153 -0
  30. data/lib/cataract/rule.rb +69 -15
  31. data/lib/cataract/stylesheet.rb +356 -49
  32. data/lib/cataract/version.rb +1 -1
  33. data/lib/cataract.rb +43 -26
  34. metadata +14 -26
  35. data/benchmarks/benchmark_harness.rb +0 -193
  36. data/benchmarks/benchmark_merging.rb +0 -121
  37. data/benchmarks/benchmark_optimization_comparison.rb +0 -168
  38. data/benchmarks/benchmark_parsing.rb +0 -153
  39. data/benchmarks/benchmark_ragel_removal.rb +0 -56
  40. data/benchmarks/benchmark_runner.rb +0 -70
  41. data/benchmarks/benchmark_serialization.rb +0 -180
  42. data/benchmarks/benchmark_shorthand.rb +0 -109
  43. data/benchmarks/benchmark_shorthand_expansion.rb +0 -176
  44. data/benchmarks/benchmark_specificity.rb +0 -124
  45. data/benchmarks/benchmark_string_allocation.rb +0 -151
  46. data/benchmarks/benchmark_stylesheet_to_s.rb +0 -62
  47. data/benchmarks/benchmark_to_s_cached.rb +0 -55
  48. data/benchmarks/benchmark_value_splitter.rb +0 -54
  49. data/benchmarks/benchmark_yjit.rb +0 -158
  50. data/benchmarks/benchmark_yjit_workers.rb +0 -61
  51. data/benchmarks/profile_to_s.rb +0 -23
  52. data/benchmarks/speedup_calculator.rb +0 -83
  53. data/benchmarks/system_metadata.rb +0 -81
  54. data/benchmarks/templates/benchmarks.md.erb +0 -221
  55. data/benchmarks/yjit_tests.rb +0 -141
  56. data/scripts/fuzzer/run.rb +0 -828
  57. data/scripts/fuzzer/worker.rb +0 -99
  58. data/scripts/generate_benchmarks_md.rb +0 -155
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Pure Ruby CSS parser - Specificity calculation
4
+ # NO REGEXP ALLOWED - char-by-char parsing only
5
+
6
module Cataract
  # Names that CSS 2 allowed to be written with a single colon (`p:before`)
  # even though they are pseudo-elements. They are scored as pseudo-elements
  # regardless of whether `:` or `::` was used.
  #
  # Kept as a frozen constant so the list is built once instead of on every
  # call (calculate_specificity recurses for each `:not(...)`).
  LEGACY_PSEUDO_ELEMENT_NAMES = %w[before after first-line first-letter selection].freeze
  private_constant :LEGACY_PSEUDO_ELEMENT_NAMES

  # Calculate CSS specificity for a selector
  #
  # The selector is scanned byte-by-byte (no Regexp). All offsets in this
  # method are BYTE offsets: the loop bound is +bytesize+ and substrings are
  # taken with +byteslice+, so selectors containing multi-byte UTF-8
  # characters are scanned to the end and sliced at the right positions.
  # Slice boundaries always fall on ASCII delimiters, so a slice never cuts a
  # multi-byte character in half.
  #
  # @param selector [String, nil] CSS selector
  # @return [Integer] Specificity value (0 for nil or empty input)
  #
  # Specificity calculation (per CSS spec):
  # - Count IDs (#id) - each worth 100
  # - Count classes/attributes/pseudo-classes (.class, [attr], :pseudo) - each worth 10
  # - Count elements/pseudo-elements (div, ::before) - each worth 1
  #
  # The three counts are packed into one base-10 integer, so ten or more
  # matches of one kind carry over into the next-higher component.
  #
  # `:not(...)` contributes nothing itself; the specificity of its argument is
  # computed recursively and added to the surrounding counts. The universal
  # selector `*`, combinators, whitespace and commas contribute nothing.
  def self.calculate_specificity(selector)
    return 0 if selector.nil? || selector.empty?

    # Counters for specificity components
    id_count = 0
    class_count = 0
    attr_count = 0
    pseudo_class_count = 0
    pseudo_element_count = 0
    element_count = 0

    i = 0
    # getbyte(i) addresses bytes, so the bound must be the byte length.
    # Using the character length would stop early on multi-byte input.
    len = selector.bytesize

    while i < len
      byte = selector.getbyte(i)

      # Skip whitespace and combinators
      if byte == BYTE_SPACE || byte == BYTE_TAB || byte == BYTE_NEWLINE || byte == BYTE_CR ||
         byte == BYTE_GT || byte == BYTE_PLUS || byte == BYTE_TILDE || byte == BYTE_COMMA
        i += 1
        next
      end

      # ID selector: #id
      if byte == BYTE_HASH
        id_count += 1
        i += 1
        # Skip the identifier
        i += 1 while i < len && ident_char?(selector.getbyte(i))
        next
      end

      # Class selector: .class
      if byte == BYTE_DOT
        class_count += 1
        i += 1
        # Skip the identifier
        i += 1 while i < len && ident_char?(selector.getbyte(i))
        next
      end

      # Attribute selector: [attr]
      if byte == BYTE_LBRACKET
        attr_count += 1
        i += 1
        # Skip to the matching closing bracket (nesting is tracked; quoted
        # strings are not interpreted)
        bracket_depth = 1
        while i < len && bracket_depth > 0
          b = selector.getbyte(i)
          if b == BYTE_LBRACKET
            bracket_depth += 1
          elsif b == BYTE_RBRACKET
            bracket_depth -= 1
          end
          i += 1
        end
        next
      end

      # Pseudo-element (::) or pseudo-class (:)
      if byte == BYTE_COLON
        i += 1
        is_pseudo_element = false

        # Check for double colon (::)
        if i < len && selector.getbyte(i) == BYTE_COLON
          is_pseudo_element = true
          i += 1
        end

        # Extract pseudo name (byte offsets, hence byteslice)
        pseudo_start = i
        i += 1 while i < len && ident_char?(selector.getbyte(i))
        pseudo_name = selector.byteslice(pseudo_start, i - pseudo_start)

        # Check for legacy pseudo-elements (single colon but should be double)
        is_legacy_pseudo_element =
          !is_pseudo_element && !pseudo_name.empty? && LEGACY_PSEUDO_ELEMENT_NAMES.include?(pseudo_name)

        # Check for :not() - it doesn't count itself, but its content does
        is_not = (pseudo_name == 'not')

        if i < len && selector.getbyte(i) == BYTE_LPAREN
          # Functional notation: name(...)
          i += 1
          paren_depth = 1

          if is_not
            not_content_start = i

            # Find the matching closing paren; i is left ON that paren so the
            # slice below excludes it
            while i < len && paren_depth > 0
              b = selector.getbyte(i)
              if b == BYTE_LPAREN
                paren_depth += 1
              elsif b == BYTE_RPAREN
                paren_depth -= 1
              end
              i += 1 if paren_depth > 0
            end

            not_content = selector.byteslice(not_content_start, i - not_content_start)

            # Recursively calculate specificity of :not() content
            unless not_content.empty?
              not_specificity = calculate_specificity(not_content)

              # Unpack the base-10 encoded result back into its components
              id_count += not_specificity / 100
              class_count += (not_specificity % 100) / 10
              element_count += not_specificity % 10
            end

            i += 1 # Skip closing paren
          else
            # Skip other function arguments
            while i < len && paren_depth > 0
              b = selector.getbyte(i)
              if b == BYTE_LPAREN
                paren_depth += 1
              elsif b == BYTE_RPAREN
                paren_depth -= 1
              end
              i += 1
            end

            # Count the pseudo-class/element
            if is_pseudo_element || is_legacy_pseudo_element
              pseudo_element_count += 1
            else
              pseudo_class_count += 1
            end
          end
        elsif is_not
          # :not without parens is invalid, but don't count it
        elsif is_pseudo_element || is_legacy_pseudo_element
          pseudo_element_count += 1
        else
          pseudo_class_count += 1
        end
        next
      end

      # Universal selector: *
      if byte == BYTE_ASTERISK
        # Universal selector has specificity 0, don't count
        i += 1
        next
      end

      # Type selector (element name): div, span, etc.
      if letter?(byte)
        element_count += 1
        # Skip the identifier
        i += 1 while i < len && ident_char?(selector.getbyte(i))
        next
      end

      # Unknown character, skip it
      i += 1
    end

    # Calculate specificity using W3C formula
    (id_count * 100) +
      ((class_count + attr_count + pseudo_class_count) * 10) +
      (element_count + pseudo_element_count)
  end
end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Pure Ruby implementation of Cataract CSS parser
4
+ #
5
+ # This is a character-by-character parser that closely mirrors the C implementation.
6
+ # ==================================================================
7
+ # NO REGEXP ALLOWED - consume chars one at a time like the C version.
8
+ # ==================================================================
9
+ #
10
+ # Load this instead of the C extension with:
11
+ # require 'cataract/pure'
12
+ #
13
+ # Or run tests with:
14
+ # CATARACT_PURE=1 rake test
15
+
16
+ # Check if C extension is already loaded
17
# The pure Ruby implementation must not be loaded on top of the native one:
# bail out early if the native extension's marker constant is already defined.
native_extension_marker = defined?(Cataract::NATIVE_EXTENSION_LOADED)
raise LoadError, 'Cataract C extension is already loaded. Cannot load pure Ruby version.' if native_extension_marker

# Base module and exception hierarchy, declared before anything else is required
module Cataract
  # Root of the library's exception classes
  class Error < StandardError
  end

  # Specialisation of Error
  class DepthError < Error
  end

  # Specialisation of Error
  class SizeError < Error
  end
end
27
+
28
+ require_relative 'version'
29
+
30
+ # Load struct definitions and supporting files
31
+ # (These are also loaded by lib/cataract.rb, but we need them here for direct require)
32
+ require_relative 'declaration'
33
+ require_relative 'rule'
34
+ require_relative 'at_rule'
35
+ require_relative 'import_statement'
36
+ require_relative 'stylesheet_scope'
37
+ require_relative 'stylesheet'
38
+ require_relative 'declarations'
39
+ require_relative 'import_resolver'
40
+
41
+ # Add to_s method to Declarations class for pure Ruby mode
42
module Cataract
  class Declarations
    # Render every declaration as `property: value;` (with ` !important`
    # before the semicolon when flagged), separated by single spaces.
    #
    # @return [String] CSS text; empty when there are no declarations
    def to_s
      final_index = @values.length - 1
      css = String.new

      @values.each_with_index do |declaration, index|
        css << declaration.property << ': ' << declaration.value
        css << ' !important' if declaration.important
        # A separating space follows every semicolon except the last one
        css << (index < final_index ? '; ' : ';')
      end

      css
    end
  end
end
59
+
60
+ # Load pure Ruby implementation modules
61
+ require_relative 'pure/byte_constants'
62
+ require_relative 'pure/helpers'
63
+ require_relative 'pure/specificity'
64
+ require_relative 'pure/imports'
65
+ require_relative 'pure/serializer'
66
+ require_relative 'pure/parser'
67
+ require_relative 'pure/flatten'
68
+
69
module Cataract
  # Marker: the pure Ruby implementation is the one that got loaded
  PURE_RUBY_LOADED = true

  # Which implementation backs this module
  IMPLEMENTATION = :ruby

  # Compile flags (same shape as the C version reports)
  COMPILE_FLAGS = { debug: false, str_buf_optimization: false, pure_ruby: true }.freeze

  class << self
    # Run the pure Ruby Parser over a CSS string and return its raw result.
    #
    # @api private
    # @param css_string [String] CSS to parse
    # @return [Hash] {
    #   rules: Array<Rule>,           # Flat array of Rule/AtRule structs
    #   _media_index: Hash,           # Symbol => Array of rule IDs
    #   charset: String|nil,          # @charset value if present
    #   _has_nesting: Boolean         # Whether any nested rules exist
    # }
    def _parse_css(css_string)
      Parser.new(css_string).parse
    end

    # Parse CSS into a Stylesheet, optionally resolving imports first.
    #
    # NOTE: Copied from cataract.rb
    # Need to untangle this eventually
    #
    # @param css [String] CSS to parse
    # @param imports [Object] when truthy, passed to ImportResolver.resolve
    #   together with the CSS before parsing
    def parse_css(css, imports: false)
      css = ImportResolver.resolve(css, imports) if imports

      Stylesheet.parse(css)
    end

    # Flatten stylesheet rules according to CSS cascade rules
    #
    # @param stylesheet [Stylesheet] Stylesheet to flatten
    # @return [Stylesheet] New stylesheet with flattened rules
    def flatten(stylesheet)
      Flatten.flatten(stylesheet, mutate: false)
    end

    # Flatten stylesheet rules in-place (mutates receiver)
    #
    # @param stylesheet [Stylesheet] Stylesheet to flatten
    # @return [Stylesheet] Same stylesheet (mutated)
    def flatten!(stylesheet)
      Flatten.flatten(stylesheet, mutate: true)
    end

    # Deprecated: Use flatten instead
    def merge(stylesheet)
      warn 'Cataract.merge is deprecated, use Cataract.flatten instead', uplevel: 1
      flatten(stylesheet)
    end

    # Deprecated: Use flatten! instead
    def merge!(stylesheet)
      warn 'Cataract.merge! is deprecated, use Cataract.flatten! instead', uplevel: 1
      flatten!(stylesheet)
    end

    # Expand a single shorthand declaration into longhand declarations.
    # Underscore prefix indicates semi-private API - use with caution.
    #
    # @param decl [Declaration] Declaration to expand
    # @return [Array<Declaration>] Array of expanded longhand declarations
    # @api private
    def _expand_shorthand(decl)
      Flatten._expand_shorthand(decl)
    end
  end

  # Stub added to Stylesheet for the pure Ruby implementation
  class Stylesheet
    # Color conversion is only available in the native C extension
    #
    # @raise [NotImplementedError] Always raises - color conversion requires C extension
    def convert_colors!(*_args)
      raise NotImplementedError, 'convert_colors! is only available in the native C extension'
    end
  end
end
data/lib/cataract/rule.rb CHANGED
@@ -1,16 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Cataract
4
- # Represents a CSS rule with a selector and declarations.
5
- #
6
- # Rule is a C struct defined as: `Struct.new(:id, :selector, :declarations, :specificity)`
7
- #
8
- # Rules are created by the parser and stored in Stylesheet objects. Each rule
9
- # contains:
4
+ # Rules are created by the parser and stored in Stylesheet objects. Each rule contains:
10
5
  # - An ID (position in the stylesheet)
11
6
  # - A CSS selector string
12
7
  # - An array of Declaration structs
13
8
  # - A specificity value (calculated lazily)
9
+ # - Parent rule ID for nested rules (nil if top-level)
10
+ # - Nesting style (0=implicit, 1=explicit, nil=not nested)
14
11
  #
15
12
  # Media query information is stored separately in Stylesheet's media_index.
16
13
  #
@@ -25,6 +22,17 @@ module Cataract
25
22
  # @attr [String] selector The CSS selector (e.g., "body", ".class", "#id")
26
23
  # @attr [Array<Declaration>] declarations Array of CSS property declarations
27
24
  # @attr [Integer, nil] specificity CSS specificity value (calculated lazily)
25
+ # @attr [Integer, nil] parent_rule_id Parent rule ID for nested rules
26
+ # @attr [Integer, nil] nesting_style 0=implicit, 1=explicit, nil=not nested
27
# Struct backing every rule; member order is id, selector, declarations,
# specificity, then the two nesting-related fields.
Rule = Struct.new(:id, :selector, :declarations, :specificity, :parent_rule_id, :nesting_style)
35
+
28
36
  class Rule
29
37
  # Silence warning about method redefinition. We redefine below to lazily calculate
30
38
  # specificity
@@ -112,20 +120,66 @@ module Cataract
112
120
  end
113
121
  end
114
122
 
115
- # Compare rules by their attributes rather than object identity.
123
+ # Compare rules for logical equality based on CSS semantics.
124
+ #
125
+ # Two rules are equal if they have the same selector and declarations.
126
+ # Shorthand properties are expanded before comparison, so
127
+ # `margin: 10px` equals `margin-top: 10px; margin-right: 10px; ...`
128
+ #
129
+ # Internal implementation details (id, specificity) are not considered
130
+ # since they don't affect the CSS semantics.
116
131
  #
117
- # Two rules are equal if they have the same id, selector, declarations, and specificity.
132
+ # Can also compare against a CSS string, which is parsed and compared.
118
133
  #
119
- # @param other [Object] Object to compare with
120
- # @return [Boolean] true if rules have same attributes
134
+ # @param other [Object] Object to compare with (Rule or String)
135
+ # @return [Boolean] true if rules have same selector and declarations
121
136
  def ==(other)
122
- return false unless other.is_a?(Rule)
137
+ case other
138
+ when Rule
139
+ return false unless selector == other.selector
123
140
 
124
- id == other.id &&
125
- selector == other.selector &&
126
- declarations == other.declarations &&
127
- specificity == other.specificity
141
+ expanded_declarations == other.expanded_declarations
142
+ when String
143
+ # Parse CSS string and compare to first rule
144
+ parsed = Cataract.parse_css(other)
145
+ return false unless parsed.rules.size == 1
146
+
147
+ self == parsed.rules.first
148
+ else
149
+ false
150
+ end
128
151
  end
129
152
  alias eql? ==
153
+
154
+ # Generate hash code for this rule.
155
+ #
156
+ # Hash is based on selector and expanded declarations to match the
157
+ # equality semantics. This allows rules to be used as Hash keys or
158
+ # in Sets correctly.
159
+ #
160
+ # @return [Integer] hash code
161
+ # rubocop:disable Naming/MemoizedInstanceVariableName
162
def hash
  # Built from the same parts that == compares, then memoized.
  # NOTE(review): no invalidation of @_hash is visible here - confirm the
  # rule is not mutated after it has been used as a Hash key or Set member.
  @_hash ||= begin
    identity = [self.class, selector, expanded_declarations]
    identity.hash
  end
end
165
+ # rubocop:enable Naming/MemoizedInstanceVariableName
166
+
167
+ protected
168
+
169
+ # Get expanded and normalized declarations for this rule.
170
+ #
171
+ # Shorthands are expanded into their longhand equivalents and sorted
172
+ # to enable semantic comparison. Result is cached.
173
+ #
174
+ # @return [Array<Declaration>] expanded declarations
175
+ # rubocop:disable Naming/MemoizedInstanceVariableName
176
def expanded_declarations
  # Each declaration is passed through Cataract._expand_shorthand, the
  # results are flattened into one list and ordered by property, value and
  # importance so that two lists can be compared directly. Memoized.
  # NOTE(review): no invalidation of the cache is visible here - confirm
  # declarations are not mutated after the first comparison.
  @_expanded_declarations ||=
    declarations
    .flat_map { |decl| Cataract._expand_shorthand(decl) }
    .sort_by { |longhand| [longhand.property, longhand.value, longhand.important ? 1 : 0] }
end
183
+ # rubocop:enable Naming/MemoizedInstanceVariableName
130
184
  end
131
185
  end