RubyGems - cataract - Versions diffs - 0.1.3 → 0.2.0 - Mend

cataract 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

checksums.yaml +4 -4
data/.github/workflows/ci-manual-rubies.yml +44 -0
data/.overcommit.yml +1 -1
data/.rubocop.yml +96 -4
data/.rubocop_todo.yml +186 -0
data/BENCHMARKS.md +62 -141
data/CHANGELOG.md +20 -0
data/RAGEL_MIGRATION.md +2 -2
data/README.md +37 -4
data/Rakefile +72 -32
data/cataract.gemspec +4 -1
data/ext/cataract/cataract.c +59 -50
data/ext/cataract/cataract.h +5 -3
data/ext/cataract/css_parser.c +173 -65
data/ext/cataract/extconf.rb +2 -2
data/ext/cataract/{merge.c → flatten.c} +526 -468
data/ext/cataract/shorthand_expander.c +164 -115
data/lib/cataract/at_rule.rb +8 -9
data/lib/cataract/declaration.rb +18 -0
data/lib/cataract/import_resolver.rb +63 -43
data/lib/cataract/import_statement.rb +49 -0
data/lib/cataract/pure/byte_constants.rb +69 -0
data/lib/cataract/pure/flatten.rb +1145 -0
data/lib/cataract/pure/helpers.rb +35 -0
data/lib/cataract/pure/imports.rb +268 -0
data/lib/cataract/pure/parser.rb +1340 -0
data/lib/cataract/pure/serializer.rb +590 -0
data/lib/cataract/pure/specificity.rb +206 -0
data/lib/cataract/pure.rb +153 -0
data/lib/cataract/rule.rb +69 -15
data/lib/cataract/stylesheet.rb +356 -49
data/lib/cataract/version.rb +1 -1
data/lib/cataract.rb +43 -26
metadata +14 -26
data/benchmarks/benchmark_harness.rb +0 -193
data/benchmarks/benchmark_merging.rb +0 -121
data/benchmarks/benchmark_optimization_comparison.rb +0 -168
data/benchmarks/benchmark_parsing.rb +0 -153
data/benchmarks/benchmark_ragel_removal.rb +0 -56
data/benchmarks/benchmark_runner.rb +0 -70
data/benchmarks/benchmark_serialization.rb +0 -180
data/benchmarks/benchmark_shorthand.rb +0 -109
data/benchmarks/benchmark_shorthand_expansion.rb +0 -176
data/benchmarks/benchmark_specificity.rb +0 -124
data/benchmarks/benchmark_string_allocation.rb +0 -151
data/benchmarks/benchmark_stylesheet_to_s.rb +0 -62
data/benchmarks/benchmark_to_s_cached.rb +0 -55
data/benchmarks/benchmark_value_splitter.rb +0 -54
data/benchmarks/benchmark_yjit.rb +0 -158
data/benchmarks/benchmark_yjit_workers.rb +0 -61
data/benchmarks/profile_to_s.rb +0 -23
data/benchmarks/speedup_calculator.rb +0 -83
data/benchmarks/system_metadata.rb +0 -81
data/benchmarks/templates/benchmarks.md.erb +0 -221
data/benchmarks/yjit_tests.rb +0 -141
data/scripts/fuzzer/run.rb +0 -828
data/scripts/fuzzer/worker.rb +0 -99
data/scripts/generate_benchmarks_md.rb +0 -155

data/lib/cataract/pure/helpers.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+# Pure Ruby CSS parser - Helper methods
+# NO REGEXP ALLOWED - char-by-char parsing only
+module Cataract
+  # Check if a byte is whitespace (space, tab, newline, CR)
+  # @param byte [Integer] Byte value from String#getbyte
+  # @return [Boolean] true if whitespace
+  def self.is_whitespace?(byte)
+    byte == BYTE_SPACE || byte == BYTE_TAB || byte == BYTE_NEWLINE || byte == BYTE_CR
+  end
+  # Check if byte is a letter (a-z, A-Z)
+  # @param byte [Integer] Byte value from String#getbyte
+  # @return [Boolean] true if letter
+  def self.letter?(byte)
+    (byte >= BYTE_LOWER_A && byte <= BYTE_LOWER_Z) ||
+    (byte >= BYTE_UPPER_A && byte <= BYTE_UPPER_Z)
+  end
+  # Check if byte is a digit (0-9)
+  # @param byte [Integer] Byte value from String#getbyte
+  # @return [Boolean] true if digit
+  def self.digit?(byte)
+    byte >= BYTE_DIGIT_0 && byte <= BYTE_DIGIT_9
+  end
+  # Check if byte is alphanumeric, hyphen, or underscore (CSS identifier char)
+  # @param byte [Integer] Byte value from String#getbyte
+  # @return [Boolean] true if valid identifier character
+  def self.ident_char?(byte)
+    letter?(byte) || digit?(byte) || byte == BYTE_HYPHEN || byte == BYTE_UNDERSCORE
+  end
+end

data/lib/cataract/pure/imports.rb ADDED Viewed

@@ -0,0 +1,268 @@
+# frozen_string_literal: true
+# Pure Ruby CSS parser - Import extraction
+# NO REGEXP ALLOWED - char-by-char parsing only
+module Cataract
+  # Helper: Case-insensitive ASCII byte comparison
+  # Compares bytes at given position with ASCII pattern (case-insensitive)
+  # Safe to use even if position is in middle of multi-byte UTF-8 characters
+  # Returns true if match, false otherwise
+  def self.match_ascii_ci?(str, pos, pattern)
+    pattern_len = pattern.bytesize
+    return false if pos + pattern_len > str.bytesize
+    i = 0
+    while i < pattern_len
+      str_byte = str.getbyte(pos + i)
+      pat_byte = pattern.getbyte(i)
+      # Convert both to lowercase for comparison (ASCII only: A-Z -> a-z)
+      str_byte += BYTE_CASE_DIFF if str_byte >= BYTE_UPPER_A && str_byte <= BYTE_UPPER_Z
+      pat_byte += BYTE_CASE_DIFF if pat_byte >= BYTE_UPPER_A && pat_byte <= BYTE_UPPER_Z
+      return false if str_byte != pat_byte
+      i += 1
+    end
+    true
+  end
+  # Extract @import statements from CSS
+  #
+  # @param css_string [String] CSS to scan for @imports
+  # @return [Array<Hash>] Array of import hashes with :url, :media, :full_match
+  def self.extract_imports(css_string)
+    imports = []
+    i = 0
+    len = css_string.length
+    while i < len
+      # Skip whitespace and comments
+      while i < len
+        byte = css_string.getbyte(i)
+        if is_whitespace?(byte)
+          i += 1
+        elsif i + 1 < len && css_string.getbyte(i) == BYTE_SLASH && css_string.getbyte(i + 1) == BYTE_STAR
+          # Skip /* */ comment
+          i += 2
+          while i + 1 < len && !(css_string.getbyte(i) == BYTE_STAR && css_string.getbyte(i + 1) == BYTE_SLASH)
+            i += 1
+          end
+          i += 2 if i + 1 < len # Skip */
+        else
+          break
+        end
+      end
+      break if i >= len
+      # Check for @import (case-insensitive byte comparison)
+      if match_ascii_ci?(css_string, i, '@import')
+        import_start = i
+        i += 7
+        # Skip whitespace after @import
+        while i < len && is_whitespace?(css_string.getbyte(i))
+          i += 1
+        end
+        # Check for optional url( (case-insensitive byte comparison)
+        has_url_function = false
+        if match_ascii_ci?(css_string, i, 'url(')
+          has_url_function = true
+          i += 4
+          while i < len && is_whitespace?(css_string.getbyte(i))
+            i += 1
+          end
+        end
+        # Find opening quote
+        byte = css_string.getbyte(i) if i < len
+        if i >= len || (byte != BYTE_DQUOTE && byte != BYTE_SQUOTE)
+          # Invalid @import, skip to next semicolon
+          while i < len && css_string.getbyte(i) != BYTE_SEMICOLON
+            i += 1
+          end
+          i += 1 if i < len # Skip semicolon
+          next
+        end
+        quote_char = byte
+        i += 1 # Skip opening quote
+        url_start = i
+        # Find closing quote (handle escaped quotes)
+        while i < len && css_string.getbyte(i) != quote_char
+          if css_string.getbyte(i) == BYTE_BACKSLASH && i + 1 < len
+            i += 2 # Skip escaped character
+          else
+            i += 1
+          end
+        end
+        break if i >= len # Unterminated string
+        url_end = i
+        i += 1 # Skip closing quote
+        # Skip closing paren if we had url(
+        if has_url_function
+          while i < len && is_whitespace?(css_string.getbyte(i))
+            i += 1
+          end
+          if i < len && css_string.getbyte(i) == BYTE_RPAREN
+            i += 1
+          end
+        end
+        # Skip whitespace before optional media query or semicolon
+        while i < len && is_whitespace?(css_string.getbyte(i))
+          i += 1
+        end
+        # Check for optional media query (everything until semicolon)
+        media_start = nil
+        media_end = nil
+        if i < len && css_string.getbyte(i) != BYTE_SEMICOLON
+          media_start = i
+          # Find semicolon
+          while i < len && css_string.getbyte(i) != BYTE_SEMICOLON
+            i += 1
+          end
+          media_end = i
+          # Trim trailing whitespace from media query
+          while media_end > media_start && is_whitespace?(css_string.getbyte(media_end - 1))
+            media_end -= 1
+          end
+        end
+        # Skip semicolon
+        i += 1 if i < len && css_string.getbyte(i) == BYTE_SEMICOLON
+        import_end = i
+        # Build result hash
+        url = css_string[url_start...url_end]
+        media = media_start && media_end > media_start ? css_string[media_start...media_end] : nil
+        full_match = css_string[import_start...import_end]
+        imports << { url: url, media: media, full_match: full_match }
+      elsif match_ascii_ci?(css_string, i, '@charset')
+        # Skip @charset if present - it can come before @import
+        while i < len && css_string.getbyte(i) != BYTE_SEMICOLON
+          i += 1
+        end
+        i += 1 if i < len # Skip semicolon
+      else
+        # If we hit any other content (rules, other at-rules), stop scanning
+        # Per CSS spec, @import must be at the top (only @charset can come before)
+        byte = css_string.getbyte(i) if i < len
+        if i < len && !is_whitespace?(byte)
+          break
+        end
+        i += 1
+      end
+    end
+    imports
+  end
+  # Parse media query symbol into array of media types
+  #
+  # @param media_query_sym [Symbol] Media query as symbol (e.g., :screen, :"print, screen")
+  # @return [Array<Symbol>] Array of individual media types
+  #
+  # @example
+  #   parse_media_types(:screen) #=> [:screen]
+  #   parse_media_types(:"print, screen") #=> [:print, :screen]
+  def self.parse_media_types(media_query_sym)
+    query = media_query_sym.to_s
+    types = []
+    i = 0
+    len = query.length
+    kwords = %w[and or not only]
+    while i < len
+      # Skip whitespace
+      while i < len && is_whitespace?(query.getbyte(i))
+        i += 1
+      end
+      break if i >= len
+      # Check for opening paren - skip conditions like "(min-width: 768px)"
+      if query.getbyte(i) == BYTE_LPAREN
+        # Skip to matching closing paren
+        paren_depth = 1
+        i += 1
+        while i < len && paren_depth > 0
+          byte = query.getbyte(i)
+          if byte == BYTE_LPAREN
+            paren_depth += 1
+          elsif byte == BYTE_RPAREN
+            paren_depth -= 1
+          end
+          i += 1
+        end
+        next
+      end
+      # Find end of word (media type or keyword)
+      word_start = i
+      byte = query.getbyte(i)
+      while i < len && !is_whitespace?(byte) && byte != BYTE_COMMA && byte != BYTE_LPAREN && byte != BYTE_COLON
+        i += 1
+        byte = query.getbyte(i) if i < len
+      end
+      if i > word_start
+        word = query[word_start...i]
+        # Check if this is a media feature (followed by ':')
+        is_media_feature = (i < len && query.getbyte(i) == BYTE_COLON)
+        # Check if it's a keyword (and, or, not, only)
+        is_keyword = kwords.include?(word)
+        if !is_keyword && !is_media_feature
+          # This is a media type - add it as symbol
+          types << word.to_sym
+        end
+      end
+      # Skip to comma or end
+      while i < len && query.getbyte(i) != BYTE_COMMA
+        if query.getbyte(i) == BYTE_LPAREN
+          # Skip condition
+          paren_depth = 1
+          i += 1
+          while i < len && paren_depth > 0
+            byte = query.getbyte(i)
+            if byte == BYTE_LPAREN
+              paren_depth += 1
+            elsif byte == BYTE_RPAREN
+              paren_depth -= 1
+            end
+            i += 1
+          end
+        else
+          i += 1
+        end
+      end
+      i += 1 if i < len && query.getbyte(i) == BYTE_COMMA # Skip comma
+    end
+    types
+  end
+end