RubyGems - p_css - Versions diffs - 0.1.0 - Mend

p_css 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

checksums.yaml +7 -0
data/LICENSE.txt +21 -0
data/README.md +302 -0
data/lib/css/cascade.rb +168 -0
data/lib/css/code_points.rb +36 -0
data/lib/css/escape.rb +82 -0
data/lib/css/media_queries/context.rb +60 -0
data/lib/css/media_queries/evaluator.rb +157 -0
data/lib/css/media_queries/nodes.rb +41 -0
data/lib/css/media_queries/parser.rb +374 -0
data/lib/css/media_queries.rb +9 -0
data/lib/css/nesting.rb +229 -0
data/lib/css/nodes.rb +42 -0
data/lib/css/parser.rb +430 -0
data/lib/css/selectors/anb_parser.rb +174 -0
data/lib/css/selectors/matcher.rb +449 -0
data/lib/css/selectors/nodes.rb +61 -0
data/lib/css/selectors/parser.rb +395 -0
data/lib/css/selectors/serializer.rb +102 -0
data/lib/css/selectors/specificity.rb +81 -0
data/lib/css/selectors.rb +11 -0
data/lib/css/serializer.rb +167 -0
data/lib/css/token.rb +78 -0
data/lib/css/token_cursor.rb +49 -0
data/lib/css/tokenizer.rb +441 -0
data/lib/css/urange.rb +45 -0
data/lib/css/version.rb +3 -0
data/lib/css.rb +73 -0
data/lib/p_css.rb +1 -0
metadata +73 -0

data/lib/css/tokenizer.rb ADDED Viewed

@@ -0,0 +1,441 @@
+module CSS
+  # Tokenizer based on CSS Syntax Module Level 3/4 §4.
+  # https://www.w3.org/TR/css-syntax-3/#tokenization
+  class Tokenizer
+    include CodePoints
+    PUNCTUATION = {
+      '(' => :lparen,
+      ')' => :rparen,
+      ',' => :comma,
+      ':' => :colon,
+      ';' => :semicolon,
+      '[' => :lbracket,
+      ']' => :rbracket,
+      '{' => :lbrace,
+      '}' => :rbrace
+    }.freeze
+    # CR / FF (and CR LF) collapse to LF; NUL collapses to U+FFFD. Done in
+    # one pass.
+    PREPROCESS_RE = /\r\n?|\f|\0/.freeze
+    def initialize(input, preserve_comments: false)
+      @input             = preprocess(input)
+      @pos               = 0
+      @newlines          = collect_newline_offsets(@input)
+      @preserve_comments = preserve_comments
+    end
+    def tokenize
+      tokens = []
+      loop do
+        token = next_token
+        break if token.type == :eof
+        tokens << token
+      end
+      tokens
+    end
+    def next_token
+      consume_comments unless @preserve_comments
+      return Token.new(:eof) if @pos >= @input.length
+      start_offset = @pos
+      tok          = consume_one_token
+      line, column = line_column_at(start_offset)
+      tok.assign_position!(Position.new(line:, column:, offset: start_offset, end_offset: @pos))
+    end
+    private
+    def consume_one_token
+      return consume_comment_token if peek == '/' && peek(1) == '*'
+      c = consume
+      return consume_whitespace      if whitespace?(c)
+      return consume_string_token(c) if c == '"' || c == "'"
+      if (c == '+' || c == '-' || c == '.') && number_starts?(c, peek, peek(1))
+        reconsume
+        return consume_numeric_token
+      end
+      if (type = PUNCTUATION[c])
+        return Token.new(type)
+      end
+      case c
+      when '#'
+        if ident_code_point?(peek) || valid_escape?(peek, peek(1))
+          flag = ident_sequence_starts?(peek, peek(1), peek(2)) ? :id : :unrestricted
+          Token.new(:hash, consume_ident_sequence, flag:)
+        else
+          Token.new(:delim, c)
+        end
+      when '+', '.'
+        Token.new(:delim, c)
+      when '-'
+        if peek == '-' && peek(1) == '>'
+          consume
+          consume
+          Token.new(:cdc)
+        elsif ident_sequence_starts?(c, peek, peek(1))
+          reconsume
+          consume_ident_like_token
+        else
+          Token.new(:delim, c)
+        end
+      when '<'
+        if peek == '!' && peek(1) == '-' && peek(2) == '-'
+          consume
+          consume
+          consume
+          Token.new(:cdo)
+        else
+          Token.new(:delim, c)
+        end
+      when '@'
+        if ident_sequence_starts?(peek, peek(1), peek(2))
+          Token.new(:at_keyword, consume_ident_sequence)
+        else
+          Token.new(:delim, c)
+        end
+      when '\\'
+        if valid_escape?(c, peek)
+          reconsume
+          consume_ident_like_token
+        else
+          Token.new(:delim, c)
+        end
+      when '0'..'9'
+        reconsume
+        consume_numeric_token
+      else
+        if ident_start_code_point?(c)
+          reconsume
+          consume_ident_like_token
+        else
+          Token.new(:delim, c)
+        end
+      end
+    end
+    def preprocess(input)
+      input.encode('UTF-8').gsub(PREPROCESS_RE) {
+        $~[0] == "\0" ? CodePoints::REPLACEMENT : "\n"
+      }
+    end
+    def peek(offset = 0)
+      @input[@pos + offset]
+    end
+    def consume
+      c = @input[@pos]
+      return nil if c.nil?
+      @pos += 1
+      c
+    end
+    def reconsume
+      @pos -= 1
+    end
+    def collect_newline_offsets(input)
+      offsets = []
+      i       = -1
+      offsets << i while (i = input.index("\n", i + 1))
+      offsets
+    end
+    # Newline characters themselves are reported as belonging to the line
+    # they terminate (col = offset + 1 on line 1, etc).
+    def line_column_at(offset)
+      idx     = @newlines.bsearch_index { it >= offset } || @newlines.size
+      prev_nl = idx.zero? ? -1 : @newlines[idx - 1]
+      [idx + 1, offset - prev_nl]
+    end
+    def whitespace?(c)
+      c == ' ' || c == "\n" || c == "\t"
+    end
+    def non_printable?(c)
+      return false if c.nil?
+      o = c.ord
+      o <= 0x08 || o == 0x0B || (0x0E..0x1F).cover?(o) || o == 0x7F
+    end
+    # §4.3.8.
+    def valid_escape?(c1, c2)
+      c1 == '\\' && c2 != "\n" && !c2.nil?
+    end
+    # §4.3.9.
+    def ident_sequence_starts?(c1, c2, c3)
+      case c1
+      when '-'
+        ident_start_code_point?(c2) || c2 == '-' || valid_escape?(c2, c3)
+      when '\\'
+        valid_escape?(c1, c2)
+      else
+        ident_start_code_point?(c1)
+      end
+    end
+    # §4.3.10.
+    def number_starts?(c1, c2, c3)
+      case c1
+      when '+', '-'
+        digit?(c2) || (c2 == '.' && digit?(c3))
+      when '.'
+        digit?(c2)
+      else
+        digit?(c1)
+      end
+    end
+    # §4.3.2. Skips through `/* ... */` comments without producing tokens.
+    def consume_comments
+      while peek == '/' && peek(1) == '*'
+        consume
+        consume
+        until eof?
+          if consume == '*' && peek == '/'
+            consume
+            break
+          end
+        end
+      end
+    end
+    # When `preserve_comments` is on, comments are emitted as tokens whose
+    # value is the body between `/*` and `*/`.
+    def consume_comment_token
+      consume
+      consume
+      buf = +''
+      until eof?
+        c = consume
+        if c == '*' && peek == '/'
+          consume
+          break
+        end
+        buf << c
+      end
+      Token.new(:comment, buf)
+    end
+    def eof?
+      @pos >= @input.length
+    end
+    def consume_whitespace
+      consume while whitespace?(peek)
+      Token.new(:whitespace)
+    end
+    # §4.3.5.
+    def consume_string_token(ending)
+      buf = +''
+      loop do
+        c = consume
+        case c
+        when nil, ending
+          return Token.new(:string, buf)
+        when "\n"
+          reconsume
+          return Token.new(:bad_string)
+        when '\\'
+          n = peek
+          if n.nil?
+            next
+          elsif n == "\n"
+            consume
+          else
+            buf << consume_escaped_code_point
+          end
+        else
+          buf << c
+        end
+      end
+    end
+    # §4.3.7. Assumes the backslash has already been consumed.
+    def consume_escaped_code_point
+      c = consume
+      return CodePoints::REPLACEMENT if c.nil?
+      return c                       unless hex_digit?(c)
+      hex = c.dup
+      hex << consume while hex.length < 6 && hex_digit?(peek)
+      consume if whitespace?(peek)
+      n = hex.to_i(16)
+      if n.zero? || (0xD800..0xDFFF).cover?(n) || n > 0x10FFFF
+        CodePoints::REPLACEMENT
+      else
+        [n].pack('U')
+      end
+    end
+    # §4.3.11.
+    def consume_ident_sequence
+      buf = +''
+      loop do
+        c = consume
+        if ident_code_point?(c)
+          buf << c
+        elsif valid_escape?(c, peek)
+          buf << consume_escaped_code_point
+        else
+          reconsume unless c.nil?
+          return buf
+        end
+      end
+    end
+    # §4.3.4.
+    def consume_ident_like_token
+      name = consume_ident_sequence
+      if name.casecmp('url').zero? && peek == '('
+        consume
+        consume while whitespace?(peek) && whitespace?(peek(1))
+        n1 = peek
+        n2 = whitespace?(n1) ? peek(1) : n1
+        if n1 == '"' || n1 == "'" || (whitespace?(n1) && (n2 == '"' || n2 == "'"))
+          Token.new(:function, name)
+        else
+          consume_url_token
+        end
+      elsif peek == '('
+        consume
+        Token.new(:function, name)
+      else
+        Token.new(:ident, name)
+      end
+    end
+    # §4.3.6. Assumes "url(" has already been consumed.
+    def consume_url_token
+      buf = +''
+      consume while whitespace?(peek)
+      loop do
+        c = consume
+        case c
+        when nil, ')'
+          return Token.new(:url, buf)
+        when '"', "'", '('
+          consume_bad_url_remnants
+          return Token.new(:bad_url)
+        when ' ', "\t", "\n"
+          consume while whitespace?(peek)
+          n = peek
+          if n.nil? || n == ')'
+            consume unless n.nil?
+            return Token.new(:url, buf)
+          else
+            consume_bad_url_remnants
+            return Token.new(:bad_url)
+          end
+        when '\\'
+          if valid_escape?(c, peek)
+            buf << consume_escaped_code_point
+          else
+            consume_bad_url_remnants
+            return Token.new(:bad_url)
+          end
+        else
+          if non_printable?(c)
+            consume_bad_url_remnants
+            return Token.new(:bad_url)
+          end
+          buf << c
+        end
+      end
+    end
+    # §4.3.14.
+    def consume_bad_url_remnants
+      loop do
+        c = consume
+        return if c.nil? || c == ')'
+        consume_escaped_code_point if valid_escape?(c, peek)
+      end
+    end
+    # §4.3.3.
+    def consume_numeric_token
+      number, flag = consume_number
+      if ident_sequence_starts?(peek, peek(1), peek(2))
+        Token.new(:dimension, number, flag:, unit: consume_ident_sequence)
+      elsif peek == '%'
+        consume
+        Token.new(:percentage, number)
+      else
+        Token.new(:number, number, flag:)
+      end
+    end
+    # §4.3.12. Returns [numeric_value, :integer | :number].
+    def consume_number
+      repr = +''
+      flag = :integer
+      repr << consume if peek == '+' || peek == '-'
+      repr << consume while digit?(peek)
+      if peek == '.' && digit?(peek(1))
+        repr << consume
+        repr << consume while digit?(peek)
+        flag = :number
+      end
+      if (peek == 'E' || peek == 'e') &&
+          (digit?(peek(1)) || ((peek(1) == '+' || peek(1) == '-') && digit?(peek(2))))
+        repr << consume
+        repr << consume if peek == '+' || peek == '-'
+        repr << consume while digit?(peek)
+        flag = :number
+      end
+      [flag == :integer ? repr.to_i : repr.to_f, flag]
+    end
+  end
+end

data/lib/css/urange.rb ADDED Viewed

@@ -0,0 +1,45 @@
+module CSS
+  # Parser for CSS <urange> tokens, e.g. `U+0-7F`, `U+26`, `U+10??`.
+  # https://drafts.csswg.org/css-syntax/#urange-syntax
+  #
+  # Operates on the source string rather than a token stream because the
+  # tokenizer destructively normalizes shapes like `U+0` (the `+` is
+  # absorbed into a number-token whose sign is lost on serialization).
+  # Sticking with the source preserves the exact form.
+  module Urange
+    URANGE_RE   = /\Au\+([0-9a-f?]{1,6})(?:-([0-9a-f]{1,6}))?\z/i.freeze
+    WILDCARD_RE = /\A[0-9a-f]*\?+\z/i.freeze
+    MAX_CODEPOINT = 0x10FFFF
+    extend self
+    def parse(input)
+      s = input.to_s.strip
+      m = URANGE_RE.match(s)
+      raise ParseError, "invalid urange: #{input.inspect}" unless m
+      start_str, end_str = m[1], m[2]
+      first, last =
+        if end_str
+          raise ParseError, 'wildcards are not allowed in range form' if start_str.include?('?')
+          [start_str.to_i(16), end_str.to_i(16)]
+        elsif start_str.include?('?')
+          raise ParseError, 'wildcards must be trailing' unless start_str.match?(WILDCARD_RE)
+          [start_str.tr('?', '0').to_i(16), start_str.tr('?', 'f').to_i(16)]
+        else
+          n = start_str.to_i(16)
+          [n, n]
+        end
+      raise ParseError, "codepoint out of range: U+#{format('%X', last)}" if last > MAX_CODEPOINT
+      raise ParseError, "urange start must be <= end (U+#{format('%X', first)} > U+#{format('%X', last)})" if first > last
+      Nodes::UnicodeRange.new(first:, last:)
+    end
+  end
+end

data/lib/css/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module CSS
+  VERSION = '0.1.0'
+end

data/lib/css.rb ADDED Viewed

@@ -0,0 +1,73 @@
+module CSS
+  # Bracket information for the three "simple block" pairs. Indexed both by
+  # opening token type (for the parser) and by opening character (for the
+  # serializer).
+  BRACKET_OPEN_CHAR  = {lbrace: '{', lbracket: '[', lparen: '('}.freeze
+  BRACKET_CLOSE_TYPE = {lbrace: :rbrace, lbracket: :rbracket, lparen: :rparen}.freeze
+  BRACKET_PAIRS      = {'{' => '}', '[' => ']', '(' => ')'}.freeze
+end
+require_relative 'css/version'
+require_relative 'css/code_points'
+require_relative 'css/escape'
+require_relative 'css/token'
+require_relative 'css/tokenizer'
+require_relative 'css/token_cursor'
+require_relative 'css/nodes'
+require_relative 'css/parser'
+require_relative 'css/selectors'
+require_relative 'css/media_queries'
+require_relative 'css/serializer'
+require_relative 'css/urange'
+require_relative 'css/nesting'
+require_relative 'css/cascade'
+module CSS
+  class ParseError < StandardError
+    attr_reader :position
+    def initialize(message, position: nil)
+      super(position ? "#{position}: #{message}" : message)
+      @position = position
+    end
+  end
+  class << self
+    def tokenize(input, **opts)                    = Tokenizer.new(input, **opts).tokenize
+    def parse_stylesheet(input, **opts)            = Parser.parse_stylesheet(input, **opts)
+    def parse_rule(input, **opts)                  = Parser.parse_rule(input, **opts)
+    def parse_declaration(input, **opts)           = Parser.parse_declaration(input, **opts)
+    def parse_block_contents(input, **opts)        = Parser.parse_block_contents(input, **opts)
+    def parse_component_value(input, **opts)       = Parser.parse_component_value(input, **opts)
+    def parse_component_values(input, **opts)      = Parser.parse_component_values(input, **opts)
+    def parse_comma_separated_values(input, **opts) = Parser.parse_comma_separated_values(input, **opts)
+    def parse_urange(input) = Urange.parse(input)
+    def parse_selector_list(input) = Selectors::Parser.parse_selector_list(input)
+    def parse_selector(input)      = Selectors::Parser.parse_selector(input)
+    def parse_anb(input)           = Selectors::AnBParser.parse(input)
+    def specificity(selector) = Selectors::SpecificityCalculator.calculate(selector)
+    def matches?(element, selector) = Selectors::Matcher.matches?(element, selector)
+    def parse_media_query_list(input) = MediaQueries::Parser.parse(input)
+    def media_matches?(query_list, context)
+      ql = query_list.is_a?(String) ? MediaQueries::Parser.parse(query_list) : query_list
+      ctx = context.is_a?(MediaQueries::Context) ? context : MediaQueries::Context.default(**context.to_h)
+      MediaQueries::Evaluator.evaluate(ql, ctx)
+    end
+    def cascade(stylesheet, context: MediaQueries::Context.default)
+      Cascade.new(stylesheet, context:)
+    end
+    def desugar(stylesheet) = Nesting.desugar(stylesheet)
+    def serialize(node) = Serializer.serialize(node)
+    alias parse parse_stylesheet
+  end
+end

data/lib/p_css.rb ADDED Viewed

@@ -0,0 +1 @@
+require_relative 'css'

metadata ADDED Viewed

@@ -0,0 +1,73 @@
+--- !ruby/object:Gem::Specification
+name: p_css
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Keita Urashima
+bindir: bin
+cert_chain: []
+date: 1980-01-02 00:00:00.000000000 Z
+dependencies: []
+description: p_css is a Ruby implementation of the CSS Syntax Level 4 tokenizer and
+  parser, including support for CSS nesting.
+email:
+- ursm@ursm.jp
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- LICENSE.txt
+- README.md
+- lib/css.rb
+- lib/css/cascade.rb
+- lib/css/code_points.rb
+- lib/css/escape.rb
+- lib/css/media_queries.rb
+- lib/css/media_queries/context.rb
+- lib/css/media_queries/evaluator.rb
+- lib/css/media_queries/nodes.rb
+- lib/css/media_queries/parser.rb
+- lib/css/nesting.rb
+- lib/css/nodes.rb
+- lib/css/parser.rb
+- lib/css/selectors.rb
+- lib/css/selectors/anb_parser.rb
+- lib/css/selectors/matcher.rb
+- lib/css/selectors/nodes.rb
+- lib/css/selectors/parser.rb
+- lib/css/selectors/serializer.rb
+- lib/css/selectors/specificity.rb
+- lib/css/serializer.rb
+- lib/css/token.rb
+- lib/css/token_cursor.rb
+- lib/css/tokenizer.rb
+- lib/css/urange.rb
+- lib/css/version.rb
+- lib/p_css.rb
+homepage: https://github.com/ursm/p_css
+licenses:
+- MIT
+metadata:
+  bug_tracker_uri: https://github.com/ursm/p_css/issues
+  changelog_uri: https://github.com/ursm/p_css/releases
+  source_code_uri: https://github.com/ursm/p_css
+  rubygems_mfa_required: 'true'
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '3.4'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.6.9
+specification_version: 4
+summary: A CSS Syntax Level 4 parser for Ruby, with nesting support.
+test_files: []