RubyGems - parselly - Versions diffs - 1.0.0 → 1.2.0 - Mend

parselly 1.0.0 → 1.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

data/lib/parselly/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Parselly
-  VERSION = '1.0.0'
+  VERSION = '1.2.0'
 end

data/lib/parselly.rb CHANGED Viewed

@@ -8,6 +8,21 @@ require_relative 'parselly/parser'
 require_relative 'parselly/version'
 module Parselly
+  ParseResult = Struct.new(:ast, :errors)
+  class ParseError < StandardError
+    attr_reader :error
+    def initialize(error)
+      @error = error
+      super(error[:message])
+    end
+  end
+  def parse(selector, tolerant: false)
+    Parser.new.parse(selector, tolerant: tolerant)
+  end
   def sanitize(selector)
     scanner = StringScanner.new(selector)
     result = +''
@@ -46,5 +61,5 @@ module Parselly
     "\\#{char.ord.to_s(16)} "
   end
-  module_function :sanitize, :escaped_hex
+  module_function :parse, :sanitize, :escaped_hex
 end

data/parser.y CHANGED Viewed

@@ -82,7 +82,7 @@ rule
   type_selector
     : IDENT
-      { result = Node.new(:type_selector, val[0], @current_position) }
+      { result = Node.new(:type_selector, identifier_value(val[0]), @current_position, raw_value: identifier_raw(val[0])) }
     | STAR
       { result = Node.new(:universal_selector, '*', @current_position) }
     ;
@@ -102,30 +102,30 @@ rule
   id_selector
     : HASH IDENT
-      { result = Node.new(:id_selector, val[1], @current_position) }
+      { result = Node.new(:id_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
     ;
   class_selector
     : DOT IDENT
-      { result = Node.new(:class_selector, val[1], @current_position) }
+      { result = Node.new(:class_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
     ;
   attribute_selector
     : LBRACKET IDENT RBRACKET
-      { result = Node.new(:attribute_selector, val[1], @current_position) }
+      { result = Node.new(:attribute_selector, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
     | LBRACKET IDENT attr_matcher STRING RBRACKET
       {
         result = Node.new(:attribute_selector, nil, @current_position)
-        result.add_child(Node.new(:attribute, val[1], @current_position))
+        result.add_child(Node.new(:attribute, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])))
         result.add_child(val[2])
         result.add_child(Node.new(:value, val[3], @current_position))
       }
     | LBRACKET IDENT attr_matcher IDENT RBRACKET
       {
         result = Node.new(:attribute_selector, nil, @current_position)
-        result.add_child(Node.new(:attribute, val[1], @current_position))
+        result.add_child(Node.new(:attribute, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])))
         result.add_child(val[2])
-        result.add_child(Node.new(:value, val[3], @current_position))
+        result.add_child(Node.new(:value, identifier_value(val[3]), @current_position, raw_value: identifier_raw(val[3])))
       }
     ;
@@ -146,18 +146,24 @@ rule
   pseudo_class_selector
     : COLON IDENT
-      { result = Node.new(:pseudo_class, val[1], @current_position) }
+      { result = Node.new(:pseudo_class, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1])) }
     | COLON IDENT LPAREN any_value RPAREN
       {
-        fn = Node.new(:pseudo_function, val[1], @current_position)
+        fn = Node.new(:pseudo_function, identifier_value(val[1]), @current_position, raw_value: identifier_raw(val[1]))
         fn.add_child(val[3])
         result = fn
       }
+    | IDENT LPAREN any_value RPAREN
+      {
+        fn = Node.new(:pseudo_function, identifier_value(val[0]), @current_position, raw_value: identifier_raw(val[0]))
+        fn.add_child(val[2])
+        result = fn
+      }
     ;
   pseudo_element_selector
     : COLON COLON IDENT
-      { result = Node.new(:pseudo_element, val[2], @current_position) }
+      { result = Node.new(:pseudo_element, identifier_value(val[2]), @current_position, raw_value: identifier_raw(val[2])) }
     ;
   any_value
@@ -258,34 +264,128 @@ rule
 end
 ---- header
+require 'set'
+# Pre-computed sets for faster lookup
+CAN_END_COMPOUND = Set[:IDENT, :STAR, :RPAREN, :RBRACKET].freeze
+CAN_START_COMPOUND = Set[:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].freeze
+TYPE_SELECTOR_TYPES = Set[:IDENT, :STAR].freeze
+SUBCLASS_SELECTOR_TYPES = Set[:DOT, :HASH, :LBRACKET, :COLON].freeze
+SUBCLASS_SELECTOR_END_TYPES = Set[:IDENT, :RBRACKET, :RPAREN].freeze
+NTH_PSEUDO_NAMES = Set['nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type', 'nth-col', 'nth-last-col'].freeze
+AN_PLUS_B_REGEX = /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/.freeze
 ---- inner
-def parse(input)
+def parse(input, tolerant: false)
+  @tolerant = tolerant
+  @errors = []
+  @error_index = nil
+  @suppress_errors = false
   @lexer = Parselly::Lexer.new(input)
-  @tokens = @lexer.tokenize
+  begin
+    @tokens = @lexer.tokenize
+  rescue RuntimeError => e
+    if tolerant
+      @errors << parse_error_from_exception(e)
+      return Parselly::ParseResult.new(nil, @errors)
+    end
+    raise
+  end
   preprocess_tokens!
   @index = 0
-  @current_position = { line: 1, column: 1 }
+  @current_position = { line: 1, column: 1, offset: 0 }
+  if tolerant
+    ast = parse_with_recovery
+    normalize_an_plus_b(ast) if ast
+    return Parselly::ParseResult.new(ast, @errors)
+  end
   ast = do_parse
   normalize_an_plus_b(ast)
   ast
 end
+def parse_with_recovery
+  do_parse
+rescue Parselly::ParseError, RuntimeError
+  parse_partial_ast
+end
+def parse_partial_ast
+  return nil unless @tokens && !@tokens.empty?
+  eof_token = @tokens.last if @tokens.last && @tokens.last[0] == false
+  tokens = @tokens.dup
+  tokens.pop if eof_token
+  limit = @error_index || tokens.length
+  while limit > 0
+    truncated = tokens[0...limit]
+    truncated << eof_token if eof_token
+    begin
+      return parse_from_tokens(truncated, suppress_errors: true)
+    rescue Parselly::ParseError, RuntimeError
+      limit -= 1
+    end
+  end
+  nil
+end
+def parse_from_tokens(tokens, suppress_errors: false)
+  @tokens = tokens
+  @index = 0
+  @current_position = { line: 1, column: 1, offset: 0 }
+  @suppress_errors = suppress_errors
+  do_parse
+ensure
+  @suppress_errors = false
+end
+def parse_error_from_exception(error)
+  line = nil
+  column = nil
+  offset = nil
+  if error.message =~ /at (\d+):(\d+)/
+    line = Regexp.last_match(1).to_i
+    column = Regexp.last_match(2).to_i
+  end
+  if error.message =~ /offset (\d+)/
+    offset = Regexp.last_match(1).to_i
+  end
+  { message: error.message, line: line, column: column, offset: offset }
+end
+def identifier_value(token)
+  token.respond_to?(:value) ? token.value : token
+end
+def identifier_raw(token)
+  token.respond_to?(:raw) ? token.raw : token
+end
 def preprocess_tokens!
-  new_tokens = []
-  i = 0
-  while i < @tokens.size
-    token = @tokens[i]
-    next_token = @tokens[i + 1]
-    new_tokens << token
-    if next_token && needs_descendant?(token, next_token)
-      pos = { line: token[2][:line], column: token[2][:column] }
-      new_tokens << [:DESCENDANT, ' ', pos]
+  return if @tokens.size <= 1
+  new_tokens = Array.new(@tokens.size + (@tokens.size / 2)) # Pre-allocate with conservative estimate
+  new_tokens_idx = 0
+  last_idx = @tokens.size - 1
+  @tokens.each_with_index do |token, i|
+    new_tokens[new_tokens_idx] = token
+    new_tokens_idx += 1
+    if i < last_idx
+      next_token = @tokens[i + 1]
+      if needs_descendant?(token, next_token)
+        pos = { line: token[2][:line], column: token[2][:column], offset: token[2][:offset] }
+        new_tokens[new_tokens_idx] = [:DESCENDANT, ' ', pos]
+        new_tokens_idx += 1
+      end
     end
-    i += 1
   end
-  @tokens = new_tokens
+  @tokens = new_tokens.first(new_tokens_idx)
 end
 # Insert DESCENDANT combinator if:
@@ -297,62 +397,42 @@ def needs_descendant?(current, next_tok)
   current_type = current[0]
   next_type = next_tok[0]
-  can_end = can_end_compound?(current_type)
-  can_start = can_start_compound?(next_type)
   # Type selector followed by subclass selector = same compound
-  if [:IDENT, :STAR].include?(current_type) &&
-     [:DOT, :HASH, :LBRACKET, :COLON].include?(next_type)
-    return false
+  # Subclass selector followed by subclass selector = same compound
+  if SUBCLASS_SELECTOR_TYPES.include?(next_type)
+    return false if TYPE_SELECTOR_TYPES.include?(current_type) ||
+                    SUBCLASS_SELECTOR_END_TYPES.include?(current_type)
   end
-  can_end && can_start
-end
-def can_end_compound?(token_type)
-  [:IDENT, :STAR, :RPAREN, :RBRACKET].include?(token_type)
-end
-def can_start_compound?(token_type)
-  # Type selectors and subclass selectors can start a compound selector
-  [:IDENT, :STAR, :DOT, :HASH, :LBRACKET, :COLON].include?(token_type)
+  CAN_END_COMPOUND.include?(current_type) && CAN_START_COMPOUND.include?(next_type)
 end
 def normalize_an_plus_b(node)
   return unless node.respond_to?(:children) && node.children
-  if node.type == :pseudo_function && nth_pseudo?(node.value)
+  if node.type == :pseudo_function && NTH_PSEUDO_NAMES.include?(node.value)
     child = node.children.first
-    if child && child.type == :selector_list
+    if child&.type == :selector_list
       an_plus_b_value = extract_an_plus_b_value(child)
       if an_plus_b_value
-        node.children[0] = Node.new(:an_plus_b, an_plus_b_value, child.position)
+        node.replace_child(0, Node.new(:an_plus_b, an_plus_b_value, child.position))
       end
     end
   end
   node.children.compact.each { |child| normalize_an_plus_b(child) }
 end
-def nth_pseudo?(name)
-  %w[nth-child nth-last-child nth-of-type nth-last-of-type nth-col nth-last-col].include?(name)
-end
 def extract_an_plus_b_value(selector_list_node)
   return nil unless selector_list_node.children.size == 1
   seq = selector_list_node.children.first
-  return nil unless seq.type == :simple_selector_sequence
-  return nil unless seq.children.size == 1
+  return nil unless seq.type == :simple_selector_sequence && seq.children.size == 1
   type_sel = seq.children.first
   return nil unless type_sel.type == :type_selector
   value = type_sel.value
-  if value =~ /^(even|odd|[+-]?\d*n(?:[+-]\d+)?|[+-]?n(?:[+-]\d+)?|\d+)$/
-    value
-  else
-    nil
-  end
+  value if value =~ AN_PLUS_B_REGEX
 end
 def next_token
@@ -368,5 +448,16 @@ end
 def on_error(token_id, val, vstack)
   token_name = token_to_str(token_id) || '?'
   pos = @current_position || { line: '?', column: '?' }
-  raise "Parse error: unexpected #{token_name} '#{val}' at #{pos[:line]}:#{pos[:column]}"
+  error = {
+    message: "Parse error: unexpected #{token_name} '#{val}' at #{pos[:line]}:#{pos[:column]}",
+    line: pos[:line],
+    column: pos[:column],
+    offset: pos[:offset]
+  }
+  if @tolerant
+    @errors << error unless @suppress_errors
+    @error_index ||= [@index - 1, 0].max
+    raise Parselly::ParseError, error
+  end
+  raise error[:message]
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: parselly
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.2.0
 platform: ruby
 authors:
 - Yudai Takada
@@ -51,7 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.6.9
+rubygems_version: 4.0.4
 specification_version: 4
 summary: Pure Ruby CSS selector parser.
 test_files: []