RubyGems - regexp_parser - Versions diffs - 2.0.1 → 2.0.3 - Mend

regexp_parser 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

checksums.yaml +4 -4
data/CHANGELOG.md +17 -0
data/Gemfile +1 -0
data/Rakefile +2 -2
data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
data/lib/regexp_parser/expression/classes/group.rb +6 -1
data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
data/lib/regexp_parser/parser.rb +0 -2
data/lib/regexp_parser/scanner.rb +612 -674
data/lib/regexp_parser/scanner/scanner.rl +8 -8
data/lib/regexp_parser/syntax.rb +4 -4
data/lib/regexp_parser/syntax/any.rb +2 -2
data/lib/regexp_parser/syntax/base.rb +1 -1
data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
data/lib/regexp_parser/version.rb +1 -1
data/spec/expression/subexpression_spec.rb +1 -1
data/spec/expression/to_s_spec.rb +28 -36
data/spec/parser/errors_spec.rb +1 -1
data/spec/parser/quantifiers_spec.rb +1 -0
data/spec/scanner/sets_spec.rb +15 -3
data/spec/spec_helper.rb +1 -0
metadata +2 -2

data/lib/regexp_parser/scanner/scanner.rl CHANGED

@@ -37,7 +37,7 @@
   class_posix           = ('[:' . '^'? . class_name_posix . ':]');
-  # these are not supported in ruby, and need verification
+  # these are not supported in ruby at the moment
   collating_sequence    = '[.' . (alpha | [\-])+ . '.]';
   character_equivalent  = '[=' . alpha . '=]';
@@ -228,13 +228,13 @@
       emit(type, class_name.to_sym, text)
     };
-    collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
-      emit(:set, :collation, copy(data, ts, te))
-    };
-    character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
-      emit(:set, :equivalent, copy(data, ts, te))
-    };
+    # These are not supported in ruby at the moment. Enable them if they are.
+    # collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
+    #   emit(:set, :collation, copy(data, ts, te))
+    # };
+    # character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
+    #   emit(:set, :equivalent, copy(data, ts, te))
+    # };
     meta_char > (set_meta, 1) {
       emit(:literal, :literal, copy(data, ts, te))

data/lib/regexp_parser/syntax.rb CHANGED

@@ -1,9 +1,9 @@
+module Regexp::Syntax
+  class SyntaxError < StandardError; end
+end
 require File.expand_path('../syntax/tokens', __FILE__)
 require File.expand_path('../syntax/base', __FILE__)
 require File.expand_path('../syntax/any', __FILE__)
 require File.expand_path('../syntax/version_lookup', __FILE__)
 require File.expand_path('../syntax/versions', __FILE__)
-module Regexp::Syntax
-  class SyntaxError < StandardError; end
-end

data/lib/regexp_parser/syntax/any.rb CHANGED

@@ -8,8 +8,8 @@ module Regexp::Syntax
       @implements = { :* => [:*] }
     end
-    def implements?(type, token) true end
-    def implements!(type, token) true end
+    def implements?(_type, _token) true end
+    def implements!(_type, _token) true end
   end
 end

data/lib/regexp_parser/syntax/base.rb CHANGED

@@ -1,7 +1,7 @@
 require 'set'
 module Regexp::Syntax
-  class NotImplementedError < SyntaxError
+  class NotImplementedError < Regexp::Syntax::SyntaxError
     def initialize(syntax, type, token)
       super "#{syntax.class.name} does not implement: [#{type}:#{token}]"
     end

data/lib/regexp_parser/syntax/version_lookup.rb CHANGED

@@ -3,13 +3,13 @@ module Regexp::Syntax
   VERSION_REGEXP = /#{VERSION_FORMAT}/
   VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
-  class InvalidVersionNameError < SyntaxError
+  class InvalidVersionNameError < Regexp::Syntax::SyntaxError
     def initialize(name)
       super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
     end
   end
-  class UnknownSyntaxNameError < SyntaxError
+  class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
     def initialize(name)
       super "Unknown syntax name '#{name}'."
     end

data/lib/regexp_parser/version.rb CHANGED

@@ -1,5 +1,5 @@
 class Regexp
   class Parser
-    VERSION = '2.0.1'
+    VERSION = '2.0.3'
   end
 end

data/spec/expression/subexpression_spec.rb CHANGED

@@ -32,7 +32,7 @@ RSpec.describe(Regexp::Expression::Subexpression) do
     }
     root.each_expression do |exp|
-      next unless expected_nesting_level = tests.delete(exp.to_s)
+      next unless (expected_nesting_level = tests.delete(exp.to_s))
       expect(expected_nesting_level).to eq exp.nesting_level
     end

data/spec/expression/to_s_spec.rb CHANGED

@@ -1,58 +1,50 @@
 require 'spec_helper'
 RSpec.describe('Expression#to_s') do
-  specify('literal alternation') do
-    pattern = 'abcd|ghij|klmn|pqur'
+  def parse_frozen(pattern, ruby_version = nil)
+    IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
+  end
-    expect(RP.parse(pattern).to_s).to eq pattern
+  def expect_round_trip(pattern, ruby_version = nil)
+    parsed = parse_frozen(pattern, ruby_version)
+    expect(parsed.to_s).to eql(pattern)
   end
-  specify('quantified alternations') do
-    pattern = '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
+  specify('literal alternation') do
+    expect_round_trip('abcd|ghij|klmn|pqur')
+  end
-    expect(RP.parse(pattern).to_s).to eq pattern
+  specify('quantified alternations') do
+    expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
   end
   specify('quantified sets') do
-    pattern = '[abc]+|[^def]{3,6}'
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect_round_trip('[abc]+|[^def]{3,6}')
   end
   specify('property sets') do
-    pattern = '[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+'
-    expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
+    expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
   end
   specify('groups') do
-    pattern = "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
-    expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
+    expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
   end
   specify('assertions') do
-    pattern = '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
-    expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
+    expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
   end
   specify('comments') do
-    pattern = '(?#start)a(?#middle)b(?#end)'
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect_round_trip('(?#start)a(?#middle)b(?#end)')
   end
   specify('options') do
-    pattern = '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
   end
   specify('url') do
-    pattern = ('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
   end
   specify('multiline source') do
@@ -64,7 +56,7 @@ RSpec.describe('Expression#to_s') do
           \z
         /x
-    expect(RP.parse(multiline).to_s).to eq multiline.source
+    expect(parse_frozen(multiline).to_s).to eql(multiline.source)
   end
   specify('multiline #to_s') do
@@ -76,7 +68,7 @@ RSpec.describe('Expression#to_s') do
           \z
         /x
-    expect(RP.parse(multiline.to_s).to_s).to eq multiline.to_s
+    expect_round_trip(multiline.to_s)
   end
   # Free spacing expressions that use spaces between quantifiers and their
@@ -93,24 +85,24 @@ RSpec.describe('Expression#to_s') do
         /x
     str = 'bbbcged'
-    root = RP.parse(multiline)
+    root = parse_frozen(multiline)
-    expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
+    expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
   end
   # special case: implicit groups used for chained quantifiers produce no parens
   specify 'chained quantifiers #to_s' do
     pattern = /a+{1}{2}/
-    root = RP.parse(pattern)
-    expect(root.to_s).to eq 'a+{1}{2}'
+    root = parse_frozen(pattern)
+    expect(root.to_s).to eql('a+{1}{2}')
   end
   # regression test for https://github.com/ammar/regexp_parser/issues/74
   specify('non-ascii comment') do
     pattern = '(?x) 😋 # 😋'
     root = RP.parse(pattern)
-    expect(root.last).to be_a Regexp::Expression::Comment
-    expect(root.last.to_s).to eq '# 😋'
-    expect(root.to_s).to eq pattern
+    expect(root.last).to be_a(Regexp::Expression::Comment)
+    expect(root.last.to_s).to eql('# 😋')
+    expect(root.to_s).to eql(pattern)
   end
 end

data/spec/parser/errors_spec.rb CHANGED

@@ -9,7 +9,7 @@ RSpec.describe('Parsing errors') do
       .to raise_error(Regexp::Parser::UnknownTokenTypeError)
   end
-  RSpec.shared_examples 'UnknownTokenError' do |type, token|
+  RSpec.shared_examples 'UnknownTokenError' do |type|
     it "raises for unkown tokens of type #{type}" do
       expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
         .to raise_error(Regexp::Parser::UnknownTokenError)

data/spec/parser/quantifiers_spec.rb CHANGED

@@ -11,6 +11,7 @@ RSpec.describe('Quantifier parsing') do
       expect(exp.quantifier.min).to eq min
       expect(exp.quantifier.max).to eq max
       expect(exp.quantifier.mode).to eq mode
+      expect(exp.quantifier.text).to eq text
     end
   end

data/spec/scanner/sets_spec.rb CHANGED

@@ -61,9 +61,6 @@ RSpec.describe('Set scanning') do
   include_examples 'scan', /[[:digit:][:space:]]/,  2 => [:posixclass,    :space,    '[:space:]', 10, 19]
   include_examples 'scan', /[[:^digit:]]/,          1 => [:nonposixclass, :digit,    '[:^digit:]', 1, 11]
-  include_examples 'scan', /[a[.a-b.]c]/,           2 => [:set,    :collation,       '[.a-b.]',    2,  9]
-  include_examples 'scan', /[a[=e=]c]/,             2 => [:set,    :equivalent,      '[=e=]',      2,  7]
   include_examples 'scan', /[a-d&&g-h]/,            4 => [:set,    :intersection,    '&&',         4, 6]
   include_examples 'scan', /[a&&]/,                 2 => [:set,    :intersection,    '&&',         2, 4]
   include_examples 'scan', /[&&z]/,                 1 => [:set,    :intersection,    '&&',         1, 3]
@@ -88,6 +85,21 @@ RSpec.describe('Set scanning') do
     8 => [:set,    :range,           '-',          9, 10],
     10=> [:set,    :close,           ']',          11, 12]
+  # Collations/collating sequences and character equivalents are not enabled
+  # in Ruby at the moment. If they ever are, enable them in the scanner,
+  # add them to a new syntax version, and handle them in the parser. Until then,
+  # expect them to be scanned as regular subsets containing literals.
+  # include_examples 'scan', /[a[.a-b.]c]/,           2 => [:set,    :collation,       '[.a-b.]',    2,  9]
+  # include_examples 'scan', /[a[=e=]c]/,             2 => [:set,    :equivalent,      '[=e=]',      2,  7]
+  include_examples 'scan', '[a[.a-b.]c]',
+    2 => [:set,     :open,           '[',          2,  3],
+    3 => [:literal, :literal,        '.',          3,  4],
+    4 => [:literal, :literal,        'a',          4,  5]
+  include_examples 'scan', '[a[=e=]c]',
+    2 => [:set,     :open,           '[',          2,  3],
+    3 => [:literal, :literal,        '=',          3,  4],
+    4 => [:literal, :literal,        'e',          4,  5]
   # multi-byte characters should not affect indices
   include_examples 'scan', /[れます]/,
     0 => [:set,     :open,           '[',          0, 1],

data/spec/spec_helper.rb CHANGED

@@ -1,3 +1,4 @@
+require 'ice_nine'
 require 'regexp_parser'
 require 'regexp_property_values'
 require_relative 'support/shared_examples'

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: regexp_parser
 version: !ruby/object:Gem::Version
-  version: 2.0.1
+  version: 2.0.3
 platform: ruby
 authors:
 - Ammar Ali
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-12-20 00:00:00.000000000 Z
+date: 2020-12-28 00:00:00.000000000 Z
 dependencies: []
 description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
 email: