RubyGems - regexp_parser - Versions diffs - 2.1.1 → 2.2.0 - Mend

regexp_parser 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/README.md +15 -21
data/Rakefile +5 -11
data/lib/regexp_parser/expression/base.rb +123 -0
data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
data/lib/regexp_parser/expression/classes/literal.rb +1 -5
data/lib/regexp_parser/expression/classes/property.rb +0 -2
data/lib/regexp_parser/expression/classes/root.rb +0 -1
data/lib/regexp_parser/expression/classes/type.rb +0 -2
data/lib/regexp_parser/expression/quantifier.rb +1 -1
data/lib/regexp_parser/expression/sequence.rb +0 -1
data/lib/regexp_parser/expression/subexpression.rb +0 -1
data/lib/regexp_parser/expression.rb +6 -130
data/lib/regexp_parser/lexer.rb +7 -5
data/lib/regexp_parser/scanner/properties/long.yml +13 -0
data/lib/regexp_parser/scanner/properties/short.yml +9 -1
data/lib/regexp_parser/syntax/any.rb +1 -3
data/lib/regexp_parser/syntax/base.rb +9 -9
data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
data/lib/regexp_parser/syntax/token/escape.rb +31 -0
data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
data/lib/regexp_parser/syntax/token.rb +45 -0
data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
data/lib/regexp_parser/syntax.rb +1 -1
data/lib/regexp_parser/token.rb +9 -20
data/lib/regexp_parser/version.rb +1 -1
data/lib/regexp_parser.rb +0 -2
data/spec/lexer/nesting_spec.rb +2 -2
data/spec/parser/escapes_spec.rb +43 -31
data/spec/parser/properties_spec.rb +6 -4
data/spec/parser/set/ranges_spec.rb +26 -16
data/spec/scanner/escapes_spec.rb +28 -19
data/spec/scanner/sets_spec.rb +9 -9
data/spec/spec_helper.rb +13 -1
data/spec/support/capturing_stderr.rb +9 -0
data/spec/syntax/versions/1.8.6_spec.rb +2 -2
data/spec/syntax/versions/2.0.0_spec.rb +2 -2
data/spec/syntax/versions/aliases_spec.rb +1 -0
metadata +26 -26
data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
data/lib/regexp_parser/syntax/tokens.rb +0 -45
data/spec/support/runner.rb +0 -42
data/spec/support/warning_extractor.rb +0 -60

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
-  data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
+  metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
+  data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
 SHA512:
-  metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
-  data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
+  metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
+  data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 ## [Unreleased]
+## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Added
+- Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
 ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
 ### Fixed

data/README.md CHANGED Viewed

@@ -1,6 +1,9 @@
 # Regexp::Parser
-[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
+[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
+[![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
+[![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
+[![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
 A Ruby gem for tokenizing, parsing, and transforming regular expressions.
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
 | &emsp;&nbsp;_**Reluctant** (Lazy)_    | `??`, `*?`, `+?`, `{m,M}?`                              | &#x2713; |
 | &emsp;&nbsp;_**Possessive**_          | `?+`, `*+`, `++`, `{m,M}+`                              | &#x2713; |
 | **String Escapes**                    |                                                         | &#x22f1; |
-| &emsp;&nbsp;_**Control**_             | `\C-C`, `\cD`                                           | &#x2713; |
+| &emsp;&nbsp;_**Control** \[1\]_       | `\C-C`, `\cD`                                           | &#x2713; |
 | &emsp;&nbsp;_**Hex**_                 | `\x20`, `\x{701230}`                                    | &#x2713; |
-| &emsp;&nbsp;_**Meta**_                | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C`        | &#x2713; |
+| &emsp;&nbsp;_**Meta** \[1\]_          | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C`        | &#x2713; |
 | &emsp;&nbsp;_**Octal**_               | `\0`, `\01`, `\012`                                     | &#x2713; |
 | &emsp;&nbsp;_**Unicode**_             | `\uHHHH`, `\u{H+ H+}`                                   | &#x2713; |
-| **Unicode Properties**                | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | &#x22f1; |
+| **Unicode Properties**                | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
 | &emsp;&nbsp;_**Age**_                 | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}`            | &#x2713; |
 | &emsp;&nbsp;_**Blocks**_              | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}`          | &#x2713; |
 | &emsp;&nbsp;_**Classes**_             | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}`                  | &#x2713; |
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
 | &emsp;&nbsp;_**Scripts**_             | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}`              | &#x2713; |
 | &emsp;&nbsp;_**Simple**_              | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}`               | &#x2713; |
+**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
+ https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
+scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
 ##### Inapplicable Features
 Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
   - **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
   - **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
 See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
 _**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
 ## Testing
-To run the tests simply run rake from the root directory, as 'test' is the default task.
-It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
-The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
-```
-bin/test
-```
-You can run a specific test like so:
+To run the tests simply run rake from the root directory.
-```
-bin/test spec/scanner/properties_spec.rb
-```
+The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
-Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
+Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
 ```
-rake ragel:rb && bin/test spec/scanner/properties_spec.rb
+rake ragel:rb && rspec spec/scanner/properties_spec.rb
 ```
 ## Building

data/Rakefile CHANGED Viewed

@@ -1,26 +1,22 @@
+require 'bundler'
 require 'rubygems'
+require 'rubygems/package_task'
 require 'rake'
 require 'rake/testtask'
-require 'bundler'
-require 'rubygems/package_task'
+require 'rspec/core/rake_task'
 RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
 RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
 RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
 Bundler::GemHelper.install_tasks
+RSpec::Core::RakeTask.new(:spec)
 task :default => [:'test:full']
 namespace :test do
-  task full: :'ragel:rb' do
-    sh 'bin/test'
-  end
+  task full: [:'ragel:rb', :spec]
 end
 namespace :ragel do
@@ -49,13 +45,11 @@ namespace :ragel do
   end
 end
 # Add ragel task as a prerequisite for building the gem to ensure that the
 # latest scanner code is generated and included in the build.
 desc "Runs ragel:rb before building the gem"
 task :build => ['ragel:rb']
 namespace :props do
   desc 'Write new property value hashes for the properties scanner'
   task :update do

data/lib/regexp_parser/expression/base.rb ADDED Viewed

@@ -0,0 +1,123 @@
+module Regexp::Expression
+  class Base
+    attr_accessor :type, :token
+    attr_accessor :text, :ts
+    attr_accessor :level, :set_level, :conditional_level, :nesting_level
+    attr_accessor :quantifier
+    attr_accessor :options
+    def initialize(token, options = {})
+      self.type              = token.type
+      self.token             = token.token
+      self.text              = token.text
+      self.ts                = token.ts
+      self.level             = token.level
+      self.set_level         = token.set_level
+      self.conditional_level = token.conditional_level
+      self.nesting_level     = 0
+      self.quantifier        = nil
+      self.options           = options
+    end
+    def initialize_copy(orig)
+      self.text       = (orig.text       ? orig.text.dup         : nil)
+      self.options    = (orig.options    ? orig.options.dup      : nil)
+      self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
+      super
+    end
+    def to_re(format = :full)
+      ::Regexp.new(to_s(format))
+    end
+    alias :starts_at :ts
+    def base_length
+      to_s(:base).length
+    end
+    def full_length
+      to_s.length
+    end
+    def offset
+      [starts_at, full_length]
+    end
+    def coded_offset
+      '@%d+%d' % offset
+    end
+    def to_s(format = :full)
+      "#{text}#{quantifier_affix(format)}"
+    end
+    def quantifier_affix(expression_format)
+      quantifier.to_s if quantified? && expression_format != :base
+    end
+    def terminal?
+      !respond_to?(:expressions)
+    end
+    def quantify(token, text, min = nil, max = nil, mode = :greedy)
+      self.quantifier = Quantifier.new(token, text, min, max, mode)
+    end
+    def unquantified_clone
+      clone.tap { |exp| exp.quantifier = nil }
+    end
+    def quantified?
+      !quantifier.nil?
+    end
+    # Deprecated. Prefer `#repetitions` which has a more uniform interface.
+    def quantity
+      return [nil,nil] unless quantified?
+      [quantifier.min, quantifier.max]
+    end
+    def repetitions
+      return 1..1 unless quantified?
+      min = quantifier.min
+      max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
+      range = min..max
+      # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
+      if RUBY_VERSION.to_f < 2.7
+        range.define_singleton_method(:minmax) { [min, max] }
+      end
+      range
+    end
+    def greedy?
+      quantified? and quantifier.greedy?
+    end
+    def reluctant?
+      quantified? and quantifier.reluctant?
+    end
+    alias :lazy? :reluctant?
+    def possessive?
+      quantified? and quantifier.possessive?
+    end
+    def attributes
+      {
+        type:              type,
+        token:             token,
+        text:              to_s(:base),
+        starts_at:         ts,
+        length:            full_length,
+        level:             level,
+        set_level:         set_level,
+        conditional_level: conditional_level,
+        options:           options,
+        quantifier:        quantified? ? quantifier.to_h : nil,
+      }
+    end
+    alias :to_h :attributes
+  end
+end

data/lib/regexp_parser/expression/classes/anchor.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
   module Anchor
     class Base < Regexp::Expression::Base; end
@@ -22,5 +21,4 @@ module Regexp::Expression
     EOS      = EndOfString
     EOSobEOL = EndOfStringOrBeforeEndOfLine
   end
 end

data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} RENAMED Viewed

File without changes

data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb RENAMED Viewed

File without changes

data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb RENAMED Viewed

File without changes

data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} RENAMED Viewed

File without changes

data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} RENAMED Viewed

@@ -1,4 +1,5 @@
 module Regexp::Expression
+  # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
   module EscapeSequence
     class Base < Regexp::Expression::Base
       require 'yaml'

data/lib/regexp_parser/expression/classes/free_space.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
   class FreeSpace < Regexp::Expression::Base
     def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
       raise Regexp::Parser::Error, 'Can not quantify a free space object'
@@ -13,5 +12,4 @@ module Regexp::Expression
       text << exp.text
     end
   end
 end

data/lib/regexp_parser/expression/classes/literal.rb CHANGED Viewed

@@ -1,7 +1,3 @@
 module Regexp::Expression
-  class Literal < Regexp::Expression::Base
-    # Obviously nothing special here, yet.
-  end
+  class Literal < Regexp::Expression::Base; end
 end

data/lib/regexp_parser/expression/classes/property.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
   module UnicodeProperty
     class Base < Regexp::Expression::Base
       def negative?
@@ -116,5 +115,4 @@ module Regexp::Expression
     class Script  < UnicodeProperty::Base; end
     class Block   < UnicodeProperty::Base; end
   end
 end # module Regexp::Expression

data/lib/regexp_parser/expression/classes/root.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
   class Root < Regexp::Expression::Subexpression
     def self.build(options = {})
       new(build_token, options)

data/lib/regexp_parser/expression/classes/type.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
   module CharacterType
     class Base < Regexp::Expression::Base; end
@@ -15,5 +14,4 @@ module Regexp::Expression
     class Linebreak        < CharacterType::Base; end
     class ExtendedGrapheme < CharacterType::Base; end
   end
 end

data/lib/regexp_parser/expression/quantifier.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module Regexp::Expression
   class Quantifier
-    MODES = [:greedy, :possessive, :reluctant]
+    MODES = %i[greedy possessive reluctant]
     attr_reader :token, :text, :min, :max, :mode

data/lib/regexp_parser/expression/sequence.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
   # A sequence of expressions. Differs from a Subexpression in how it handles
   # quantifiers, as it applies them to its last element instead of itself as
   # a whole subexpression.

data/lib/regexp_parser/expression/subexpression.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
   class Subexpression < Regexp::Expression::Base
     include Enumerable

data/lib/regexp_parser/expression.rb CHANGED Viewed

@@ -1,130 +1,6 @@
 require 'regexp_parser/error'
-module Regexp::Expression
-  class Base
-    attr_accessor :type, :token
-    attr_accessor :text, :ts
-    attr_accessor :level, :set_level, :conditional_level, :nesting_level
-    attr_accessor :quantifier
-    attr_accessor :options
-    def initialize(token, options = {})
-      self.type              = token.type
-      self.token             = token.token
-      self.text              = token.text
-      self.ts                = token.ts
-      self.level             = token.level
-      self.set_level         = token.set_level
-      self.conditional_level = token.conditional_level
-      self.nesting_level     = 0
-      self.quantifier        = nil
-      self.options           = options
-    end
-    def initialize_copy(orig)
-      self.text       = (orig.text       ? orig.text.dup         : nil)
-      self.options    = (orig.options    ? orig.options.dup      : nil)
-      self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
-      super
-    end
-    def to_re(format = :full)
-      ::Regexp.new(to_s(format))
-    end
-    alias :starts_at :ts
-    def base_length
-      to_s(:base).length
-    end
-    def full_length
-      to_s.length
-    end
-    def offset
-      [starts_at, full_length]
-    end
-    def coded_offset
-      '@%d+%d' % offset
-    end
-    def to_s(format = :full)
-      "#{text}#{quantifier_affix(format)}"
-    end
-    def quantifier_affix(expression_format)
-      quantifier.to_s if quantified? && expression_format != :base
-    end
-    def terminal?
-      !respond_to?(:expressions)
-    end
-    def quantify(token, text, min = nil, max = nil, mode = :greedy)
-      self.quantifier = Quantifier.new(token, text, min, max, mode)
-    end
-    def unquantified_clone
-      clone.tap { |exp| exp.quantifier = nil }
-    end
-    def quantified?
-      !quantifier.nil?
-    end
-    # Deprecated. Prefer `#repetitions` which has a more uniform interface.
-    def quantity
-      return [nil,nil] unless quantified?
-      [quantifier.min, quantifier.max]
-    end
-    def repetitions
-      return 1..1 unless quantified?
-      min = quantifier.min
-      max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
-      range = min..max
-      # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
-      if RUBY_VERSION.to_f < 2.7
-        range.define_singleton_method(:minmax) { [min, max] }
-      end
-      range
-    end
-    def greedy?
-      quantified? and quantifier.greedy?
-    end
-    def reluctant?
-      quantified? and quantifier.reluctant?
-    end
-    alias :lazy? :reluctant?
-    def possessive?
-      quantified? and quantifier.possessive?
-    end
-    def attributes
-      {
-        type:              type,
-        token:             token,
-        text:              to_s(:base),
-        starts_at:         ts,
-        length:            full_length,
-        level:             level,
-        set_level:         set_level,
-        conditional_level: conditional_level,
-        options:           options,
-        quantifier:        quantified? ? quantifier.to_h : nil,
-      }
-    end
-    alias :to_h :attributes
-  end
-end # module Regexp::Expression
+require 'regexp_parser/expression/base'
 require 'regexp_parser/expression/quantifier'
 require 'regexp_parser/expression/subexpression'
 require 'regexp_parser/expression/sequence'
@@ -132,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
 require 'regexp_parser/expression/classes/alternation'
 require 'regexp_parser/expression/classes/anchor'
-require 'regexp_parser/expression/classes/backref'
+require 'regexp_parser/expression/classes/backreference'
+require 'regexp_parser/expression/classes/character_set'
+require 'regexp_parser/expression/classes/character_set/intersection'
+require 'regexp_parser/expression/classes/character_set/range'
 require 'regexp_parser/expression/classes/conditional'
-require 'regexp_parser/expression/classes/escape'
+require 'regexp_parser/expression/classes/escape_sequence'
 require 'regexp_parser/expression/classes/free_space'
 require 'regexp_parser/expression/classes/group'
 require 'regexp_parser/expression/classes/keep'
@@ -142,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
 require 'regexp_parser/expression/classes/posix_class'
 require 'regexp_parser/expression/classes/property'
 require 'regexp_parser/expression/classes/root'
-require 'regexp_parser/expression/classes/set'
-require 'regexp_parser/expression/classes/set/intersection'
-require 'regexp_parser/expression/classes/set/range'
 require 'regexp_parser/expression/classes/type'
 require 'regexp_parser/expression/methods/match'

data/lib/regexp_parser/lexer.rb CHANGED Viewed

@@ -4,12 +4,14 @@
 # given syntax flavor.
 class Regexp::Lexer
-  OPENING_TOKENS = [
-    :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
-    :atomic, :options, :options_switch, :named, :absence
+  OPENING_TOKENS = %i[
+    capture passive lookahead nlookahead lookbehind nlookbehind
+    atomic options options_switch named absence
   ].freeze
-  CLOSING_TOKENS = [:close].freeze
+  CLOSING_TOKENS = %i[close].freeze
+  CONDITION_TOKENS = %i[condition condition_close].freeze
   def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
     new.lex(input, syntax, options: options, &block)
@@ -40,7 +42,7 @@ class Regexp::Lexer
                                   nesting, set_nesting, conditional_nesting)
       current = merge_condition(current) if type == :conditional and
-        [:condition, :condition_close].include?(token)
+        CONDITION_TOKENS.include?(token)
       last.next = current if last
       current.previous = last if last

data/lib/regexp_parser/scanner/properties/long.yml CHANGED Viewed

@@ -8,6 +8,7 @@ age=10.0: age=10.0
 age=11.0: age=11.0
 age=12.0: age=12.0
 age=12.1: age=12.1
+age=13.0: age=13.0
 age=2.0: age=2.0
 age=2.1: age=2.1
 age=3.0: age=3.0
@@ -64,6 +65,7 @@ changeswhenlowercased: changes_when_lowercased
 changeswhentitlecased: changes_when_titlecased
 changeswhenuppercased: changes_when_uppercased
 cherokee: cherokee
+chorasmian: chorasmian
 closepunctuation: close_punctuation
 cntrl: cntrl
 common: common
@@ -83,6 +85,7 @@ deseret: deseret
 devanagari: devanagari
 diacritic: diacritic
 digit: digit
+divesakuru: dives_akuru
 dogra: dogra
 duployan: duployan
 egyptianhieroglyphs: egyptian_hieroglyphs
@@ -167,6 +170,7 @@ incham: in_cham
 incherokee: in_cherokee
 incherokeesupplement: in_cherokee_supplement
 inchesssymbols: in_chess_symbols
+inchorasmian: in_chorasmian
 incjkcompatibility: in_cjk_compatibility
 incjkcompatibilityforms: in_cjk_compatibility_forms
 incjkcompatibilityideographs: in_cjk_compatibility_ideographs
@@ -181,6 +185,7 @@ incjkunifiedideographsextensionc: in_cjk_unified_ideographs_extension_c
 incjkunifiedideographsextensiond: in_cjk_unified_ideographs_extension_d
 incjkunifiedideographsextensione: in_cjk_unified_ideographs_extension_e
 incjkunifiedideographsextensionf: in_cjk_unified_ideographs_extension_f
+incjkunifiedideographsextensiong: in_cjk_unified_ideographs_extension_g
 incombiningdiacriticalmarks: in_combining_diacritical_marks
 incombiningdiacriticalmarksextended: in_combining_diacritical_marks_extended
 incombiningdiacriticalmarksforsymbols: in_combining_diacritical_marks_for_symbols
@@ -204,6 +209,7 @@ indeseret: in_deseret
 indevanagari: in_devanagari
 indevanagariextended: in_devanagari_extended
 indingbats: in_dingbats
+indivesakuru: in_dives_akuru
 indogra: in_dogra
 indominotiles: in_domino_tiles
 induployan: in_duployan
@@ -269,6 +275,7 @@ inkatakana: in_katakana
 inkatakanaphoneticextensions: in_katakana_phonetic_extensions
 inkayahli: in_kayah_li
 inkharoshthi: in_kharoshthi
+inkhitansmallscript: in_khitan_small_script
 inkhmer: in_khmer
 inkhmersymbols: in_khmer_symbols
 inkhojki: in_khojki
@@ -288,6 +295,7 @@ inlineara: in_linear_a
 inlinearbideograms: in_linear_b_ideograms
 inlinearbsyllabary: in_linear_b_syllabary
 inlisu: in_lisu
+inlisusupplement: in_lisu_supplement
 inlowsurrogates: in_low_surrogates
 inlycian: in_lycian
 inlydian: in_lydian
@@ -395,6 +403,7 @@ insupplementaryprivateuseareab: in_supplementary_private_use_area_b
 insuttonsignwriting: in_sutton_signwriting
 insylotinagri: in_syloti_nagri
 insymbolsandpictographsextendeda: in_symbols_and_pictographs_extended_a
+insymbolsforlegacycomputing: in_symbols_for_legacy_computing
 insyriac: in_syriac
 insyriacsupplement: in_syriac_supplement
 intagalog: in_tagalog
@@ -409,6 +418,7 @@ intamil: in_tamil
 intamilsupplement: in_tamil_supplement
 intangut: in_tangut
 intangutcomponents: in_tangut_components
+intangutsupplement: in_tangut_supplement
 intelugu: in_telugu
 inthaana: in_thaana
 inthai: in_thai
@@ -426,6 +436,7 @@ invedicextensions: in_vedic_extensions
 inverticalforms: in_vertical_forms
 inwancho: in_wancho
 inwarangciti: in_warang_citi
+inyezidi: in_yezidi
 inyijinghexagramsymbols: in_yijing_hexagram_symbols
 inyiradicals: in_yi_radicals
 inyisyllables: in_yi_syllables
@@ -437,6 +448,7 @@ kannada: kannada
 katakana: katakana
 kayahli: kayah_li
 kharoshthi: kharoshthi
+khitansmallscript: khitan_small_script
 khmer: khmer
 khojki: khojki
 khudawadi: khudawadi
@@ -590,5 +602,6 @@ xdigit: xdigit
 xidcontinue: xid_continue
 xidstart: xid_start
 xposixpunct: xposixpunct
+yezidi: yezidi
 yi: yi
 zanabazarsquare: zanabazar_square