RubyGems - regexp_parser - Versions diffs - 1.7.1 → 2.2.1 - Mend

regexp_parser 1.7.1 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +157 -1
data/Gemfile +6 -1
data/LICENSE +1 -1
data/README.md +38 -32
data/Rakefile +18 -27
data/lib/regexp_parser/error.rb +4 -0
data/lib/regexp_parser/expression/base.rb +123 -0
data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
data/lib/regexp_parser/expression/classes/group.rb +28 -3
data/lib/regexp_parser/expression/classes/literal.rb +1 -5
data/lib/regexp_parser/expression/classes/property.rb +1 -3
data/lib/regexp_parser/expression/classes/root.rb +4 -17
data/lib/regexp_parser/expression/classes/type.rb +0 -2
data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
data/lib/regexp_parser/expression/quantifier.rb +11 -2
data/lib/regexp_parser/expression/sequence.rb +3 -20
data/lib/regexp_parser/expression/subexpression.rb +1 -2
data/lib/regexp_parser/expression.rb +7 -139
data/lib/regexp_parser/lexer.rb +13 -11
data/lib/regexp_parser/parser.rb +325 -344
data/lib/regexp_parser/scanner/char_type.rl +11 -11
data/lib/regexp_parser/scanner/properties/long.csv +604 -0
data/lib/regexp_parser/scanner/properties/short.csv +242 -0
data/lib/regexp_parser/scanner/property.rl +2 -2
data/lib/regexp_parser/scanner/scanner.rl +235 -255
data/lib/regexp_parser/scanner.rb +1324 -1387
data/lib/regexp_parser/syntax/any.rb +4 -6
data/lib/regexp_parser/syntax/base.rb +13 -15
data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
data/lib/regexp_parser/syntax/token/escape.rb +31 -0
data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
data/lib/regexp_parser/syntax/token.rb +45 -0
data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
data/lib/regexp_parser/syntax.rb +8 -6
data/lib/regexp_parser/token.rb +9 -20
data/lib/regexp_parser/version.rb +1 -1
data/lib/regexp_parser.rb +0 -2
data/regexp_parser.gemspec +20 -22
metadata +34 -165
data/lib/regexp_parser/scanner/properties/long.yml +0 -594
data/lib/regexp_parser/scanner/properties/short.yml +0 -237
data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
data/lib/regexp_parser/syntax/tokens.rb +0 -45
data/spec/expression/base_spec.rb +0 -94
data/spec/expression/clone_spec.rb +0 -120
data/spec/expression/conditional_spec.rb +0 -89
data/spec/expression/free_space_spec.rb +0 -27
data/spec/expression/methods/match_length_spec.rb +0 -161
data/spec/expression/methods/match_spec.rb +0 -25
data/spec/expression/methods/strfregexp_spec.rb +0 -224
data/spec/expression/methods/tests_spec.rb +0 -99
data/spec/expression/methods/traverse_spec.rb +0 -161
data/spec/expression/options_spec.rb +0 -128
data/spec/expression/root_spec.rb +0 -9
data/spec/expression/sequence_spec.rb +0 -9
data/spec/expression/subexpression_spec.rb +0 -50
data/spec/expression/to_h_spec.rb +0 -26
data/spec/expression/to_s_spec.rb +0 -100
data/spec/lexer/all_spec.rb +0 -22
data/spec/lexer/conditionals_spec.rb +0 -53
data/spec/lexer/delimiters_spec.rb +0 -68
data/spec/lexer/escapes_spec.rb +0 -14
data/spec/lexer/keep_spec.rb +0 -10
data/spec/lexer/literals_spec.rb +0 -89
data/spec/lexer/nesting_spec.rb +0 -99
data/spec/lexer/refcalls_spec.rb +0 -55
data/spec/parser/all_spec.rb +0 -43
data/spec/parser/alternation_spec.rb +0 -88
data/spec/parser/anchors_spec.rb +0 -17
data/spec/parser/conditionals_spec.rb +0 -179
data/spec/parser/errors_spec.rb +0 -30
data/spec/parser/escapes_spec.rb +0 -121
data/spec/parser/free_space_spec.rb +0 -130
data/spec/parser/groups_spec.rb +0 -108
data/spec/parser/keep_spec.rb +0 -6
data/spec/parser/posix_classes_spec.rb +0 -8
data/spec/parser/properties_spec.rb +0 -115
data/spec/parser/quantifiers_spec.rb +0 -52
data/spec/parser/refcalls_spec.rb +0 -112
data/spec/parser/set/intersections_spec.rb +0 -127
data/spec/parser/set/ranges_spec.rb +0 -111
data/spec/parser/sets_spec.rb +0 -178
data/spec/parser/types_spec.rb +0 -18
data/spec/scanner/all_spec.rb +0 -18
data/spec/scanner/anchors_spec.rb +0 -21
data/spec/scanner/conditionals_spec.rb +0 -128
data/spec/scanner/delimiters_spec.rb +0 -52
data/spec/scanner/errors_spec.rb +0 -67
data/spec/scanner/escapes_spec.rb +0 -53
data/spec/scanner/free_space_spec.rb +0 -133
data/spec/scanner/groups_spec.rb +0 -52
data/spec/scanner/keep_spec.rb +0 -10
data/spec/scanner/literals_spec.rb +0 -49
data/spec/scanner/meta_spec.rb +0 -18
data/spec/scanner/properties_spec.rb +0 -64
data/spec/scanner/quantifiers_spec.rb +0 -20
data/spec/scanner/refcalls_spec.rb +0 -36
data/spec/scanner/sets_spec.rb +0 -102
data/spec/scanner/types_spec.rb +0 -14
data/spec/spec_helper.rb +0 -15
data/spec/support/runner.rb +0 -42
data/spec/support/shared_examples.rb +0 -77
data/spec/support/warning_extractor.rb +0 -60
data/spec/syntax/syntax_spec.rb +0 -48
data/spec/syntax/syntax_token_map_spec.rb +0 -23
data/spec/syntax/versions/1.8.6_spec.rb +0 -17
data/spec/syntax/versions/1.9.1_spec.rb +0 -10
data/spec/syntax/versions/1.9.3_spec.rb +0 -9
data/spec/syntax/versions/2.0.0_spec.rb +0 -13
data/spec/syntax/versions/2.2.0_spec.rb +0 -9
data/spec/syntax/versions/aliases_spec.rb +0 -37
data/spec/token/token_spec.rb +0 -85

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: dd872b22bf04a288790ef0f73df9041f14fb88a08c2a03852d9dbbc238b452d6
-  data.tar.gz: 4641097a24b5fa0f7b0c8e5aacc152587fe8b15d30f3f78bbec8157887b8b897
+  metadata.gz: 381a794200168f95ff6329cc8a01330d21a05e02b75e0b06dcc6bd8f763c111d
+  data.tar.gz: bd7617cb3763e6d759c8e1364aed037ae2fff85af3cf28823476cadd14ff080e
 SHA512:
-  metadata.gz: 858570df4a7047a2d8b09555b56de28a66ca4f8022e596c249900f5312f8e7fb9376384ca816bc3c08f3e324930702ad410a28b5be680adea6867e1f8075441e
-  data.tar.gz: 0d70e7b4f18739826bb334fb305e335e44a354ae302214ca3c1884f66ace8680e48a9e4c64b890b220b82056da761084413c8b9b8c5e363382f5cf165b3d3448
+  metadata.gz: 0a039012013e9b57329fd685aaf29386d8b848071e514f59df0acc3437a1dae5c76b6bf94158cc3deece08f3a1fec9437ac84590d97f8590d8dcee1e0dc6c726
+  data.tar.gz: 4d67da41fbef9b9336ccfd02e3a742286bf4ef96d469c8aa2bbb9a6a55ed4aa6027a28b10ba6c9993b15937e3fe51a349632bcf5808f6237cf77a1d29ceb74f2

data/CHANGELOG.md CHANGED Viewed

@@ -1,4 +1,160 @@
-## [Unreleased]
+## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- fixed Syntax version of absence groups (`(?~...)`)
+  - the lexer accepted them for any Ruby version
+  - now they are only recognized for Ruby >= 2.4.1 in which they were introduced
+- reduced gem size by excluding specs from package
+- removed deprecated `test_files` gemspec setting
+- no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
+- no longer depend on `set`
+  - `set` was removed from the stdlib and made a standalone gem as of Ruby 3
+  - this made it a hidden/undeclared dependency of `regexp_parser`
+## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Added
+- added support for 13 new unicode properties introduced in Ruby 3.1.0
+## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
+  * thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
+## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Added
+- common ancestor for all scanning/parsing/lexing errors
+  * `Regexp::Parser::Error` can now be rescued as a catch-all
+  * the following errors (and their many descendants) now inherit from it:
+    - `Regexp::Expression::Conditional::TooManyBranches`
+    - `Regexp::Parser::ParserError`
+    - `Regexp::Scanner::ScannerError`
+    - `Regexp::Scanner::ValidationError`
+    - `Regexp::Syntax::SyntaxError`
+  * it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
+  * thanks to [sandstrom](https://github.com/sandstrom) for the cue
+### Fixed
+- fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
+  * a regression in v2.0.1 had caused them to be scanned as literals
+- fixed scanning of some backreference and subexpression call edge cases
+  * e.g. `\k<+1>`, `\g<x-1>`
+- fixed tokenization of some escapes in character sets
+  * `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
+  * all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
+  * if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
+  * the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
+- fixed handling of control/metacontrol escapes in character sets
+  * e.g. `[\cX]`, `[\M-\C-X]`
+  * they were misread as a bunch of individual literals, escapes, and ranges
+- fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
+## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- fixed error when scanning some unlikely and redundant but valid charset patterns
+  * e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
+- fixed ancestry of some error classes related to syntax version lookup
+  * `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
+  * they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
+## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- fixed `FrozenError` when calling `#to_s` on a frozen `Group::Passive`
+  * thanks to [Daniel Gollahon](https://github.com/dgollahon)
+## [2.0.1] - 2020-12-20 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- fixed error when scanning some group names
+  * this affected names containing hyphens, digits or multibyte chars, e.g. `/(?<a1>a)/`
+  * thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
+- fixed error when scanning hex escapes with just one hex digit
+  * e.g. `/\x0A/` was scanned correctly, but the equivalent `/\xA/` was not
+  * thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
+## [2.0.0] - 2020-11-25 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Changed
+- some methods that used to return byte-based indices now return char-based indices
+  * the returned values have only changed for Regexps that contain multibyte chars
+  * this is only a breaking change if you used such methods directly AND relied on them pointing to bytes
+  * affected methods:
+  * `Regexp::Token` `#length`, `#offset`, `#te`, `#ts`
+  * `Regexp::Expression::Base` `#full_length`, `#offset`, `#starts_at`, `#te`, `#ts`
+  * thanks to [Akinori MUSHA](https://github.com/knu) for the report
+- removed some deprecated methods/signatures
+  * these are rarely used and have been showing deprecation warnings for a long time
+  * `Regexp::Expression::Subexpression.new` with 3 arguments
+  * `Regexp::Expression::Root.new` without a token argument
+  * `Regexp::Expression.parsed`
+### Added
+- `Regexp::Expression::Base#base_length`
+  * returns the character count of an expression body, ignoring any quantifier
+- pragmatic, experimental support for chained quantifiers
+  * e.g.: `/^a{10}{4,6}$/` matches exactly 40, 50 or 60 `a`s
+  * successive quantifiers used to be silently dropped by the parser
+  * they are now wrapped with passive groups as if they were written `(?:a{10}){4,6}`
+  * thanks to [calfeld](https://github.com/calfeld) for reporting this a while back
+### Fixed
+- incorrect encoding output for non-ascii comments
+  * this led to a crash when calling `#to_s` on parse results containing such comments
+  * thanks to [Michael Glass](https://github.com/michaelglass) for the report
+- some crashes when scanning contrived patterns such as `'\😋'`
+### [1.8.2] - 2020-10-11 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- fix `FrozenError` in `Expression::Base#repetitions` on Ruby 3.0
+  * thanks to [Thomas Walpole](https://github.com/twalpole)
+- removed "unknown future version" warning on Ruby 3.0
+### [1.8.1] - 2020-09-28 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- fixed scanning of comment-like text in normal mode
+  * this was an old bug, but had become more prevalent in v1.8.0
+  * thanks to [Tietew](https://github.com/Tietew) for the report
+- specified correct minimum Ruby version in gemspec
+  * it said 1.9 but really required 2.0 as of v1.8.0
+### [1.8.0] - 2020-09-20 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Changed
+- dropped support for running on Ruby 1.9.x
+### Added
+- regexp flags can now be passed when parsing a `String` as regexp body
+  * see the [README](/README.md#usage) for details
+  * thanks to [Owen Stephens](https://github.com/owst)
+- bare occurrences of `\g` and `\k` are now allowed and scanned as literal escapes
+  * matches Onigmo behavior
+  * thanks for the report to [Marc-André Lafortune](https://github.com/marcandre)
+### Fixed
+- fixed parsing comments without preceding space or trailing newline in x-mode
+  * thanks to [Owen Stephens](https://github.com/owst)
 ### [1.7.1] - 2020-06-07 - [Ammar Ali](mailto:ammarabuali@gmail.com)

data/Gemfile CHANGED Viewed

@@ -3,7 +3,12 @@ source 'https://rubygems.org'
 gemspec
 group :development, :test do
+  gem 'ice_nine', '~> 0.11.2'
   gem 'rake', '~> 13.0'
   gem 'regexp_property_values', '~> 1.0'
-  gem 'rspec', '~> 3.8'
+  gem 'rspec', '~> 3.10'
+  if RUBY_VERSION.to_f >= 2.7
+    gem 'gouteur'
+    gem 'rubocop', '~> 1.7'
+  end
 end

data/LICENSE CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2010, 2012-2015,  Ammar Ali
+Copyright (c) 2010, 2012-2022,  Ammar Ali
 Permission is hereby granted, free of charge, to any person
 obtaining a copy of this software and associated documentation

data/README.md CHANGED Viewed

@@ -1,6 +1,9 @@
 # Regexp::Parser
-[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://secure.travis-ci.org/ammar/regexp_parser.svg?branch=master)](http://travis-ci.org/ammar/regexp_parser) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
+[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
+[![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
+[![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
+[![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
 A Ruby gem for tokenizing, parsing, and transforming regular expressions.
@@ -8,8 +11,8 @@ A Ruby gem for tokenizing, parsing, and transforming regular expressions.
   * A scanner/tokenizer based on [Ragel](http://www.colm.net/open-source/ragel/)
   * A lexer that produces a "stream" of token objects.
   * A parser that produces a "tree" of Expression objects (OO API)
-* Runs on Ruby 1.9, 2.x, and JRuby (1.9 mode) runtimes.
-* Recognizes Ruby 1.8, 1.9, and 2.x regular expressions [See Supported Syntax](#supported-syntax)
+* Runs on Ruby 2.x, 3.x and JRuby runtimes
+* Recognizes Ruby 1.8, 1.9, 2.x and 3.x regular expressions [See Supported Syntax](#supported-syntax)
 _For examples of regexp_parser in use, see [Example Projects](#example-projects)._
@@ -18,13 +21,10 @@ _For examples of regexp_parser in use, see [Example Projects](#example-projects)
 ---
 ## Requirements
-* Ruby >= 1.9
+* Ruby >= 2.0
 * Ragel >= 6.0, but only if you want to build the gem or work on the scanner.
-_Note: See the .travis.yml file for covered versions._
 ---
 ## Install
@@ -72,6 +72,17 @@ called with the results as follows:
 * **Parser**: after completion, the block gets passed the root expression.
   _The result of the block is returned._
+All three methods accept either a `Regexp` or `String` (containing the pattern)
+- if a String is passed, `options` can be supplied:
+```ruby
+require 'regexp_parser'
+Regexp::Parser.parse(
+  "a+ # Recognises a and A...",
+  options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE
+)
+```
 ---
 ## Components
@@ -306,7 +317,7 @@ Expression class. See the next section for details._
 ## Supported Syntax
 The three modules support all the regular expression syntax features of Ruby 1.8,
-1.9, and 2.x:
+1.9, 2.x and 3.x:
 _Note that not all of these are available in all versions of Ruby_
@@ -349,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
 | &emsp;&nbsp;_**Reluctant** (Lazy)_    | `??`, `*?`, `+?`, `{m,M}?`                              | &#x2713; |
 | &emsp;&nbsp;_**Possessive**_          | `?+`, `*+`, `++`, `{m,M}+`                              | &#x2713; |
 | **String Escapes**                    |                                                         | &#x22f1; |
-| &emsp;&nbsp;_**Control**_             | `\C-C`, `\cD`                                           | &#x2713; |
+| &emsp;&nbsp;_**Control** \[1\]_       | `\C-C`, `\cD`                                           | &#x2713; |
 | &emsp;&nbsp;_**Hex**_                 | `\x20`, `\x{701230}`                                    | &#x2713; |
-| &emsp;&nbsp;_**Meta**_                | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C`        | &#x2713; |
+| &emsp;&nbsp;_**Meta** \[1\]_          | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C`        | &#x2713; |
 | &emsp;&nbsp;_**Octal**_               | `\0`, `\01`, `\012`                                     | &#x2713; |
 | &emsp;&nbsp;_**Unicode**_             | `\uHHHH`, `\u{H+ H+}`                                   | &#x2713; |
-| **Unicode Properties**                | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | &#x22f1; |
+| **Unicode Properties**                | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
 | &emsp;&nbsp;_**Age**_                 | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}`            | &#x2713; |
 | &emsp;&nbsp;_**Blocks**_              | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}`          | &#x2713; |
 | &emsp;&nbsp;_**Classes**_             | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}`                  | &#x2713; |
@@ -363,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
 | &emsp;&nbsp;_**Scripts**_             | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}`              | &#x2713; |
 | &emsp;&nbsp;_**Simple**_              | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}`               | &#x2713; |
+**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
+ https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
+scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
 ##### Inapplicable Features
 Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
@@ -376,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
   - **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
   - **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
 See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
 _**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
@@ -384,26 +398,14 @@ or incorrectly return tokens/objects as literals._
 ## Testing
-To run the tests simply run rake from the root directory, as 'test' is the default task.
+To run the tests simply run rake from the root directory.
-It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
+The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
-The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
+Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
 ```
-bin/test
-```
-You can run a specific test like so:
-```
-bin/test spec/scanner/properties_spec.rb
-```
-Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
-```
-rake ragel:rb && bin/test spec/scanner/properties_spec.rb
+rake ragel:rb && rspec spec/scanner/properties_spec.rb
 ```
 ## Building
@@ -429,13 +431,17 @@ rake install
 ## Example Projects
 Projects using regexp_parser.
+- [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
+- [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
 - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
-- [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
+- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
-- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) uses regexp_parser to generate examples of postal codes.
+- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
-- [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
+- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
 ## References
@@ -464,4 +470,4 @@ Documentation and books used while working on this project.
 ---
 ##### Copyright
-_Copyright (c) 2010-2019 Ammar Ali. See LICENSE file for details._
+_Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._

data/Rakefile CHANGED Viewed

@@ -1,35 +1,31 @@
+require 'bundler'
 require 'rubygems'
+require 'rubygems/package_task'
 require 'rake'
 require 'rake/testtask'
+require 'rspec/core/rake_task'
-require 'bundler'
-require 'rubygems/package_task'
-RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
-RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
-RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
+RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
+RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
+RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
 Bundler::GemHelper.install_tasks
+RSpec::Core::RakeTask.new(:spec)
 task :default => [:'test:full']
 namespace :test do
-  task full: :'ragel:rb' do
-    sh 'bin/test'
-  end
+  task full: [:'ragel:rb', :spec]
 end
 namespace :ragel do
   desc "Process the ragel source files and output ruby code"
-  task :rb do |t|
-    RAGEL_SOURCE_FILES.each do |file|
-      output_file = "#{RAGEL_OUTPUT_DIR}/#{file}.rb"
+  task :rb do
+    RAGEL_SOURCE_FILES.each do |source_file|
+      output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
       # using faster flat table driven FSM, about 25% larger code, but about 30% faster
-      sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{file}.rl -o #{output_file}"
+      sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
       contents = File.read(output_file)
@@ -42,34 +38,29 @@ namespace :ragel do
   end
   desc "Delete the ragel generated source file(s)"
-  task :clean do |t|
+  task :clean do
     RAGEL_SOURCE_FILES.each do |file|
       sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
     end
   end
 end
 # Add ragel task as a prerequisite for building the gem to ensure that the
 # latest scanner code is generated and included in the build.
 desc "Runs ragel:rb before building the gem"
 task :build => ['ragel:rb']
 namespace :props do
   desc 'Write new property value hashes for the properties scanner'
   task :update do
     require 'regexp_property_values'
     RegexpPropertyValues.update
-    dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
+    dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
-    require 'psych'
     write_hash_to_file = ->(hash, path) do
       File.open(path, 'w') do |f|
-        f.puts '#',
-               "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
-               '#',
-               hash.sort.to_h.to_yaml
+        f.puts "# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT",
+               *hash.sort.map { |pair| pair.join(',') }
       end
       puts "Wrote #{hash.count} aliases to `#{path}`"
     end
@@ -77,11 +68,11 @@ namespace :props do
     long_names_to_tokens = RegexpPropertyValues.all.map do |val|
       [val.identifier, val.full_name.downcase]
     end
-    write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
+    write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
     short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
       [k.identifier, v.full_name.downcase]
     end
-    write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
+    write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
   end
 end

data/lib/regexp_parser/error.rb ADDED Viewed

@@ -0,0 +1,4 @@
+class Regexp::Parser
+  # base class for all gem-specific errors (inherited but never raised itself)
+  class Error < StandardError; end
+end

data/lib/regexp_parser/expression/base.rb ADDED Viewed

@@ -0,0 +1,123 @@
+module Regexp::Expression
+  class Base
+    attr_accessor :type, :token
+    attr_accessor :text, :ts
+    attr_accessor :level, :set_level, :conditional_level, :nesting_level
+    attr_accessor :quantifier
+    attr_accessor :options
+    def initialize(token, options = {})
+      self.type              = token.type
+      self.token             = token.token
+      self.text              = token.text
+      self.ts                = token.ts
+      self.level             = token.level
+      self.set_level         = token.set_level
+      self.conditional_level = token.conditional_level
+      self.nesting_level     = 0
+      self.quantifier        = nil
+      self.options           = options
+    end
+    def initialize_copy(orig)
+      self.text       = (orig.text       ? orig.text.dup         : nil)
+      self.options    = (orig.options    ? orig.options.dup      : nil)
+      self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
+      super
+    end
+    def to_re(format = :full)
+      ::Regexp.new(to_s(format))
+    end
+    alias :starts_at :ts
+    def base_length
+      to_s(:base).length
+    end
+    def full_length
+      to_s.length
+    end
+    def offset
+      [starts_at, full_length]
+    end
+    def coded_offset
+      '@%d+%d' % offset
+    end
+    def to_s(format = :full)
+      "#{text}#{quantifier_affix(format)}"
+    end
+    def quantifier_affix(expression_format)
+      quantifier.to_s if quantified? && expression_format != :base
+    end
+    def terminal?
+      !respond_to?(:expressions)
+    end
+    def quantify(token, text, min = nil, max = nil, mode = :greedy)
+      self.quantifier = Quantifier.new(token, text, min, max, mode)
+    end
+    def unquantified_clone
+      clone.tap { |exp| exp.quantifier = nil }
+    end
+    def quantified?
+      !quantifier.nil?
+    end
+    # Deprecated. Prefer `#repetitions` which has a more uniform interface.
+    def quantity
+      return [nil,nil] unless quantified?
+      [quantifier.min, quantifier.max]
+    end
+    def repetitions
+      return 1..1 unless quantified?
+      min = quantifier.min
+      max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
+      range = min..max
+      # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
+      if RUBY_VERSION.to_f < 2.7
+        range.define_singleton_method(:minmax) { [min, max] }
+      end
+      range
+    end
+    def greedy?
+      quantified? and quantifier.greedy?
+    end
+    def reluctant?
+      quantified? and quantifier.reluctant?
+    end
+    alias :lazy? :reluctant?
+    def possessive?
+      quantified? and quantifier.possessive?
+    end
+    def attributes
+      {
+        type:              type,
+        token:             token,
+        text:              to_s(:base),
+        starts_at:         ts,
+        length:            full_length,
+        level:             level,
+        set_level:         set_level,
+        conditional_level: conditional_level,
+        options:           options,
+        quantifier:        quantified? ? quantifier.to_h : nil,
+      }
+    end
+    alias :to_h :attributes
+  end
+end

data/lib/regexp_parser/expression/classes/anchor.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
   module Anchor
     class Base < Regexp::Expression::Base; end
@@ -22,5 +21,4 @@ module Regexp::Expression
     EOS      = EndOfString
     EOSobEOL = EndOfStringOrBeforeEndOfLine
   end
 end

data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} RENAMED Viewed

@@ -2,6 +2,11 @@ module Regexp::Expression
   module Backreference
     class Base < Regexp::Expression::Base
       attr_accessor :referenced_expression
+      def initialize_copy(orig)
+        self.referenced_expression = orig.referenced_expression.dup
+        super
+      end
     end
     class Number < Backreference::Base

data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb RENAMED Viewed

File without changes

data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb RENAMED Viewed

@@ -7,7 +7,8 @@ module Regexp::Expression
       alias :ts :starts_at
       def <<(exp)
-        complete? && raise("Can't add more than 2 expressions to a Range")
+        complete? and raise Regexp::Parser::Error,
+          "Can't add more than 2 expressions to a Range"
         super
       end

data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} RENAMED Viewed

File without changes

data/lib/regexp_parser/expression/classes/conditional.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module Regexp::Expression
   module Conditional
-    class TooManyBranches < StandardError
+    class TooManyBranches < Regexp::Parser::Error
       def initialize
         super('The conditional expression has more than 2 branches')
       end
@@ -15,6 +15,11 @@ module Regexp::Expression
         ref = text.tr("'<>()", "")
         ref =~ /\D/ ? ref : Integer(ref)
       end
+      def initialize_copy(orig)
+        self.referenced_expression = orig.referenced_expression.dup
+        super
+      end
     end
     class Branch < Regexp::Expression::Sequence; end
@@ -53,6 +58,11 @@ module Regexp::Expression
       def to_s(format = :full)
         "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
       end
+      def initialize_copy(orig)
+        self.referenced_expression = orig.referenced_expression.dup
+        super
+      end
     end
   end
 end