regexp_parser 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +63 -6
  3. data/Gemfile +1 -0
  4. data/README.md +12 -6
  5. data/lib/regexp_parser/error.rb +1 -1
  6. data/lib/regexp_parser/expression/base.rb +9 -57
  7. data/lib/regexp_parser/expression/classes/backreference.rb +1 -0
  8. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
  9. data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
  10. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
  11. data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
  12. data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
  13. data/lib/regexp_parser/expression/classes/group.rb +6 -6
  14. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  15. data/lib/regexp_parser/expression/classes/root.rb +3 -5
  16. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
  17. data/lib/regexp_parser/expression/methods/construct.rb +43 -0
  18. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  19. data/lib/regexp_parser/expression/methods/tests.rb +10 -1
  20. data/lib/regexp_parser/expression/quantifier.rb +41 -23
  21. data/lib/regexp_parser/expression/sequence.rb +9 -23
  22. data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
  23. data/lib/regexp_parser/expression/shared.rb +85 -0
  24. data/lib/regexp_parser/expression/subexpression.rb +11 -7
  25. data/lib/regexp_parser/expression.rb +4 -2
  26. data/lib/regexp_parser/parser.rb +21 -72
  27. data/lib/regexp_parser/scanner/property.rl +1 -1
  28. data/lib/regexp_parser/scanner/scanner.rl +42 -31
  29. data/lib/regexp_parser/scanner.rb +725 -793
  30. data/lib/regexp_parser/syntax/token/escape.rb +1 -1
  31. data/lib/regexp_parser/syntax/token/unicode_property.rb +0 -5
  32. data/lib/regexp_parser/version.rb +1 -1
  33. metadata +10 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 369b108d8410e12bd6af5c659f58cb56c583e48780c1b35b6270bb21cc6a4ee7
4
- data.tar.gz: 30cd2c0823ae154a2db04c705f898f252774ec8ab9ef304833c5e3546ba7406a
3
+ metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
4
+ data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
5
5
  SHA512:
6
- metadata.gz: 4104bec7dd02a7ea099de9aeacb766fb1a2db50cb52bd84f44e4bde93431d436b75d0f1b3f4d62242713a1eeca3f4d8c0be034270d515979aad8ad2d504880b0
7
- data.tar.gz: 11deb2d7c8a6fad3fa9cb18b3f29cae15bab7e12e6cbbc968706dd02c16b0d1a6b1d69f05a5f665f7b46947315b0ea4ecda62dab8ddca8b5ef71f521b877da74
6
+ metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
7
+ data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63
data/CHANGELOG.md CHANGED
@@ -1,5 +1,62 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ### Added
4
+
5
+ - `Regexp::Expression::Base.construct` and `.token_class` methods
6
+
7
+ ## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
8
+
9
+ ### Fixed
10
+
11
+ - fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
12
+ - they used to be treated as reluctant or possessive mode indicators
13
+ - however, Ruby does not support these modes for interval quantifiers
14
+ - they are now treated as chained quantifiers instead, as Ruby does it
15
+ - cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
16
+ - fixed `Expression::Base#nesting_level` for some tree rewrite cases
17
+ - e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
18
+ - fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
19
+ - they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
20
+ - they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
21
+
22
+ ### Added
23
+
24
+ - added `Expression::Base#==` for (deep) comparison of expressions
25
+ - added `Expression::Base#parts`
26
+ - returns the text elements and subexpressions of an expression
27
+ - e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
28
+ - added `Expression::Base#te` (a.k.a. token end index)
29
+ - `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
30
+ - made some `Expression::Base` methods available on `Quantifier` instances, too
31
+ - `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
32
+ - `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
33
+ - `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
34
+ - this allows a more unified handling with `Expression::Base` instances
35
+ - allowed `Quantifier#initialize` to take a token and options Hash like other nodes
36
+ - added a deprecation warning for initializing Quantifiers with 4+ arguments:
37
+
38
+ Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
39
+ is deprecated.
40
+
41
+ It will no longer be supported in regexp_parser v3.0.0.
42
+
43
+ Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
44
+ with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
45
+ will be derived automatically.
46
+
47
+ Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
48
+
49
+ This is consistent with how Expression::Base instances are created.
50
+
51
+
52
+ ## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
53
+
54
+ ### Fixed
55
+
56
+ - removed five inexistent unicode properties from `Syntax#features`
57
+ - these were never supported by Ruby or the `Regexp::Scanner`
58
+ - thanks to [Markus Schirp](https://github.com/mbj) for the report
59
+
3
60
  ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
4
61
 
5
62
  ### Added
@@ -180,7 +237,7 @@
180
237
 
181
238
  ### Added
182
239
 
183
- - `Expression#each_expression` and `#traverse` can now be called without a block
240
+ - `Expression::Base#each_expression` and `#traverse` can now be called without a block
184
241
  * this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
185
242
  * thanks to [Masataka Kuwabara](https://github.com/pocke)
186
243
 
@@ -206,7 +263,7 @@
206
263
  - Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
207
264
  - Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
208
265
  - Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
209
- - Fixed `Expression#match` and `#=~` not working with a single argument
266
+ - Fixed `Expression::Base#match` and `#=~` not working with a single argument
210
267
 
211
268
  ### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
212
269
 
@@ -214,15 +271,15 @@
214
271
 
215
272
  - Added `#referenced_expression` for backrefs, subexp calls and conditionals
216
273
  * returns the `Group` expression that is being referenced via name or number
217
- - Added `Expression#repetitions`
274
+ - Added `Expression::Base#repetitions`
218
275
  * returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
219
276
  * like `#quantity` but with a more uniform interface
220
- - Added `Expression#match_length`
277
+ - Added `Expression::Base#match_length`
221
278
  * allows inspecting and iterating over String lengths matched by the Expression
222
279
 
223
280
  ### Fixed
224
281
 
225
- - Fixed `Expression#clone` "direction"
282
+ - Fixed `Expression::Base#clone` "direction"
226
283
  * it used to dup ivars onto the callee, leaving only the clone referencing the original objects
227
284
  * this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
228
285
  - Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
@@ -384,7 +441,7 @@ This release includes several breaking changes, mostly to character sets, #map a
384
441
  - Fixed a thread safety issue (issue #45)
385
442
  - Some public class methods that were only reliable for
386
443
  internal use are now private instance methods (PR #46)
387
- - Improved the usefulness of Expression#options (issue #43) -
444
+ - Improved the usefulness of Expression::Base#options (issue #43) -
388
445
  #options and derived methods such as #i?, #m? and #x? are now
389
446
  defined for all Expressions that are affected by such flags.
390
447
  - Fixed scanning of whitespace following (?x) (commit 5c94bd2)
data/Gemfile CHANGED
@@ -8,6 +8,7 @@ group :development, :test do
8
8
  gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
11
12
  gem 'gouteur'
12
13
  gem 'rubocop', '~> 1.7'
13
14
  end
data/README.md CHANGED
@@ -367,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
367
367
  | **POSIX Classes** | `[:alpha:]`, `[:^digit:]` | &#x2713; |
368
368
  | **Quantifiers** | | &#x22f1; |
369
369
  | &emsp;&nbsp;_**Greedy**_ | `?`, `*`, `+`, `{m,M}` | &#x2713; |
370
- | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
371
- | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
370
+ | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?` \[1\] | &#x2713; |
371
+ | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++` \[1\] | &#x2713; |
372
372
  | **String Escapes** | | &#x22f1; |
373
- | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
373
+ | &emsp;&nbsp;_**Control** \[2\]_ | `\C-C`, `\cD` | &#x2713; |
374
374
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
375
- | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
375
+ | &emsp;&nbsp;_**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
376
376
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
377
377
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
378
378
  | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
@@ -384,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
384
384
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
385
385
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
386
386
 
387
- **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
387
+ **\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
388
+ quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
389
+ [#69](https://github.com/ammar/regexp_parser/pull/69).
390
+
391
+ **\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
388
392
  https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
389
393
  scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
390
394
 
@@ -443,12 +447,14 @@ Projects using regexp_parser.
443
447
 
444
448
  - [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
445
449
 
446
- - [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
450
+ - [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
447
451
 
448
452
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
449
453
 
450
454
  - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
451
455
 
456
+ - [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
457
+
452
458
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
453
459
 
454
460
  - [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
@@ -1,4 +1,4 @@
1
1
  class Regexp::Parser
2
- # base class for all gem-specific errors (inherited but never raised itself)
2
+ # base class for all gem-specific errors
3
3
  class Error < StandardError; end
4
4
  end
@@ -1,29 +1,15 @@
1
1
  module Regexp::Expression
2
2
  class Base
3
- attr_accessor :type, :token
4
- attr_accessor :text, :ts
5
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
-
7
- attr_accessor :quantifier
8
- attr_accessor :options
3
+ include Regexp::Expression::Shared
9
4
 
10
5
  def initialize(token, options = {})
11
- self.type = token.type
12
- self.token = token.token
13
- self.text = token.text
14
- self.ts = token.ts
15
- self.level = token.level
16
- self.set_level = token.set_level
17
- self.conditional_level = token.conditional_level
18
- self.nesting_level = 0
19
- self.quantifier = nil
20
- self.options = options
6
+ init_from_token_and_options(token, options)
21
7
  end
22
8
 
23
9
  def initialize_copy(orig)
24
- self.text = (orig.text ? orig.text.dup : nil)
25
- self.options = (orig.options ? orig.options.dup : nil)
26
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
10
+ self.text = orig.text.dup if orig.text
11
+ self.options = orig.options.dup if orig.options
12
+ self.quantifier = orig.quantifier.clone if orig.quantifier
27
13
  super
28
14
  end
29
15
 
@@ -31,48 +17,14 @@ module Regexp::Expression
31
17
  ::Regexp.new(to_s(format))
32
18
  end
33
19
 
34
- alias :starts_at :ts
35
-
36
- def base_length
37
- to_s(:base).length
38
- end
39
-
40
- def full_length
41
- to_s.length
42
- end
43
-
44
- def offset
45
- [starts_at, full_length]
46
- end
47
-
48
- def coded_offset
49
- '@%d+%d' % offset
50
- end
51
-
52
- def to_s(format = :full)
53
- "#{text}#{quantifier_affix(format)}"
54
- end
55
-
56
- def quantifier_affix(expression_format)
57
- quantifier.to_s if quantified? && expression_format != :base
58
- end
59
-
60
- def terminal?
61
- !respond_to?(:expressions)
62
- end
63
-
64
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
- self.quantifier = Quantifier.new(token, text, min, max, mode)
20
+ def quantify(*args)
21
+ self.quantifier = Quantifier.new(*args)
66
22
  end
67
23
 
68
24
  def unquantified_clone
69
25
  clone.tap { |exp| exp.quantifier = nil }
70
26
  end
71
27
 
72
- def quantified?
73
- !quantifier.nil?
74
- end
75
-
76
28
  # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
29
  def quantity
78
30
  return [nil,nil] unless quantified?
@@ -104,7 +56,7 @@ module Regexp::Expression
104
56
  quantified? and quantifier.possessive?
105
57
  end
106
58
 
107
- def attributes
59
+ def to_h
108
60
  {
109
61
  type: type,
110
62
  token: token,
@@ -118,6 +70,6 @@ module Regexp::Expression
118
70
  quantifier: quantified? ? quantifier.to_h : nil,
119
71
  }
120
72
  end
121
- alias :to_h :attributes
73
+ alias :attributes :to_h
122
74
  end
123
75
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify name with token :backref, one way or the other, in v3.0.0
2
3
  module Backreference
3
4
  class Base < Regexp::Expression::Base
4
5
  attr_accessor :referenced_expression
@@ -16,8 +16,8 @@ module Regexp::Expression
16
16
  count == 2
17
17
  end
18
18
 
19
- def to_s(_format = :full)
20
- expressions.join(text)
19
+ def parts
20
+ intersperse(expressions, text.dup)
21
21
  end
22
22
  end
23
23
  end
@@ -20,8 +20,8 @@ module Regexp::Expression
20
20
  self.closed = true
21
21
  end
22
22
 
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
23
+ def parts
24
+ ["#{text}#{'^' if negated?}", *expressions, ']']
25
25
  end
26
26
  end
27
27
  end # module Regexp::Expression
@@ -55,8 +55,8 @@ module Regexp::Expression
55
55
  condition.reference
56
56
  end
57
57
 
58
- def to_s(format = :full)
59
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
58
+ def parts
59
+ [text.dup, condition, *intersperse(branches, '|'), ')']
60
60
  end
61
61
 
62
62
  def initialize_copy(orig)
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class FreeSpace < Regexp::Expression::Base
3
- def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
3
+ def quantify(*_args)
4
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
5
5
  end
6
6
  end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
4
+ def parts
5
+ [text.dup, *expressions, ')']
6
6
  end
7
7
 
8
8
  def capturing?; false end
@@ -18,9 +18,9 @@ module Regexp::Expression
18
18
  super
19
19
  end
20
20
 
21
- def to_s(format = :full)
21
+ def parts
22
22
  if implicit?
23
- "#{expressions.join}#{quantifier_affix(format)}"
23
+ expressions
24
24
  else
25
25
  super
26
26
  end
@@ -65,8 +65,8 @@ module Regexp::Expression
65
65
  end
66
66
 
67
67
  class Comment < Group::Base
68
- def to_s(_format = :full)
69
- text.dup
68
+ def parts
69
+ [text.dup]
70
70
  end
71
71
 
72
72
  def comment?; true end
@@ -1,5 +1,7 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
+ # that contains all expressions to its left.
3
5
  class Mark < Regexp::Expression::Base; end
4
6
  end
5
7
  end
@@ -1,11 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class Root < Regexp::Expression::Subexpression
3
3
  def self.build(options = {})
4
- new(build_token, options)
5
- end
6
-
7
- def self.build_token
8
- Regexp::Token.new(:expression, :root, '', 0)
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
9
7
  end
10
8
  end
11
9
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify name with token :property, one way or the other, in v3.0.0
2
3
  module UnicodeProperty
3
4
  class Base < Regexp::Expression::Base
4
5
  def negative?
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
+ elsif self == Alternation || self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ elsif self <= EscapeSequence::Base
32
+ Regexp::Syntax::Token::Escape
33
+ else
34
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
35
+ end
36
+ end
37
+ end
38
+
39
+ def token_class
40
+ self.class.token_class
41
+ end
42
+ end
43
+ end
@@ -112,7 +112,7 @@ module Regexp::Expression
112
112
  end
113
113
 
114
114
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
115
+ dummy = Regexp::Expression::Root.construct
116
116
  dummy.expressions = expressions.map(&:clone)
117
117
  dummy.quantifier = quantifier && quantifier.clone
118
118
  dummy.match_length
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- class Base
2
+ module Shared
3
3
 
4
4
  # Test if this expression has the given test_type, which can be either
5
5
  # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,14 @@ module Regexp::Expression
93
93
  "Array, Hash, or Symbol expected, #{scope.class.name} given"
94
94
  end
95
95
  end
96
+
97
+ # Deep-compare two expressions for equality.
98
+ def ==(other)
99
+ other.class == self.class &&
100
+ other.to_s == to_s &&
101
+ other.options == options
102
+ end
103
+ alias :=== :==
104
+ alias :eql? :==
96
105
  end
97
106
  end
@@ -1,26 +1,24 @@
1
1
  module Regexp::Expression
2
+ # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
3
+ # call super in #initialize, but raise in #quantifier= and #quantify,
4
+ # or introduce an Expression::Quantifiable intermediate class.
5
+ # Or actually allow chaining as a more concise but tricky solution than PR#69.
2
6
  class Quantifier
7
+ include Regexp::Expression::Shared
8
+
3
9
  MODES = %i[greedy possessive reluctant]
4
10
 
5
- attr_reader :token, :text, :min, :max, :mode
11
+ attr_reader :min, :max, :mode
6
12
 
7
- def initialize(token, text, min, max, mode)
8
- @token = token
9
- @text = text
10
- @mode = mode
11
- @min = min
12
- @max = max
13
- end
14
-
15
- def initialize_copy(orig)
16
- @text = orig.text.dup
17
- super
18
- end
13
+ def initialize(*args)
14
+ deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
19
15
 
20
- def to_s
21
- text.dup
16
+ init_from_token_and_options(*args)
17
+ @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
18
+ @min, @max = minmax
19
+ # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
+ self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
22
21
  end
23
- alias :to_str :to_s
24
22
 
25
23
  def to_h
26
24
  {
@@ -41,13 +39,33 @@ module Regexp::Expression
41
39
  end
42
40
  alias :lazy? :reluctant?
43
41
 
44
- def ==(other)
45
- other.class == self.class &&
46
- other.token == token &&
47
- other.mode == mode &&
48
- other.min == min &&
49
- other.max == max
42
+ private
43
+
44
+ def deprecated_old_init(token, text, min, max, mode = :greedy)
45
+ warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
+ "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
49
+ "will be derived automatically.\n"\
50
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
51
+ "This is consistent with how Expression::Base instances are created. "
52
+ @token = token
53
+ @text = text
54
+ @min = min
55
+ @max = max
56
+ @mode = mode
57
+ end
58
+
59
+ def minmax
60
+ case token
61
+ when /zero_or_one/ then [0, 1]
62
+ when /zero_or_more/ then [0, -1]
63
+ when /one_or_more/ then [1, -1]
64
+ when :interval
65
+ int_min = text[/\{(\d*)/, 1]
66
+ int_max = text[/,?(\d*)\}/, 1]
67
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
68
+ end
50
69
  end
51
- alias :eq :==
52
70
  end
53
71
  end
@@ -7,31 +7,17 @@ module Regexp::Expression
7
7
  # branches, and CharacterSet::Intersection intersected sequences.
8
8
  class Sequence < Regexp::Expression::Subexpression
9
9
  class << self
10
- def add_to(subexpression, params = {}, active_opts = {})
11
- sequence = at_levels(
12
- subexpression.level,
13
- subexpression.set_level,
14
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
15
15
  )
16
- sequence.nesting_level = subexpression.nesting_level + 1
16
+ sequence.nesting_level = exp.nesting_level + 1
17
17
  sequence.options = active_opts
18
- subexpression.expressions << sequence
18
+ exp.expressions << sequence
19
19
  sequence
20
20
  end
21
-
22
- def at_levels(level, set_level, conditional_level)
23
- token = Regexp::Token.new(
24
- :expression,
25
- :sequence,
26
- '',
27
- nil, # ts
28
- nil, # te
29
- level,
30
- set_level,
31
- conditional_level
32
- )
33
- new(token)
34
- end
35
21
  end
36
22
 
37
23
  def starts_at
@@ -39,12 +25,12 @@ module Regexp::Expression
39
25
  end
40
26
  alias :ts :starts_at
41
27
 
42
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
28
+ def quantify(*args)
43
29
  target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
44
30
  target or raise Regexp::Parser::Error,
45
31
  "No valid target found for '#{text}' quantifier"
46
32
 
47
- target.quantify(token, text, min, max, mode)
33
+ target.quantify(*args)
48
34
  end
49
35
  end
50
36
  end
@@ -18,8 +18,8 @@ module Regexp::Expression
18
18
  self.class::OPERAND.add_to(self, {}, active_opts)
19
19
  end
20
20
 
21
- def to_s(format = :full)
22
- sequences.map { |e| e.to_s(format) }.join(text)
21
+ def parts
22
+ intersperse(expressions, text.dup)
23
23
  end
24
24
  end
25
25
  end