regexp_parser 2.3.0 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +63 -6
  3. data/Gemfile +1 -0
  4. data/README.md +12 -6
  5. data/lib/regexp_parser/error.rb +1 -1
  6. data/lib/regexp_parser/expression/base.rb +9 -57
  7. data/lib/regexp_parser/expression/classes/backreference.rb +1 -0
  8. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
  9. data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
  10. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
  11. data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
  12. data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
  13. data/lib/regexp_parser/expression/classes/group.rb +6 -6
  14. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  15. data/lib/regexp_parser/expression/classes/root.rb +3 -5
  16. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
  17. data/lib/regexp_parser/expression/methods/construct.rb +43 -0
  18. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  19. data/lib/regexp_parser/expression/methods/tests.rb +10 -1
  20. data/lib/regexp_parser/expression/quantifier.rb +41 -23
  21. data/lib/regexp_parser/expression/sequence.rb +9 -23
  22. data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
  23. data/lib/regexp_parser/expression/shared.rb +85 -0
  24. data/lib/regexp_parser/expression/subexpression.rb +11 -7
  25. data/lib/regexp_parser/expression.rb +4 -2
  26. data/lib/regexp_parser/parser.rb +21 -72
  27. data/lib/regexp_parser/scanner/property.rl +1 -1
  28. data/lib/regexp_parser/scanner/scanner.rl +42 -31
  29. data/lib/regexp_parser/scanner.rb +725 -793
  30. data/lib/regexp_parser/syntax/token/escape.rb +1 -1
  31. data/lib/regexp_parser/syntax/token/unicode_property.rb +0 -5
  32. data/lib/regexp_parser/version.rb +1 -1
  33. metadata +10 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 369b108d8410e12bd6af5c659f58cb56c583e48780c1b35b6270bb21cc6a4ee7
4
- data.tar.gz: 30cd2c0823ae154a2db04c705f898f252774ec8ab9ef304833c5e3546ba7406a
3
+ metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
4
+ data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
5
5
  SHA512:
6
- metadata.gz: 4104bec7dd02a7ea099de9aeacb766fb1a2db50cb52bd84f44e4bde93431d436b75d0f1b3f4d62242713a1eeca3f4d8c0be034270d515979aad8ad2d504880b0
7
- data.tar.gz: 11deb2d7c8a6fad3fa9cb18b3f29cae15bab7e12e6cbbc968706dd02c16b0d1a6b1d69f05a5f665f7b46947315b0ea4ecda62dab8ddca8b5ef71f521b877da74
6
+ metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
7
+ data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63
data/CHANGELOG.md CHANGED
@@ -1,5 +1,62 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ### Added
4
+
5
+ - `Regexp::Expression::Base.construct` and `.token_class` methods
6
+
7
+ ## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
8
+
9
+ ### Fixed
10
+
11
+ - fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
12
+ - they used to be treated as reluctant or possessive mode indicators
13
+ - however, Ruby does not support these modes for interval quantifiers
14
+ - they are now treated as chained quantifiers instead, as Ruby does it
15
+ - cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
16
+ - fixed `Expression::Base#nesting_level` for some tree rewrite cases
17
+ - e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
18
+ - fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
19
+ - they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
20
+ - they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
21
+
22
+ ### Added
23
+
24
+ - added `Expression::Base#==` for (deep) comparison of expressions
25
+ - added `Expression::Base#parts`
26
+ - returns the text elements and subexpressions of an expression
27
+ - e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
28
+ - added `Expression::Base#te` (a.k.a. token end index)
29
+ - `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
30
+ - made some `Expression::Base` methods available on `Quantifier` instances, too
31
+ - `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
32
+ - `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
33
+ - `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
34
+ - this allows a more unified handling with `Expression::Base` instances
35
+ - allowed `Quantifier#initialize` to take a token and options Hash like other nodes
36
+ - added a deprecation warning for initializing Quantifiers with 4+ arguments:
37
+
38
+ Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
39
+ is deprecated.
40
+
41
+ It will no longer be supported in regexp_parser v3.0.0.
42
+
43
+ Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
44
+ with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
45
+ will be derived automatically.
46
+
47
+ Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
48
+
49
+ This is consistent with how Expression::Base instances are created.
50
+
51
+
52
+ ## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
53
+
54
+ ### Fixed
55
+
56
+ - removed five inexistent unicode properties from `Syntax#features`
57
+ - these were never supported by Ruby or the `Regexp::Scanner`
58
+ - thanks to [Markus Schirp](https://github.com/mbj) for the report
59
+
3
60
  ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
4
61
 
5
62
  ### Added
@@ -180,7 +237,7 @@
180
237
 
181
238
  ### Added
182
239
 
183
- - `Expression#each_expression` and `#traverse` can now be called without a block
240
+ - `Expression::Base#each_expression` and `#traverse` can now be called without a block
184
241
  * this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
185
242
  * thanks to [Masataka Kuwabara](https://github.com/pocke)
186
243
 
@@ -206,7 +263,7 @@
206
263
  - Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
207
264
  - Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
208
265
  - Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
209
- - Fixed `Expression#match` and `#=~` not working with a single argument
266
+ - Fixed `Expression::Base#match` and `#=~` not working with a single argument
210
267
 
211
268
  ### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
212
269
 
@@ -214,15 +271,15 @@
214
271
 
215
272
  - Added `#referenced_expression` for backrefs, subexp calls and conditionals
216
273
  * returns the `Group` expression that is being referenced via name or number
217
- - Added `Expression#repetitions`
274
+ - Added `Expression::Base#repetitions`
218
275
  * returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
219
276
  * like `#quantity` but with a more uniform interface
220
- - Added `Expression#match_length`
277
+ - Added `Expression::Base#match_length`
221
278
  * allows to inspect and iterate over String lengths matched by the Expression
222
279
 
223
280
  ### Fixed
224
281
 
225
- - Fixed `Expression#clone` "direction"
282
+ - Fixed `Expression::Base#clone` "direction"
226
283
  * it used to dup ivars onto the callee, leaving only the clone referencing the original objects
227
284
  * this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
228
285
  - Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
@@ -384,7 +441,7 @@ This release includes several breaking changes, mostly to character sets, #map a
384
441
  - Fixed a thread safety issue (issue #45)
385
442
  - Some public class methods that were only reliable for
386
443
  internal use are now private instance methods (PR #46)
387
- - Improved the usefulness of Expression#options (issue #43) -
444
+ - Improved the usefulness of Expression::Base#options (issue #43) -
388
445
  #options and derived methods such as #i?, #m? and #x? are now
389
446
  defined for all Expressions that are affected by such flags.
390
447
  - Fixed scanning of whitespace following (?x) (commit 5c94bd2)
data/Gemfile CHANGED
@@ -8,6 +8,7 @@ group :development, :test do
8
8
  gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
11
12
  gem 'gouteur'
12
13
  gem 'rubocop', '~> 1.7'
13
14
  end
data/README.md CHANGED
@@ -367,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
367
367
  | **POSIX Classes** | `[:alpha:]`, `[:^digit:]` | &#x2713; |
368
368
  | **Quantifiers** | | &#x22f1; |
369
369
  | &emsp;&nbsp;_**Greedy**_ | `?`, `*`, `+`, `{m,M}` | &#x2713; |
370
- | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
371
- | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
370
+ | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?` \[1\] | &#x2713; |
371
+ | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++` \[1\] | &#x2713; |
372
372
  | **String Escapes** | | &#x22f1; |
373
- | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
373
+ | &emsp;&nbsp;_**Control** \[2\]_ | `\C-C`, `\cD` | &#x2713; |
374
374
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
375
- | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
375
+ | &emsp;&nbsp;_**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
376
376
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
377
377
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
378
378
  | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
@@ -384,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
384
384
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
385
385
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
386
386
 
387
- **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
387
+ **\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
388
+ quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
389
+ [#69](https://github.com/ammar/regexp_parser/pull/69).
390
+
391
+ **\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
388
392
  https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
389
393
  scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
390
394
 
@@ -443,12 +447,14 @@ Projects using regexp_parser.
443
447
 
444
448
  - [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
445
449
 
446
- - [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
450
+ - [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
447
451
 
448
452
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
449
453
 
450
454
  - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
451
455
 
456
+ - [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
457
+
452
458
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
453
459
 
454
460
  - [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
@@ -1,4 +1,4 @@
1
1
  class Regexp::Parser
2
- # base class for all gem-specific errors (inherited but never raised itself)
2
+ # base class for all gem-specific errors
3
3
  class Error < StandardError; end
4
4
  end
@@ -1,29 +1,15 @@
1
1
  module Regexp::Expression
2
2
  class Base
3
- attr_accessor :type, :token
4
- attr_accessor :text, :ts
5
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
-
7
- attr_accessor :quantifier
8
- attr_accessor :options
3
+ include Regexp::Expression::Shared
9
4
 
10
5
  def initialize(token, options = {})
11
- self.type = token.type
12
- self.token = token.token
13
- self.text = token.text
14
- self.ts = token.ts
15
- self.level = token.level
16
- self.set_level = token.set_level
17
- self.conditional_level = token.conditional_level
18
- self.nesting_level = 0
19
- self.quantifier = nil
20
- self.options = options
6
+ init_from_token_and_options(token, options)
21
7
  end
22
8
 
23
9
  def initialize_copy(orig)
24
- self.text = (orig.text ? orig.text.dup : nil)
25
- self.options = (orig.options ? orig.options.dup : nil)
26
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
10
+ self.text = orig.text.dup if orig.text
11
+ self.options = orig.options.dup if orig.options
12
+ self.quantifier = orig.quantifier.clone if orig.quantifier
27
13
  super
28
14
  end
29
15
 
@@ -31,48 +17,14 @@ module Regexp::Expression
31
17
  ::Regexp.new(to_s(format))
32
18
  end
33
19
 
34
- alias :starts_at :ts
35
-
36
- def base_length
37
- to_s(:base).length
38
- end
39
-
40
- def full_length
41
- to_s.length
42
- end
43
-
44
- def offset
45
- [starts_at, full_length]
46
- end
47
-
48
- def coded_offset
49
- '@%d+%d' % offset
50
- end
51
-
52
- def to_s(format = :full)
53
- "#{text}#{quantifier_affix(format)}"
54
- end
55
-
56
- def quantifier_affix(expression_format)
57
- quantifier.to_s if quantified? && expression_format != :base
58
- end
59
-
60
- def terminal?
61
- !respond_to?(:expressions)
62
- end
63
-
64
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
- self.quantifier = Quantifier.new(token, text, min, max, mode)
20
+ def quantify(*args)
21
+ self.quantifier = Quantifier.new(*args)
66
22
  end
67
23
 
68
24
  def unquantified_clone
69
25
  clone.tap { |exp| exp.quantifier = nil }
70
26
  end
71
27
 
72
- def quantified?
73
- !quantifier.nil?
74
- end
75
-
76
28
  # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
29
  def quantity
78
30
  return [nil,nil] unless quantified?
@@ -104,7 +56,7 @@ module Regexp::Expression
104
56
  quantified? and quantifier.possessive?
105
57
  end
106
58
 
107
- def attributes
59
+ def to_h
108
60
  {
109
61
  type: type,
110
62
  token: token,
@@ -118,6 +70,6 @@ module Regexp::Expression
118
70
  quantifier: quantified? ? quantifier.to_h : nil,
119
71
  }
120
72
  end
121
- alias :to_h :attributes
73
+ alias :attributes :to_h
122
74
  end
123
75
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify name with token :backref, one way or the other, in v3.0.0
2
3
  module Backreference
3
4
  class Base < Regexp::Expression::Base
4
5
  attr_accessor :referenced_expression
@@ -16,8 +16,8 @@ module Regexp::Expression
16
16
  count == 2
17
17
  end
18
18
 
19
- def to_s(_format = :full)
20
- expressions.join(text)
19
+ def parts
20
+ intersperse(expressions, text.dup)
21
21
  end
22
22
  end
23
23
  end
@@ -20,8 +20,8 @@ module Regexp::Expression
20
20
  self.closed = true
21
21
  end
22
22
 
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
23
+ def parts
24
+ ["#{text}#{'^' if negated?}", *expressions, ']']
25
25
  end
26
26
  end
27
27
  end # module Regexp::Expression
@@ -55,8 +55,8 @@ module Regexp::Expression
55
55
  condition.reference
56
56
  end
57
57
 
58
- def to_s(format = :full)
59
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
58
+ def parts
59
+ [text.dup, condition, *intersperse(branches, '|'), ')']
60
60
  end
61
61
 
62
62
  def initialize_copy(orig)
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class FreeSpace < Regexp::Expression::Base
3
- def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
3
+ def quantify(*_args)
4
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
5
5
  end
6
6
  end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
4
+ def parts
5
+ [text.dup, *expressions, ')']
6
6
  end
7
7
 
8
8
  def capturing?; false end
@@ -18,9 +18,9 @@ module Regexp::Expression
18
18
  super
19
19
  end
20
20
 
21
- def to_s(format = :full)
21
+ def parts
22
22
  if implicit?
23
- "#{expressions.join}#{quantifier_affix(format)}"
23
+ expressions
24
24
  else
25
25
  super
26
26
  end
@@ -65,8 +65,8 @@ module Regexp::Expression
65
65
  end
66
66
 
67
67
  class Comment < Group::Base
68
- def to_s(_format = :full)
69
- text.dup
68
+ def parts
69
+ [text.dup]
70
70
  end
71
71
 
72
72
  def comment?; true end
@@ -1,5 +1,7 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
+ # that contains all expressions to its left.
3
5
  class Mark < Regexp::Expression::Base; end
4
6
  end
5
7
  end
@@ -1,11 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class Root < Regexp::Expression::Subexpression
3
3
  def self.build(options = {})
4
- new(build_token, options)
5
- end
6
-
7
- def self.build_token
8
- Regexp::Token.new(:expression, :root, '', 0)
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
9
7
  end
10
8
  end
11
9
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify name with token :property, one way or the other, in v3.0.0
2
3
  module UnicodeProperty
3
4
  class Base < Regexp::Expression::Base
4
5
  def negative?
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
+ elsif self == Alternation || self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ elsif self <= EscapeSequence::Base
32
+ Regexp::Syntax::Token::Escape
33
+ else
34
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
35
+ end
36
+ end
37
+ end
38
+
39
+ def token_class
40
+ self.class.token_class
41
+ end
42
+ end
43
+ end
@@ -112,7 +112,7 @@ module Regexp::Expression
112
112
  end
113
113
 
114
114
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
115
+ dummy = Regexp::Expression::Root.construct
116
116
  dummy.expressions = expressions.map(&:clone)
117
117
  dummy.quantifier = quantifier && quantifier.clone
118
118
  dummy.match_length
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- class Base
2
+ module Shared
3
3
 
4
4
  # Test if this expression has the given test_type, which can be either
5
5
  # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,14 @@ module Regexp::Expression
93
93
  "Array, Hash, or Symbol expected, #{scope.class.name} given"
94
94
  end
95
95
  end
96
+
97
+ # Deep-compare two expressions for equality.
98
+ def ==(other)
99
+ other.class == self.class &&
100
+ other.to_s == to_s &&
101
+ other.options == options
102
+ end
103
+ alias :=== :==
104
+ alias :eql? :==
96
105
  end
97
106
  end
@@ -1,26 +1,24 @@
1
1
  module Regexp::Expression
2
+ # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
3
+ # call super in #initialize, but raise in #quantifier= and #quantify,
4
+ # or introduce an Expression::Quantifiable intermediate class.
5
+ # Or actually allow chaining as a more concise but tricky solution than PR#69.
2
6
  class Quantifier
7
+ include Regexp::Expression::Shared
8
+
3
9
  MODES = %i[greedy possessive reluctant]
4
10
 
5
- attr_reader :token, :text, :min, :max, :mode
11
+ attr_reader :min, :max, :mode
6
12
 
7
- def initialize(token, text, min, max, mode)
8
- @token = token
9
- @text = text
10
- @mode = mode
11
- @min = min
12
- @max = max
13
- end
14
-
15
- def initialize_copy(orig)
16
- @text = orig.text.dup
17
- super
18
- end
13
+ def initialize(*args)
14
+ deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
19
15
 
20
- def to_s
21
- text.dup
16
+ init_from_token_and_options(*args)
17
+ @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
18
+ @min, @max = minmax
19
+ # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
+ self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
22
21
  end
23
- alias :to_str :to_s
24
22
 
25
23
  def to_h
26
24
  {
@@ -41,13 +39,33 @@ module Regexp::Expression
41
39
  end
42
40
  alias :lazy? :reluctant?
43
41
 
44
- def ==(other)
45
- other.class == self.class &&
46
- other.token == token &&
47
- other.mode == mode &&
48
- other.min == min &&
49
- other.max == max
42
+ private
43
+
44
+ def deprecated_old_init(token, text, min, max, mode = :greedy)
45
+ warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
+ "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
49
+ "will be derived automatically.\n"\
50
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
51
+ "This is consistent with how Expression::Base instances are created. "
52
+ @token = token
53
+ @text = text
54
+ @min = min
55
+ @max = max
56
+ @mode = mode
57
+ end
58
+
59
+ def minmax
60
+ case token
61
+ when /zero_or_one/ then [0, 1]
62
+ when /zero_or_more/ then [0, -1]
63
+ when /one_or_more/ then [1, -1]
64
+ when :interval
65
+ int_min = text[/\{(\d*)/, 1]
66
+ int_max = text[/,?(\d*)\}/, 1]
67
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
68
+ end
50
69
  end
51
- alias :eq :==
52
70
  end
53
71
  end
@@ -7,31 +7,17 @@ module Regexp::Expression
7
7
  # branches, and CharacterSet::Intersection intersected sequences.
8
8
  class Sequence < Regexp::Expression::Subexpression
9
9
  class << self
10
- def add_to(subexpression, params = {}, active_opts = {})
11
- sequence = at_levels(
12
- subexpression.level,
13
- subexpression.set_level,
14
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
15
15
  )
16
- sequence.nesting_level = subexpression.nesting_level + 1
16
+ sequence.nesting_level = exp.nesting_level + 1
17
17
  sequence.options = active_opts
18
- subexpression.expressions << sequence
18
+ exp.expressions << sequence
19
19
  sequence
20
20
  end
21
-
22
- def at_levels(level, set_level, conditional_level)
23
- token = Regexp::Token.new(
24
- :expression,
25
- :sequence,
26
- '',
27
- nil, # ts
28
- nil, # te
29
- level,
30
- set_level,
31
- conditional_level
32
- )
33
- new(token)
34
- end
35
21
  end
36
22
 
37
23
  def starts_at
@@ -39,12 +25,12 @@ module Regexp::Expression
39
25
  end
40
26
  alias :ts :starts_at
41
27
 
42
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
28
+ def quantify(*args)
43
29
  target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
44
30
  target or raise Regexp::Parser::Error,
45
31
  "No valid target found for '#{text}' quantifier"
46
32
 
47
- target.quantify(token, text, min, max, mode)
33
+ target.quantify(*args)
48
34
  end
49
35
  end
50
36
  end
@@ -18,8 +18,8 @@ module Regexp::Expression
18
18
  self.class::OPERAND.add_to(self, {}, active_opts)
19
19
  end
20
20
 
21
- def to_s(format = :full)
22
- sequences.map { |e| e.to_s(format) }.join(text)
21
+ def parts
22
+ intersperse(expressions, text.dup)
23
23
  end
24
24
  end
25
25
  end