regexp_parser 2.2.1 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +70 -6
  3. data/Gemfile +2 -1
  4. data/README.md +23 -9
  5. data/Rakefile +1 -56
  6. data/lib/regexp_parser/error.rb +1 -1
  7. data/lib/regexp_parser/expression/base.rb +9 -57
  8. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
  9. data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
  10. data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
  11. data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +6 -6
  13. data/lib/regexp_parser/expression/methods/tests.rb +10 -1
  14. data/lib/regexp_parser/expression/quantifier.rb +40 -23
  15. data/lib/regexp_parser/expression/sequence.rb +2 -2
  16. data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
  17. data/lib/regexp_parser/expression/shared.rb +81 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +11 -7
  19. data/lib/regexp_parser/expression.rb +1 -0
  20. data/lib/regexp_parser/lexer.rb +1 -1
  21. data/lib/regexp_parser/parser.rb +12 -60
  22. data/lib/regexp_parser/scanner/properties/long.csv +18 -0
  23. data/lib/regexp_parser/scanner/properties/short.csv +4 -0
  24. data/lib/regexp_parser/scanner/property.rl +1 -1
  25. data/lib/regexp_parser/scanner/scanner.rl +42 -31
  26. data/lib/regexp_parser/scanner.rb +729 -797
  27. data/lib/regexp_parser/syntax/any.rb +2 -5
  28. data/lib/regexp_parser/syntax/base.rb +91 -64
  29. data/lib/regexp_parser/syntax/token/quantifier.rb +4 -4
  30. data/lib/regexp_parser/syntax/token/unicode_property.rb +26 -5
  31. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  32. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  33. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  34. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  35. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  36. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  37. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  38. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  39. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  40. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  41. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  42. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  43. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  44. data/lib/regexp_parser/syntax/versions/3.1.0.rb +3 -9
  45. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  46. data/lib/regexp_parser/syntax/versions.rb +1 -1
  47. data/lib/regexp_parser/version.rb +1 -1
  48. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 381a794200168f95ff6329cc8a01330d21a05e02b75e0b06dcc6bd8f763c111d
4
- data.tar.gz: bd7617cb3763e6d759c8e1364aed037ae2fff85af3cf28823476cadd14ff080e
3
+ metadata.gz: 8b84a4bb274f31b8608c7dc9d55ff6f1b8d92d0d147976f38079ae7701a6debe
4
+ data.tar.gz: 41db5f094d0beafade30a1fac2707cbc827831e818c485ad35d7173f18c6a91a
5
5
  SHA512:
6
- metadata.gz: 0a039012013e9b57329fd685aaf29386d8b848071e514f59df0acc3437a1dae5c76b6bf94158cc3deece08f3a1fec9437ac84590d97f8590d8dcee1e0dc6c726
7
- data.tar.gz: 4d67da41fbef9b9336ccfd02e3a742286bf4ef96d469c8aa2bbb9a6a55ed4aa6027a28b10ba6c9993b15937e3fe51a349632bcf5808f6237cf77a1d29ceb74f2
6
+ metadata.gz: 5dcde6135ac42db609402e47e04ee3be1da8854de286d2baad15dafee04d451814fd7a3bae7adc5440a1fced811e242b69f5fd14bcfc4f3bd5091f86769d56be
7
+ data.tar.gz: 2660d0fb28a972a1de53b71b16f8591e573d4214724b5eea8a452549598ff5d0fc5b731149e8332f65bce01c812f4d0d72135bba7e3016064d9f05202a8b5580
data/CHANGELOG.md CHANGED
@@ -1,3 +1,67 @@
1
+ ## [Unreleased]
2
+
3
+ ## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Fixed
6
+
7
+ - fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
8
+ - they used to be treated as reluctant or possessive mode indicators
9
+ - however, Ruby does not support these modes for interval quantifiers
10
+ - they are now treated as chained quantifiers instead, as Ruby does it
11
+ - cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
12
+ - fixed `Expression::Base#nesting_level` for some tree rewrite cases
13
+ - e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
14
+ - fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
15
+ - they raise a `SyntaxError` when used in a Regexp, so they could only be passed as a String
16
+ - they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
17
+
18
+ ### Added
19
+
20
+ - added `Expression::Base#==` for (deep) comparison of expressions
21
+ - added `Expression::Base#parts`
22
+ - returns the text elements and subexpressions of an expression
23
+ - e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
24
+ - added `Expression::Base#te` (a.k.a. token end index)
25
+ - `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
26
+ - made some `Expression::Base` methods available on `Quantifier` instances, too
27
+ - `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
28
+ - `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
29
+ - `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
30
+ - this allows a more unified handling with `Expression::Base` instances
31
+ - allowed `Quantifier#initialize` to take a token and options Hash like other nodes
32
+ - added a deprecation warning for initializing Quantifiers with 4+ arguments:
33
+
34
+ Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
35
+ is deprecated.
36
+
37
+ It will no longer be supported in regexp_parser v3.0.0.
38
+
39
+ Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode`
40
+ with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode
41
+ will be derived automatically.
42
+
43
+ This is consistent with how Expression::Base instances are created.
44
+
45
+
46
+ ## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
47
+
48
+ ### Fixed
49
+
50
+ - removed five nonexistent unicode properties from `Syntax#features`
51
+ - these were never supported by Ruby or the `Regexp::Scanner`
52
+ - thanks to [Markus Schirp](https://github.com/mbj) for the report
53
+
54
+ ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
55
+
56
+ ### Added
57
+
58
+ - improved parsing performance through `Syntax` refactoring
59
+ - instead of fresh `Syntax` instances, pre-loaded constants are now re-used
60
+ - this approximately doubles the parsing speed for simple regexps
61
+ - added methods to `Syntax` classes to show relative feature sets
62
+ - e.g. `Regexp::Syntax::V3_2_0.added_features`
63
+ - support for new unicode properties of Ruby 3.2 / Unicode 14.0
64
+
1
65
  ## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
2
66
 
3
67
  ### Fixed
@@ -167,7 +231,7 @@
167
231
 
168
232
  ### Added
169
233
 
170
- - `Expression#each_expression` and `#traverse` can now be called without a block
234
+ - `Expression::Base#each_expression` and `#traverse` can now be called without a block
171
235
  * this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
172
236
  * thanks to [Masataka Kuwabara](https://github.com/pocke)
173
237
 
@@ -193,7 +257,7 @@
193
257
  - Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
194
258
  - Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
195
259
  - Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
196
- - Fixed `Expression#match` and `#=~` not working with a single argument
260
+ - Fixed `Expression::Base#match` and `#=~` not working with a single argument
197
261
 
198
262
  ### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
199
263
 
@@ -201,15 +265,15 @@
201
265
 
202
266
  - Added `#referenced_expression` for backrefs, subexp calls and conditionals
203
267
  * returns the `Group` expression that is being referenced via name or number
204
- - Added `Expression#repetitions`
268
+ - Added `Expression::Base#repetitions`
205
269
  * returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
206
270
  * like `#quantity` but with a more uniform interface
207
- - Added `Expression#match_length`
271
+ - Added `Expression::Base#match_length`
208
272
  * allows inspecting and iterating over String lengths matched by the Expression
209
273
 
210
274
  ### Fixed
211
275
 
212
- - Fixed `Expression#clone` "direction"
276
+ - Fixed `Expression::Base#clone` "direction"
213
277
  * it used to dup ivars onto the callee, leaving only the clone referencing the original objects
214
278
  * this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
215
279
  - Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
@@ -371,7 +435,7 @@ This release includes several breaking changes, mostly to character sets, #map a
371
435
  - Fixed a thread safety issue (issue #45)
372
436
  - Some public class methods that were only reliable for
373
437
  internal use are now private instance methods (PR #46)
374
- - Improved the usefulness of Expression#options (issue #43) -
438
+ - Improved the usefulness of Expression::Base#options (issue #43) -
375
439
  #options and derived methods such as #i?, #m? and #x? are now
376
440
  defined for all Expressions that are affected by such flags.
377
441
  - Fixed scanning of whitespace following (?x) (commit 5c94bd2)
data/Gemfile CHANGED
@@ -5,9 +5,10 @@ gemspec
5
5
  group :development, :test do
6
6
  gem 'ice_nine', '~> 0.11.2'
7
7
  gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.0'
8
+ gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
11
12
  gem 'gouteur'
12
13
  gem 'rubocop', '~> 1.7'
13
14
  end
data/README.md CHANGED
@@ -157,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
157
157
  flavor variations. Syntax only comes into play in the lexer.
158
158
 
159
159
  #### Example
160
- The following instantiates syntax objects for Ruby 2.0, 1.9, 1.8, and
160
+ The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
161
161
  checks a few of their implementation features.
162
162
 
163
163
  ```ruby
164
164
  require 'regexp_parser'
165
165
 
166
- ruby_20 = Regexp::Syntax.new 'ruby/2.0'
166
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0'
167
167
  ruby_20.implements? :quantifier, :zero_or_one # => true
168
168
  ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
169
169
  ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
170
170
  ruby_20.implements? :conditional, :condition # => true
171
171
 
172
- ruby_19 = Regexp::Syntax.new 'ruby/1.9'
172
+ ruby_19 = Regexp::Syntax.for 'ruby/1.9'
173
173
  ruby_19.implements? :quantifier, :zero_or_one # => true
174
174
  ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
175
175
  ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
176
176
  ruby_19.implements? :conditional, :condition # => false
177
177
 
178
- ruby_18 = Regexp::Syntax.new 'ruby/1.8'
178
+ ruby_18 = Regexp::Syntax.for 'ruby/1.8'
179
179
  ruby_18.implements? :quantifier, :zero_or_one # => true
180
180
  ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
181
181
  ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
182
182
  ruby_18.implements? :conditional, :condition # => false
183
183
  ```
184
184
 
185
+ Syntax objects can also be queried about their complete and relative feature sets.
186
+
187
+ ```ruby
188
+ require 'regexp_parser'
189
+
190
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
191
+ ruby_20.added_features # => { conditional: [...], ... }
192
+ ruby_20.removed_features # => { property: [:newline], ... }
193
+ ruby_20.features # => { anchor: [...], ... }
194
+ ```
185
195
 
186
196
  #### Notes
187
197
  * Variations on a token, for example a named group with angle brackets (< and >)
@@ -357,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
357
367
  | **POSIX Classes** | `[:alpha:]`, `[:^digit:]` | &#x2713; |
358
368
  | **Quantifiers** | | &#x22f1; |
359
369
  | &emsp;&nbsp;_**Greedy**_ | `?`, `*`, `+`, `{m,M}` | &#x2713; |
360
- | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
361
- | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
370
+ | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?` \[1\] | &#x2713; |
371
+ | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++` \[1\] | &#x2713; |
362
372
  | **String Escapes** | | &#x22f1; |
363
- | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
373
+ | &emsp;&nbsp;_**Control** \[2\]_ | `\C-C`, `\cD` | &#x2713; |
364
374
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
365
- | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
375
+ | &emsp;&nbsp;_**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
366
376
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
367
377
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
368
378
  | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
@@ -374,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
374
384
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
375
385
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
376
386
 
377
- **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
387
+ **\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
388
+ quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
389
+ [#69](https://github.com/ammar/regexp_parser/pull/69).
390
+
391
+ **\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
378
392
  https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
379
393
  scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
380
394
 
data/Rakefile CHANGED
@@ -5,9 +5,7 @@ require 'rake'
5
5
  require 'rake/testtask'
6
6
  require 'rspec/core/rake_task'
7
7
 
8
- RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
9
- RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
10
- RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
8
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
11
9
 
12
10
  Bundler::GemHelper.install_tasks
13
11
 
@@ -19,60 +17,7 @@ namespace :test do
19
17
  task full: [:'ragel:rb', :spec]
20
18
  end
21
19
 
22
- namespace :ragel do
23
- desc "Process the ragel source files and output ruby code"
24
- task :rb do
25
- RAGEL_SOURCE_FILES.each do |source_file|
26
- output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
27
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
28
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
29
-
30
- contents = File.read(output_file)
31
-
32
- File.open(output_file, 'r+') do |file|
33
- contents = "# -*- warn-indent:false; -*-\n" + contents
34
-
35
- file.write(contents)
36
- end
37
- end
38
- end
39
-
40
- desc "Delete the ragel generated source file(s)"
41
- task :clean do
42
- RAGEL_SOURCE_FILES.each do |file|
43
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
44
- end
45
- end
46
- end
47
-
48
20
  # Add ragel task as a prerequisite for building the gem to ensure that the
49
21
  # latest scanner code is generated and included in the build.
50
22
  desc "Runs ragel:rb before building the gem"
51
23
  task :build => ['ragel:rb']
52
-
53
- namespace :props do
54
- desc 'Write new property value hashes for the properties scanner'
55
- task :update do
56
- require 'regexp_property_values'
57
- RegexpPropertyValues.update
58
- dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
59
-
60
- write_hash_to_file = ->(hash, path) do
61
- File.open(path, 'w') do |f|
62
- f.puts "# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT",
63
- *hash.sort.map { |pair| pair.join(',') }
64
- end
65
- puts "Wrote #{hash.count} aliases to `#{path}`"
66
- end
67
-
68
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
69
- [val.identifier, val.full_name.downcase]
70
- end
71
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
72
-
73
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
74
- [k.identifier, v.full_name.downcase]
75
- end
76
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
77
- end
78
- end
@@ -1,4 +1,4 @@
1
1
  class Regexp::Parser
2
- # base class for all gem-specific errors (inherited but never raised itself)
2
+ # base class for all gem-specific errors
3
3
  class Error < StandardError; end
4
4
  end
@@ -1,29 +1,15 @@
1
1
  module Regexp::Expression
2
2
  class Base
3
- attr_accessor :type, :token
4
- attr_accessor :text, :ts
5
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
-
7
- attr_accessor :quantifier
8
- attr_accessor :options
3
+ include Regexp::Expression::Shared
9
4
 
10
5
  def initialize(token, options = {})
11
- self.type = token.type
12
- self.token = token.token
13
- self.text = token.text
14
- self.ts = token.ts
15
- self.level = token.level
16
- self.set_level = token.set_level
17
- self.conditional_level = token.conditional_level
18
- self.nesting_level = 0
19
- self.quantifier = nil
20
- self.options = options
6
+ init_from_token_and_options(token, options)
21
7
  end
22
8
 
23
9
  def initialize_copy(orig)
24
- self.text = (orig.text ? orig.text.dup : nil)
25
- self.options = (orig.options ? orig.options.dup : nil)
26
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
10
+ self.text = orig.text.dup if orig.text
11
+ self.options = orig.options.dup if orig.options
12
+ self.quantifier = orig.quantifier.clone if orig.quantifier
27
13
  super
28
14
  end
29
15
 
@@ -31,48 +17,14 @@ module Regexp::Expression
31
17
  ::Regexp.new(to_s(format))
32
18
  end
33
19
 
34
- alias :starts_at :ts
35
-
36
- def base_length
37
- to_s(:base).length
38
- end
39
-
40
- def full_length
41
- to_s.length
42
- end
43
-
44
- def offset
45
- [starts_at, full_length]
46
- end
47
-
48
- def coded_offset
49
- '@%d+%d' % offset
50
- end
51
-
52
- def to_s(format = :full)
53
- "#{text}#{quantifier_affix(format)}"
54
- end
55
-
56
- def quantifier_affix(expression_format)
57
- quantifier.to_s if quantified? && expression_format != :base
58
- end
59
-
60
- def terminal?
61
- !respond_to?(:expressions)
62
- end
63
-
64
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
- self.quantifier = Quantifier.new(token, text, min, max, mode)
20
+ def quantify(*args)
21
+ self.quantifier = Quantifier.new(*args)
66
22
  end
67
23
 
68
24
  def unquantified_clone
69
25
  clone.tap { |exp| exp.quantifier = nil }
70
26
  end
71
27
 
72
- def quantified?
73
- !quantifier.nil?
74
- end
75
-
76
28
  # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
29
  def quantity
78
30
  return [nil,nil] unless quantified?
@@ -104,7 +56,7 @@ module Regexp::Expression
104
56
  quantified? and quantifier.possessive?
105
57
  end
106
58
 
107
- def attributes
59
+ def to_h
108
60
  {
109
61
  type: type,
110
62
  token: token,
@@ -118,6 +70,6 @@ module Regexp::Expression
118
70
  quantifier: quantified? ? quantifier.to_h : nil,
119
71
  }
120
72
  end
121
- alias :to_h :attributes
73
+ alias :attributes :to_h
122
74
  end
123
75
  end
@@ -16,8 +16,8 @@ module Regexp::Expression
16
16
  count == 2
17
17
  end
18
18
 
19
- def to_s(_format = :full)
20
- expressions.join(text)
19
+ def parts
20
+ intersperse(expressions, text.dup)
21
21
  end
22
22
  end
23
23
  end
@@ -20,8 +20,8 @@ module Regexp::Expression
20
20
  self.closed = true
21
21
  end
22
22
 
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
23
+ def parts
24
+ ["#{text}#{'^' if negated?}", *expressions, ']']
25
25
  end
26
26
  end
27
27
  end # module Regexp::Expression
@@ -55,8 +55,8 @@ module Regexp::Expression
55
55
  condition.reference
56
56
  end
57
57
 
58
- def to_s(format = :full)
59
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
58
+ def parts
59
+ [text.dup, condition, *intersperse(branches, '|'), ')']
60
60
  end
61
61
 
62
62
  def initialize_copy(orig)
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class FreeSpace < Regexp::Expression::Base
3
- def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
3
+ def quantify(*_args)
4
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
5
5
  end
6
6
  end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
4
+ def parts
5
+ [text.dup, *expressions, ')']
6
6
  end
7
7
 
8
8
  def capturing?; false end
@@ -18,9 +18,9 @@ module Regexp::Expression
18
18
  super
19
19
  end
20
20
 
21
- def to_s(format = :full)
21
+ def parts
22
22
  if implicit?
23
- "#{expressions.join}#{quantifier_affix(format)}"
23
+ expressions
24
24
  else
25
25
  super
26
26
  end
@@ -65,8 +65,8 @@ module Regexp::Expression
65
65
  end
66
66
 
67
67
  class Comment < Group::Base
68
- def to_s(_format = :full)
69
- text.dup
68
+ def parts
69
+ [text.dup]
70
70
  end
71
71
 
72
72
  def comment?; true end
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- class Base
2
+ module Shared
3
3
 
4
4
  # Test if this expression has the given test_type, which can be either
5
5
  # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,14 @@ module Regexp::Expression
93
93
  "Array, Hash, or Symbol expected, #{scope.class.name} given"
94
94
  end
95
95
  end
96
+
97
+ # Deep-compare two expressions for equality.
98
+ def ==(other)
99
+ other.class == self.class &&
100
+ other.to_s == to_s &&
101
+ other.options == options
102
+ end
103
+ alias :=== :==
104
+ alias :eql? :==
96
105
  end
97
106
  end
@@ -1,26 +1,24 @@
1
1
  module Regexp::Expression
2
+ # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
3
+ # call super in #initialize, but raise in #quantifier= and #quantify,
4
+ # or introduce an Expression::Quantifiable intermediate class.
5
+ # Or actually allow chaining as a more concise but tricky solution than PR#69.
2
6
  class Quantifier
7
+ include Regexp::Expression::Shared
8
+
3
9
  MODES = %i[greedy possessive reluctant]
4
10
 
5
- attr_reader :token, :text, :min, :max, :mode
11
+ attr_reader :min, :max, :mode
6
12
 
7
- def initialize(token, text, min, max, mode)
8
- @token = token
9
- @text = text
10
- @mode = mode
11
- @min = min
12
- @max = max
13
- end
14
-
15
- def initialize_copy(orig)
16
- @text = orig.text.dup
17
- super
18
- end
13
+ def initialize(*args)
14
+ deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
19
15
 
20
- def to_s
21
- text.dup
16
+ init_from_token_and_options(*args)
17
+ @mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
18
+ @min, @max = minmax
19
+ # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
+ self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
22
21
  end
23
- alias :to_str :to_s
24
22
 
25
23
  def to_h
26
24
  {
@@ -41,13 +39,32 @@ module Regexp::Expression
41
39
  end
42
40
  alias :lazy? :reluctant?
43
41
 
44
- def ==(other)
45
- other.class == self.class &&
46
- other.token == token &&
47
- other.mode == mode &&
48
- other.min == min &&
49
- other.max == max
42
+ private
43
+
44
+ def deprecated_old_init(token, text, min, max, mode = :greedy)
45
+ warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
+ "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
+ "Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode "\
49
+ "will be derived automatically. \nThis is consistent with how Expression::Base "\
50
+ "instances are created."
51
+ @token = token
52
+ @text = text
53
+ @min = min
54
+ @max = max
55
+ @mode = mode
56
+ end
57
+
58
+ def minmax
59
+ case token
60
+ when /zero_or_one/ then [0, 1]
61
+ when /zero_or_more/ then [0, -1]
62
+ when /one_or_more/ then [1, -1]
63
+ when :interval
64
+ int_min = text[/\{(\d*)/, 1]
65
+ int_max = text[/,?(\d*)\}/, 1]
66
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
67
+ end
50
68
  end
51
- alias :eq :==
52
69
  end
53
70
  end
@@ -39,12 +39,12 @@ module Regexp::Expression
39
39
  end
40
40
  alias :ts :starts_at
41
41
 
42
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
42
+ def quantify(*args)
43
43
  target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
44
44
  target or raise Regexp::Parser::Error,
45
45
  "No valid target found for '#{text}' quantifier"
46
46
 
47
- target.quantify(token, text, min, max, mode)
47
+ target.quantify(*args)
48
48
  end
49
49
  end
50
50
  end
@@ -18,8 +18,8 @@ module Regexp::Expression
18
18
  self.class::OPERAND.add_to(self, {}, active_opts)
19
19
  end
20
20
 
21
- def to_s(format = :full)
22
- sequences.map { |e| e.to_s(format) }.join(text)
21
+ def parts
22
+ intersperse(expressions, text.dup)
23
23
  end
24
24
  end
25
25
  end