regexp_parser 2.2.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +70 -6
  3. data/Gemfile +2 -1
  4. data/README.md +23 -9
  5. data/Rakefile +1 -56
  6. data/lib/regexp_parser/error.rb +1 -1
  7. data/lib/regexp_parser/expression/base.rb +9 -57
  8. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
  9. data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
  10. data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
  11. data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +6 -6
  13. data/lib/regexp_parser/expression/methods/tests.rb +10 -1
  14. data/lib/regexp_parser/expression/quantifier.rb +40 -23
  15. data/lib/regexp_parser/expression/sequence.rb +2 -2
  16. data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
  17. data/lib/regexp_parser/expression/shared.rb +81 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +11 -7
  19. data/lib/regexp_parser/expression.rb +1 -0
  20. data/lib/regexp_parser/lexer.rb +1 -1
  21. data/lib/regexp_parser/parser.rb +12 -60
  22. data/lib/regexp_parser/scanner/properties/long.csv +18 -0
  23. data/lib/regexp_parser/scanner/properties/short.csv +4 -0
  24. data/lib/regexp_parser/scanner/property.rl +1 -1
  25. data/lib/regexp_parser/scanner/scanner.rl +42 -31
  26. data/lib/regexp_parser/scanner.rb +729 -797
  27. data/lib/regexp_parser/syntax/any.rb +2 -5
  28. data/lib/regexp_parser/syntax/base.rb +91 -64
  29. data/lib/regexp_parser/syntax/token/quantifier.rb +4 -4
  30. data/lib/regexp_parser/syntax/token/unicode_property.rb +26 -5
  31. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  32. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  33. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  34. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  35. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  36. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  37. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  38. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  39. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  40. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  41. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  42. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  43. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  44. data/lib/regexp_parser/syntax/versions/3.1.0.rb +3 -9
  45. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  46. data/lib/regexp_parser/syntax/versions.rb +1 -1
  47. data/lib/regexp_parser/version.rb +1 -1
  48. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 381a794200168f95ff6329cc8a01330d21a05e02b75e0b06dcc6bd8f763c111d
4
- data.tar.gz: bd7617cb3763e6d759c8e1364aed037ae2fff85af3cf28823476cadd14ff080e
3
+ metadata.gz: 8b84a4bb274f31b8608c7dc9d55ff6f1b8d92d0d147976f38079ae7701a6debe
4
+ data.tar.gz: 41db5f094d0beafade30a1fac2707cbc827831e818c485ad35d7173f18c6a91a
5
5
  SHA512:
6
- metadata.gz: 0a039012013e9b57329fd685aaf29386d8b848071e514f59df0acc3437a1dae5c76b6bf94158cc3deece08f3a1fec9437ac84590d97f8590d8dcee1e0dc6c726
7
- data.tar.gz: 4d67da41fbef9b9336ccfd02e3a742286bf4ef96d469c8aa2bbb9a6a55ed4aa6027a28b10ba6c9993b15937e3fe51a349632bcf5808f6237cf77a1d29ceb74f2
6
+ metadata.gz: 5dcde6135ac42db609402e47e04ee3be1da8854de286d2baad15dafee04d451814fd7a3bae7adc5440a1fced811e242b69f5fd14bcfc4f3bd5091f86769d56be
7
+ data.tar.gz: 2660d0fb28a972a1de53b71b16f8591e573d4214724b5eea8a452549598ff5d0fc5b731149e8332f65bce01c812f4d0d72135bba7e3016064d9f05202a8b5580
data/CHANGELOG.md CHANGED
@@ -1,3 +1,67 @@
1
+ ## [Unreleased]
2
+
3
+ ## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Fixed
6
+
7
+ - fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
8
+ - they used to be treated as reluctant or possessive mode indicators
9
+ - however, Ruby does not support these modes for interval quantifiers
10
+ - they are now treated as chained quantifiers instead, as Ruby does it
11
+ - cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
12
+ - fixed `Expression::Base#nesting_level` for some tree rewrite cases
13
+ - e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
14
+ - fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
15
+ - they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
16
+ - they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
17
+
18
+ ### Added
19
+
20
+ - added `Expression::Base#==` for (deep) comparison of expressions
21
+ - added `Expression::Base#parts`
22
+ - returns the text elements and subexpressions of an expression
23
+ - e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
24
+ - added `Expression::Base#te` (a.k.a. token end index)
25
+ - `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
26
+ - made some `Expression::Base` methods available on `Quantifier` instances, too
27
+ - `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
28
+ - `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
29
+ - `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
30
+ - this allows a more unified handling with `Expression::Base` instances
31
+ - allowed `Quantifier#initialize` to take a token and options Hash like other nodes
32
+ - added a deprecation warning for initializing Quantifiers with 4+ arguments:
33
+
34
+ Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
35
+ is deprecated.
36
+
37
+ It will no longer be supported in regexp_parser v3.0.0.
38
+
39
+ Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode`
40
+ with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode
41
+ will be derived automatically.
42
+
43
+ This is consistent with how Expression::Base instances are created.
44
+
45
+
46
+ ## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
47
+
48
+ ### Fixed
49
+
50
+ - removed five nonexistent unicode properties from `Syntax#features`
51
+ - these were never supported by Ruby or the `Regexp::Scanner`
52
+ - thanks to [Markus Schirp](https://github.com/mbj) for the report
53
+
54
+ ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
55
+
56
+ ### Added
57
+
58
+ - improved parsing performance through `Syntax` refactoring
59
+ - instead of fresh `Syntax` instances, pre-loaded constants are now re-used
60
+ - this approximately doubles the parsing speed for simple regexps
61
+ - added methods to `Syntax` classes to show relative feature sets
62
+ - e.g. `Regexp::Syntax::V3_2_0.added_features`
63
+ - support for new unicode properties of Ruby 3.2 / Unicode 14.0
64
+
1
65
  ## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
2
66
 
3
67
  ### Fixed
@@ -167,7 +231,7 @@
167
231
 
168
232
  ### Added
169
233
 
170
- - `Expression#each_expression` and `#traverse` can now be called without a block
234
+ - `Expression::Base#each_expression` and `#traverse` can now be called without a block
171
235
  * this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
172
236
  * thanks to [Masataka Kuwabara](https://github.com/pocke)
173
237
 
@@ -193,7 +257,7 @@
193
257
  - Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
194
258
  - Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
195
259
  - Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
196
- - Fixed `Expression#match` and `#=~` not working with a single argument
260
+ - Fixed `Expression::Base#match` and `#=~` not working with a single argument
197
261
 
198
262
  ### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
199
263
 
@@ -201,15 +265,15 @@
201
265
 
202
266
  - Added `#referenced_expression` for backrefs, subexp calls and conditionals
203
267
  * returns the `Group` expression that is being referenced via name or number
204
- - Added `Expression#repetitions`
268
+ - Added `Expression::Base#repetitions`
205
269
  * returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
206
270
  * like `#quantity` but with a more uniform interface
207
- - Added `Expression#match_length`
271
+ - Added `Expression::Base#match_length`
208
272
  * allows to inspect and iterate over String lengths matched by the Expression
209
273
 
210
274
  ### Fixed
211
275
 
212
- - Fixed `Expression#clone` "direction"
276
+ - Fixed `Expression::Base#clone` "direction"
213
277
  * it used to dup ivars onto the callee, leaving only the clone referencing the original objects
214
278
  * this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
215
279
  - Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
@@ -371,7 +435,7 @@ This release includes several breaking changes, mostly to character sets, #map a
371
435
  - Fixed a thread safety issue (issue #45)
372
436
  - Some public class methods that were only reliable for
373
437
  internal use are now private instance methods (PR #46)
374
- - Improved the usefulness of Expression#options (issue #43) -
438
+ - Improved the usefulness of Expression::Base#options (issue #43) -
375
439
  #options and derived methods such as #i?, #m? and #x? are now
376
440
  defined for all Expressions that are affected by such flags.
377
441
  - Fixed scanning of whitespace following (?x) (commit 5c94bd2)
data/Gemfile CHANGED
@@ -5,9 +5,10 @@ gemspec
5
5
  group :development, :test do
6
6
  gem 'ice_nine', '~> 0.11.2'
7
7
  gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.0'
8
+ gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
11
12
  gem 'gouteur'
12
13
  gem 'rubocop', '~> 1.7'
13
14
  end
data/README.md CHANGED
@@ -157,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
157
157
  flavor variations. Syntax only comes into play in the lexer.
158
158
 
159
159
  #### Example
160
- The following instantiates syntax objects for Ruby 2.0, 1.9, 1.8, and
160
+ The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
161
161
  checks a few of their implementation features.
162
162
 
163
163
  ```ruby
164
164
  require 'regexp_parser'
165
165
 
166
- ruby_20 = Regexp::Syntax.new 'ruby/2.0'
166
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0'
167
167
  ruby_20.implements? :quantifier, :zero_or_one # => true
168
168
  ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
169
169
  ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
170
170
  ruby_20.implements? :conditional, :condition # => true
171
171
 
172
- ruby_19 = Regexp::Syntax.new 'ruby/1.9'
172
+ ruby_19 = Regexp::Syntax.for 'ruby/1.9'
173
173
  ruby_19.implements? :quantifier, :zero_or_one # => true
174
174
  ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
175
175
  ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
176
176
  ruby_19.implements? :conditional, :condition # => false
177
177
 
178
- ruby_18 = Regexp::Syntax.new 'ruby/1.8'
178
+ ruby_18 = Regexp::Syntax.for 'ruby/1.8'
179
179
  ruby_18.implements? :quantifier, :zero_or_one # => true
180
180
  ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
181
181
  ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
182
182
  ruby_18.implements? :conditional, :condition # => false
183
183
  ```
184
184
 
185
+ Syntax objects can also be queried about their complete and relative feature sets.
186
+
187
+ ```ruby
188
+ require 'regexp_parser'
189
+
190
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
191
+ ruby_20.added_features # => { conditional: [...], ... }
192
+ ruby_20.removed_features # => { property: [:newline], ... }
193
+ ruby_20.features # => { anchor: [...], ... }
194
+ ```
185
195
 
186
196
  #### Notes
187
197
  * Variations on a token, for example a named group with angle brackets (< and >)
@@ -357,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
357
367
  | **POSIX Classes** | `[:alpha:]`, `[:^digit:]` | &#x2713; |
358
368
  | **Quantifiers** | | &#x22f1; |
359
369
  | &emsp;&nbsp;_**Greedy**_ | `?`, `*`, `+`, `{m,M}` | &#x2713; |
360
- | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
361
- | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
370
+ | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?` \[1\] | &#x2713; |
371
+ | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++` \[1\] | &#x2713; |
362
372
  | **String Escapes** | | &#x22f1; |
363
- | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
373
+ | &emsp;&nbsp;_**Control** \[2\]_ | `\C-C`, `\cD` | &#x2713; |
364
374
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
365
- | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
375
+ | &emsp;&nbsp;_**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
366
376
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
367
377
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
368
378
  | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
@@ -374,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
374
384
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
375
385
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
376
386
 
377
- **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
387
+ **\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
388
+ quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
389
+ [#69](https://github.com/ammar/regexp_parser/pull/69).
390
+
391
+ **\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
378
392
  https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
379
393
  scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
380
394
 
data/Rakefile CHANGED
@@ -5,9 +5,7 @@ require 'rake'
5
5
  require 'rake/testtask'
6
6
  require 'rspec/core/rake_task'
7
7
 
8
- RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
9
- RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
10
- RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
8
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
11
9
 
12
10
  Bundler::GemHelper.install_tasks
13
11
 
@@ -19,60 +17,7 @@ namespace :test do
19
17
  task full: [:'ragel:rb', :spec]
20
18
  end
21
19
 
22
- namespace :ragel do
23
- desc "Process the ragel source files and output ruby code"
24
- task :rb do
25
- RAGEL_SOURCE_FILES.each do |source_file|
26
- output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
27
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
28
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
29
-
30
- contents = File.read(output_file)
31
-
32
- File.open(output_file, 'r+') do |file|
33
- contents = "# -*- warn-indent:false; -*-\n" + contents
34
-
35
- file.write(contents)
36
- end
37
- end
38
- end
39
-
40
- desc "Delete the ragel generated source file(s)"
41
- task :clean do
42
- RAGEL_SOURCE_FILES.each do |file|
43
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
44
- end
45
- end
46
- end
47
-
48
20
  # Add ragel task as a prerequisite for building the gem to ensure that the
49
21
  # latest scanner code is generated and included in the build.
50
22
  desc "Runs ragel:rb before building the gem"
51
23
  task :build => ['ragel:rb']
52
-
53
- namespace :props do
54
- desc 'Write new property value hashes for the properties scanner'
55
- task :update do
56
- require 'regexp_property_values'
57
- RegexpPropertyValues.update
58
- dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
59
-
60
- write_hash_to_file = ->(hash, path) do
61
- File.open(path, 'w') do |f|
62
- f.puts "# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT",
63
- *hash.sort.map { |pair| pair.join(',') }
64
- end
65
- puts "Wrote #{hash.count} aliases to `#{path}`"
66
- end
67
-
68
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
69
- [val.identifier, val.full_name.downcase]
70
- end
71
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
72
-
73
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
74
- [k.identifier, v.full_name.downcase]
75
- end
76
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
77
- end
78
- end
@@ -1,4 +1,4 @@
1
1
  class Regexp::Parser
2
- # base class for all gem-specific errors (inherited but never raised itself)
2
+ # base class for all gem-specific errors
3
3
  class Error < StandardError; end
4
4
  end
@@ -1,29 +1,15 @@
1
1
  module Regexp::Expression
2
2
  class Base
3
- attr_accessor :type, :token
4
- attr_accessor :text, :ts
5
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
-
7
- attr_accessor :quantifier
8
- attr_accessor :options
3
+ include Regexp::Expression::Shared
9
4
 
10
5
  def initialize(token, options = {})
11
- self.type = token.type
12
- self.token = token.token
13
- self.text = token.text
14
- self.ts = token.ts
15
- self.level = token.level
16
- self.set_level = token.set_level
17
- self.conditional_level = token.conditional_level
18
- self.nesting_level = 0
19
- self.quantifier = nil
20
- self.options = options
6
+ init_from_token_and_options(token, options)
21
7
  end
22
8
 
23
9
  def initialize_copy(orig)
24
- self.text = (orig.text ? orig.text.dup : nil)
25
- self.options = (orig.options ? orig.options.dup : nil)
26
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
10
+ self.text = orig.text.dup if orig.text
11
+ self.options = orig.options.dup if orig.options
12
+ self.quantifier = orig.quantifier.clone if orig.quantifier
27
13
  super
28
14
  end
29
15
 
@@ -31,48 +17,14 @@ module Regexp::Expression
31
17
  ::Regexp.new(to_s(format))
32
18
  end
33
19
 
34
- alias :starts_at :ts
35
-
36
- def base_length
37
- to_s(:base).length
38
- end
39
-
40
- def full_length
41
- to_s.length
42
- end
43
-
44
- def offset
45
- [starts_at, full_length]
46
- end
47
-
48
- def coded_offset
49
- '@%d+%d' % offset
50
- end
51
-
52
- def to_s(format = :full)
53
- "#{text}#{quantifier_affix(format)}"
54
- end
55
-
56
- def quantifier_affix(expression_format)
57
- quantifier.to_s if quantified? && expression_format != :base
58
- end
59
-
60
- def terminal?
61
- !respond_to?(:expressions)
62
- end
63
-
64
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
- self.quantifier = Quantifier.new(token, text, min, max, mode)
20
+ def quantify(*args)
21
+ self.quantifier = Quantifier.new(*args)
66
22
  end
67
23
 
68
24
  def unquantified_clone
69
25
  clone.tap { |exp| exp.quantifier = nil }
70
26
  end
71
27
 
72
- def quantified?
73
- !quantifier.nil?
74
- end
75
-
76
28
  # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
29
  def quantity
78
30
  return [nil,nil] unless quantified?
@@ -104,7 +56,7 @@ module Regexp::Expression
104
56
  quantified? and quantifier.possessive?
105
57
  end
106
58
 
107
- def attributes
59
+ def to_h
108
60
  {
109
61
  type: type,
110
62
  token: token,
@@ -118,6 +70,6 @@ module Regexp::Expression
118
70
  quantifier: quantified? ? quantifier.to_h : nil,
119
71
  }
120
72
  end
121
- alias :to_h :attributes
73
+ alias :attributes :to_h
122
74
  end
123
75
  end
@@ -16,8 +16,8 @@ module Regexp::Expression
16
16
  count == 2
17
17
  end
18
18
 
19
- def to_s(_format = :full)
20
- expressions.join(text)
19
+ def parts
20
+ intersperse(expressions, text.dup)
21
21
  end
22
22
  end
23
23
  end
@@ -20,8 +20,8 @@ module Regexp::Expression
20
20
  self.closed = true
21
21
  end
22
22
 
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
23
+ def parts
24
+ ["#{text}#{'^' if negated?}", *expressions, ']']
25
25
  end
26
26
  end
27
27
  end # module Regexp::Expression
@@ -55,8 +55,8 @@ module Regexp::Expression
55
55
  condition.reference
56
56
  end
57
57
 
58
- def to_s(format = :full)
59
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
58
+ def parts
59
+ [text.dup, condition, *intersperse(branches, '|'), ')']
60
60
  end
61
61
 
62
62
  def initialize_copy(orig)
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class FreeSpace < Regexp::Expression::Base
3
- def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
3
+ def quantify(*_args)
4
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
5
5
  end
6
6
  end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
4
+ def parts
5
+ [text.dup, *expressions, ')']
6
6
  end
7
7
 
8
8
  def capturing?; false end
@@ -18,9 +18,9 @@ module Regexp::Expression
18
18
  super
19
19
  end
20
20
 
21
- def to_s(format = :full)
21
+ def parts
22
22
  if implicit?
23
- "#{expressions.join}#{quantifier_affix(format)}"
23
+ expressions
24
24
  else
25
25
  super
26
26
  end
@@ -65,8 +65,8 @@ module Regexp::Expression
65
65
  end
66
66
 
67
67
  class Comment < Group::Base
68
- def to_s(_format = :full)
69
- text.dup
68
+ def parts
69
+ [text.dup]
70
70
  end
71
71
 
72
72
  def comment?; true end
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- class Base
2
+ module Shared
3
3
 
4
4
  # Test if this expression has the given test_type, which can be either
5
5
  # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,14 @@ module Regexp::Expression
93
93
  "Array, Hash, or Symbol expected, #{scope.class.name} given"
94
94
  end
95
95
  end
96
+
97
+ # Deep-compare two expressions for equality.
98
+ def ==(other)
99
+ other.class == self.class &&
100
+ other.to_s == to_s &&
101
+ other.options == options
102
+ end
103
+ alias :=== :==
104
+ alias :eql? :==
96
105
  end
97
106
  end
@@ -1,26 +1,24 @@
1
1
  module Regexp::Expression
2
+ # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
3
+ # call super in #initialize, but raise in #quantifier= and #quantify,
4
+ # or introduce an Expression::Quantifiable intermediate class.
5
+ # Or actually allow chaining as a more concise but tricky solution than PR#69.
2
6
  class Quantifier
7
+ include Regexp::Expression::Shared
8
+
3
9
  MODES = %i[greedy possessive reluctant]
4
10
 
5
- attr_reader :token, :text, :min, :max, :mode
11
+ attr_reader :min, :max, :mode
6
12
 
7
- def initialize(token, text, min, max, mode)
8
- @token = token
9
- @text = text
10
- @mode = mode
11
- @min = min
12
- @max = max
13
- end
14
-
15
- def initialize_copy(orig)
16
- @text = orig.text.dup
17
- super
18
- end
13
+ def initialize(*args)
14
+ deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
19
15
 
20
- def to_s
21
- text.dup
16
+ init_from_token_and_options(*args)
17
+ @mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
18
+ @min, @max = minmax
19
+ # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
+ self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
22
21
  end
23
- alias :to_str :to_s
24
22
 
25
23
  def to_h
26
24
  {
@@ -41,13 +39,32 @@ module Regexp::Expression
41
39
  end
42
40
  alias :lazy? :reluctant?
43
41
 
44
- def ==(other)
45
- other.class == self.class &&
46
- other.token == token &&
47
- other.mode == mode &&
48
- other.min == min &&
49
- other.max == max
42
+ private
43
+
44
+ def deprecated_old_init(token, text, min, max, mode = :greedy)
45
+ warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
+ "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
+ "Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode "\
49
+ "will be derived automatically. \nThis is consistent with how Expression::Base "\
50
+ "instances are created."
51
+ @token = token
52
+ @text = text
53
+ @min = min
54
+ @max = max
55
+ @mode = mode
56
+ end
57
+
58
+ def minmax
59
+ case token
60
+ when /zero_or_one/ then [0, 1]
61
+ when /zero_or_more/ then [0, -1]
62
+ when /one_or_more/ then [1, -1]
63
+ when :interval
64
+ int_min = text[/\{(\d*)/, 1]
65
+ int_max = text[/,?(\d*)\}/, 1]
66
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
67
+ end
50
68
  end
51
- alias :eq :==
52
69
  end
53
70
  end
@@ -39,12 +39,12 @@ module Regexp::Expression
39
39
  end
40
40
  alias :ts :starts_at
41
41
 
42
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
42
+ def quantify(*args)
43
43
  target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
44
44
  target or raise Regexp::Parser::Error,
45
45
  "No valid target found for '#{text}' quantifier"
46
46
 
47
- target.quantify(token, text, min, max, mode)
47
+ target.quantify(*args)
48
48
  end
49
49
  end
50
50
  end
@@ -18,8 +18,8 @@ module Regexp::Expression
18
18
  self.class::OPERAND.add_to(self, {}, active_opts)
19
19
  end
20
20
 
21
- def to_s(format = :full)
22
- sequences.map { |e| e.to_s(format) }.join(text)
21
+ def parts
22
+ intersperse(expressions, text.dup)
23
23
  end
24
24
  end
25
25
  end