regexp_parser 2.2.1 → 2.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +70 -6
- data/Gemfile +2 -1
- data/README.md +23 -9
- data/Rakefile +1 -56
- data/lib/regexp_parser/error.rb +1 -1
- data/lib/regexp_parser/expression/base.rb +9 -57
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
- data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +6 -6
- data/lib/regexp_parser/expression/methods/tests.rb +10 -1
- data/lib/regexp_parser/expression/quantifier.rb +40 -23
- data/lib/regexp_parser/expression/sequence.rb +2 -2
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
- data/lib/regexp_parser/expression/shared.rb +81 -0
- data/lib/regexp_parser/expression/subexpression.rb +11 -7
- data/lib/regexp_parser/expression.rb +1 -0
- data/lib/regexp_parser/lexer.rb +1 -1
- data/lib/regexp_parser/parser.rb +12 -60
- data/lib/regexp_parser/scanner/properties/long.csv +18 -0
- data/lib/regexp_parser/scanner/properties/short.csv +4 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +42 -31
- data/lib/regexp_parser/scanner.rb +729 -797
- data/lib/regexp_parser/syntax/any.rb +2 -5
- data/lib/regexp_parser/syntax/base.rb +91 -64
- data/lib/regexp_parser/syntax/token/quantifier.rb +4 -4
- data/lib/regexp_parser/syntax/token/unicode_property.rb +26 -5
- data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b84a4bb274f31b8608c7dc9d55ff6f1b8d92d0d147976f38079ae7701a6debe
|
4
|
+
data.tar.gz: 41db5f094d0beafade30a1fac2707cbc827831e818c485ad35d7173f18c6a91a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dcde6135ac42db609402e47e04ee3be1da8854de286d2baad15dafee04d451814fd7a3bae7adc5440a1fced811e242b69f5fd14bcfc4f3bd5091f86769d56be
|
7
|
+
data.tar.gz: 2660d0fb28a972a1de53b71b16f8591e573d4214724b5eea8a452549598ff5d0fc5b731149e8332f65bce01c812f4d0d72135bba7e3016064d9f05202a8b5580
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,67 @@
|
|
1
|
+
## [Unreleased]
|
2
|
+
|
3
|
+
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Fixed
|
6
|
+
|
7
|
+
- fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
|
8
|
+
- they used to be treated as reluctant or possessive mode indicators
|
9
|
+
- however, Ruby does not support these modes for interval quantifiers
|
10
|
+
- they are now treated as chained quantifiers instead, as Ruby does it
|
11
|
+
- cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
|
12
|
+
- fixed `Expression::Base#nesting_level` for some tree rewrite cases
|
13
|
+
- e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
|
14
|
+
- fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
|
15
|
+
- they raise a `SyntaxError` when used in a Regexp, so they could only be passed as a String
|
16
|
+
- they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
|
17
|
+
|
18
|
+
### Added
|
19
|
+
|
20
|
+
- added `Expression::Base#==` for (deep) comparison of expressions
|
21
|
+
- added `Expression::Base#parts`
|
22
|
+
- returns the text elements and subexpressions of an expression
|
23
|
+
- e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
|
24
|
+
- added `Expression::Base#te` (a.k.a. token end index)
|
25
|
+
- `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
|
26
|
+
- made some `Expression::Base` methods available on `Quantifier` instances, too
|
27
|
+
- `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
|
28
|
+
- `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
|
29
|
+
- `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
|
30
|
+
- this allows a more unified handling with `Expression::Base` instances
|
31
|
+
- allowed `Quantifier#initialize` to take a token and options Hash like other nodes
|
32
|
+
- added a deprecation warning for initializing Quantifiers with 4+ arguments:
|
33
|
+
|
34
|
+
Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
|
35
|
+
is deprecated.
|
36
|
+
|
37
|
+
It will no longer be supported in regexp_parser v3.0.0.
|
38
|
+
|
39
|
+
Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode`
|
40
|
+
with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode
|
41
|
+
will be derived automatically.
|
42
|
+
|
43
|
+
This is consistent with how Expression::Base instances are created.
|
44
|
+
|
45
|
+
|
46
|
+
## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
|
47
|
+
|
48
|
+
### Fixed
|
49
|
+
|
50
|
+
- removed five nonexistent unicode properties from `Syntax#features`
|
51
|
+
- these were never supported by Ruby or the `Regexp::Scanner`
|
52
|
+
- thanks to [Markus Schirp](https://github.com/mbj) for the report
|
53
|
+
|
54
|
+
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
55
|
+
|
56
|
+
### Added
|
57
|
+
|
58
|
+
- improved parsing performance through `Syntax` refactoring
|
59
|
+
- instead of fresh `Syntax` instances, pre-loaded constants are now re-used
|
60
|
+
- this approximately doubles the parsing speed for simple regexps
|
61
|
+
- added methods to `Syntax` classes to show relative feature sets
|
62
|
+
- e.g. `Regexp::Syntax::V3_2_0.added_features`
|
63
|
+
- support for new unicode properties of Ruby 3.2 / Unicode 14.0
|
64
|
+
|
1
65
|
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
2
66
|
|
3
67
|
### Fixed
|
@@ -167,7 +231,7 @@
|
|
167
231
|
|
168
232
|
### Added
|
169
233
|
|
170
|
-
- `Expression#each_expression` and `#traverse` can now be called without a block
|
234
|
+
- `Expression::Base#each_expression` and `#traverse` can now be called without a block
|
171
235
|
* this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
|
172
236
|
* thanks to [Masataka Kuwabara](https://github.com/pocke)
|
173
237
|
|
@@ -193,7 +257,7 @@
|
|
193
257
|
- Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
|
194
258
|
- Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
|
195
259
|
- Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
|
196
|
-
- Fixed `Expression#match` and `#=~` not working with a single argument
|
260
|
+
- Fixed `Expression::Base#match` and `#=~` not working with a single argument
|
197
261
|
|
198
262
|
### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
199
263
|
|
@@ -201,15 +265,15 @@
|
|
201
265
|
|
202
266
|
- Added `#referenced_expression` for backrefs, subexp calls and conditionals
|
203
267
|
* returns the `Group` expression that is being referenced via name or number
|
204
|
-
- Added `Expression#repetitions`
|
268
|
+
- Added `Expression::Base#repetitions`
|
205
269
|
* returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
|
206
270
|
* like `#quantity` but with a more uniform interface
|
207
|
-
- Added `Expression#match_length`
|
271
|
+
- Added `Expression::Base#match_length`
|
208
272
|
* allows inspecting and iterating over String lengths matched by the Expression
|
209
273
|
|
210
274
|
### Fixed
|
211
275
|
|
212
|
-
- Fixed `Expression#clone` "direction"
|
276
|
+
- Fixed `Expression::Base#clone` "direction"
|
213
277
|
* it used to dup ivars onto the callee, leaving only the clone referencing the original objects
|
214
278
|
* this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
|
215
279
|
- Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
|
@@ -371,7 +435,7 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
371
435
|
- Fixed a thread safety issue (issue #45)
|
372
436
|
- Some public class methods that were only reliable for
|
373
437
|
internal use are now private instance methods (PR #46)
|
374
|
-
- Improved the usefulness of Expression#options (issue #43) -
|
438
|
+
- Improved the usefulness of Expression::Base#options (issue #43) -
|
375
439
|
#options and derived methods such as #i?, #m? and #x? are now
|
376
440
|
defined for all Expressions that are affected by such flags.
|
377
441
|
- Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
data/Gemfile
CHANGED
@@ -5,9 +5,10 @@ gemspec
|
|
5
5
|
group :development, :test do
|
6
6
|
gem 'ice_nine', '~> 0.11.2'
|
7
7
|
gem 'rake', '~> 13.0'
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
8
|
+
gem 'regexp_property_values', '~> 1.3'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'benchmark-ips', '~> 2.1'
|
11
12
|
gem 'gouteur'
|
12
13
|
gem 'rubocop', '~> 1.7'
|
13
14
|
end
|
data/README.md
CHANGED
@@ -157,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
|
|
157
157
|
flavor variations. Syntax only comes into play in the lexer.
|
158
158
|
|
159
159
|
#### Example
|
160
|
-
The following
|
160
|
+
The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
|
161
161
|
checks a few of their implementation features.
|
162
162
|
|
163
163
|
```ruby
|
164
164
|
require 'regexp_parser'
|
165
165
|
|
166
|
-
ruby_20 = Regexp::Syntax.
|
166
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0'
|
167
167
|
ruby_20.implements? :quantifier, :zero_or_one # => true
|
168
168
|
ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
|
169
169
|
ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
|
170
170
|
ruby_20.implements? :conditional, :condition # => true
|
171
171
|
|
172
|
-
ruby_19 = Regexp::Syntax.
|
172
|
+
ruby_19 = Regexp::Syntax.for 'ruby/1.9'
|
173
173
|
ruby_19.implements? :quantifier, :zero_or_one # => true
|
174
174
|
ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
|
175
175
|
ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
|
176
176
|
ruby_19.implements? :conditional, :condition # => false
|
177
177
|
|
178
|
-
ruby_18 = Regexp::Syntax.
|
178
|
+
ruby_18 = Regexp::Syntax.for 'ruby/1.8'
|
179
179
|
ruby_18.implements? :quantifier, :zero_or_one # => true
|
180
180
|
ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
|
181
181
|
ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
|
182
182
|
ruby_18.implements? :conditional, :condition # => false
|
183
183
|
```
|
184
184
|
|
185
|
+
Syntax objects can also be queried about their complete and relative feature sets.
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
require 'regexp_parser'
|
189
|
+
|
190
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
|
191
|
+
ruby_20.added_features # => { conditional: [...], ... }
|
192
|
+
ruby_20.removed_features # => { property: [:newline], ... }
|
193
|
+
ruby_20.features # => { anchor: [...], ... }
|
194
|
+
```
|
185
195
|
|
186
196
|
#### Notes
|
187
197
|
* Variations on a token, for example a named group with angle brackets (< and >)
|
@@ -357,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
367
|
| **POSIX Classes** | `[:alpha:]`, `[:^digit:]` | ✓ |
|
358
368
|
| **Quantifiers** | | ⋱ |
|
359
369
|
|   _**Greedy**_ | `?`, `*`, `+`, `{m,M}` | ✓ |
|
360
|
-
|   _**Reluctant** (Lazy)_ | `??`, `*?`,
|
361
|
-
|   _**Possessive**_ | `?+`, `*+`,
|
370
|
+
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?` \[1\] | ✓ |
|
371
|
+
|   _**Possessive**_ | `?+`, `*+`, `++` \[1\] | ✓ |
|
362
372
|
| **String Escapes** | | ⋱ |
|
363
|
-
|   _**Control** \[
|
373
|
+
|   _**Control** \[2\]_ | `\C-C`, `\cD` | ✓ |
|
364
374
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
365
|
-
|   _**Meta** \[
|
375
|
+
|   _**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
366
376
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
367
377
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
368
378
|
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
@@ -374,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
|
|
374
384
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
375
385
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
376
386
|
|
377
|
-
**\[1\]**:
|
387
|
+
**\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
|
388
|
+
quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
|
389
|
+
[#69](https://github.com/ammar/regexp_parser/pull/69).
|
390
|
+
|
391
|
+
**\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
378
392
|
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
379
393
|
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
380
394
|
|
data/Rakefile
CHANGED
@@ -5,9 +5,7 @@ require 'rake'
|
|
5
5
|
require 'rake/testtask'
|
6
6
|
require 'rspec/core/rake_task'
|
7
7
|
|
8
|
-
|
9
|
-
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
10
|
-
RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
|
8
|
+
Dir['tasks/**/*.rake'].each { |file| load(file) }
|
11
9
|
|
12
10
|
Bundler::GemHelper.install_tasks
|
13
11
|
|
@@ -19,60 +17,7 @@ namespace :test do
|
|
19
17
|
task full: [:'ragel:rb', :spec]
|
20
18
|
end
|
21
19
|
|
22
|
-
namespace :ragel do
|
23
|
-
desc "Process the ragel source files and output ruby code"
|
24
|
-
task :rb do
|
25
|
-
RAGEL_SOURCE_FILES.each do |source_file|
|
26
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
27
|
-
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
28
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
29
|
-
|
30
|
-
contents = File.read(output_file)
|
31
|
-
|
32
|
-
File.open(output_file, 'r+') do |file|
|
33
|
-
contents = "# -*- warn-indent:false; -*-\n" + contents
|
34
|
-
|
35
|
-
file.write(contents)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
desc "Delete the ragel generated source file(s)"
|
41
|
-
task :clean do
|
42
|
-
RAGEL_SOURCE_FILES.each do |file|
|
43
|
-
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
20
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
49
21
|
# latest scanner code is generated and included in the build.
|
50
22
|
desc "Runs ragel:rb before building the gem"
|
51
23
|
task :build => ['ragel:rb']
|
52
|
-
|
53
|
-
namespace :props do
|
54
|
-
desc 'Write new property value hashes for the properties scanner'
|
55
|
-
task :update do
|
56
|
-
require 'regexp_property_values'
|
57
|
-
RegexpPropertyValues.update
|
58
|
-
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
59
|
-
|
60
|
-
write_hash_to_file = ->(hash, path) do
|
61
|
-
File.open(path, 'w') do |f|
|
62
|
-
f.puts "# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT",
|
63
|
-
*hash.sort.map { |pair| pair.join(',') }
|
64
|
-
end
|
65
|
-
puts "Wrote #{hash.count} aliases to `#{path}`"
|
66
|
-
end
|
67
|
-
|
68
|
-
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
69
|
-
[val.identifier, val.full_name.downcase]
|
70
|
-
end
|
71
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
|
72
|
-
|
73
|
-
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
74
|
-
[k.identifier, v.full_name.downcase]
|
75
|
-
end
|
76
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
|
77
|
-
end
|
78
|
-
end
|
data/lib/regexp_parser/error.rb
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Base
|
3
|
-
|
4
|
-
attr_accessor :text, :ts
|
5
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
-
|
7
|
-
attr_accessor :quantifier
|
8
|
-
attr_accessor :options
|
3
|
+
include Regexp::Expression::Shared
|
9
4
|
|
10
5
|
def initialize(token, options = {})
|
11
|
-
|
12
|
-
self.token = token.token
|
13
|
-
self.text = token.text
|
14
|
-
self.ts = token.ts
|
15
|
-
self.level = token.level
|
16
|
-
self.set_level = token.set_level
|
17
|
-
self.conditional_level = token.conditional_level
|
18
|
-
self.nesting_level = 0
|
19
|
-
self.quantifier = nil
|
20
|
-
self.options = options
|
6
|
+
init_from_token_and_options(token, options)
|
21
7
|
end
|
22
8
|
|
23
9
|
def initialize_copy(orig)
|
24
|
-
self.text =
|
25
|
-
self.options =
|
26
|
-
self.quantifier =
|
10
|
+
self.text = orig.text.dup if orig.text
|
11
|
+
self.options = orig.options.dup if orig.options
|
12
|
+
self.quantifier = orig.quantifier.clone if orig.quantifier
|
27
13
|
super
|
28
14
|
end
|
29
15
|
|
@@ -31,48 +17,14 @@ module Regexp::Expression
|
|
31
17
|
::Regexp.new(to_s(format))
|
32
18
|
end
|
33
19
|
|
34
|
-
|
35
|
-
|
36
|
-
def base_length
|
37
|
-
to_s(:base).length
|
38
|
-
end
|
39
|
-
|
40
|
-
def full_length
|
41
|
-
to_s.length
|
42
|
-
end
|
43
|
-
|
44
|
-
def offset
|
45
|
-
[starts_at, full_length]
|
46
|
-
end
|
47
|
-
|
48
|
-
def coded_offset
|
49
|
-
'@%d+%d' % offset
|
50
|
-
end
|
51
|
-
|
52
|
-
def to_s(format = :full)
|
53
|
-
"#{text}#{quantifier_affix(format)}"
|
54
|
-
end
|
55
|
-
|
56
|
-
def quantifier_affix(expression_format)
|
57
|
-
quantifier.to_s if quantified? && expression_format != :base
|
58
|
-
end
|
59
|
-
|
60
|
-
def terminal?
|
61
|
-
!respond_to?(:expressions)
|
62
|
-
end
|
63
|
-
|
64
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
20
|
+
def quantify(*args)
|
21
|
+
self.quantifier = Quantifier.new(*args)
|
66
22
|
end
|
67
23
|
|
68
24
|
def unquantified_clone
|
69
25
|
clone.tap { |exp| exp.quantifier = nil }
|
70
26
|
end
|
71
27
|
|
72
|
-
def quantified?
|
73
|
-
!quantifier.nil?
|
74
|
-
end
|
75
|
-
|
76
28
|
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
29
|
def quantity
|
78
30
|
return [nil,nil] unless quantified?
|
@@ -104,7 +56,7 @@ module Regexp::Expression
|
|
104
56
|
quantified? and quantifier.possessive?
|
105
57
|
end
|
106
58
|
|
107
|
-
def
|
59
|
+
def to_h
|
108
60
|
{
|
109
61
|
type: type,
|
110
62
|
token: token,
|
@@ -118,6 +70,6 @@ module Regexp::Expression
|
|
118
70
|
quantifier: quantified? ? quantifier.to_h : nil,
|
119
71
|
}
|
120
72
|
end
|
121
|
-
alias :
|
73
|
+
alias :attributes :to_h
|
122
74
|
end
|
123
75
|
end
|
@@ -20,8 +20,8 @@ module Regexp::Expression
|
|
20
20
|
self.closed = true
|
21
21
|
end
|
22
22
|
|
23
|
-
def
|
24
|
-
"#{text}#{'^' if negated?}
|
23
|
+
def parts
|
24
|
+
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end # module Regexp::Expression
|
@@ -55,8 +55,8 @@ module Regexp::Expression
|
|
55
55
|
condition.reference
|
56
56
|
end
|
57
57
|
|
58
|
-
def
|
59
|
-
|
58
|
+
def parts
|
59
|
+
[text.dup, condition, *intersperse(branches, '|'), ')']
|
60
60
|
end
|
61
61
|
|
62
62
|
def initialize_copy(orig)
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
|
4
|
+
def parts
|
5
|
+
[text.dup, *expressions, ')']
|
6
6
|
end
|
7
7
|
|
8
8
|
def capturing?; false end
|
@@ -18,9 +18,9 @@ module Regexp::Expression
|
|
18
18
|
super
|
19
19
|
end
|
20
20
|
|
21
|
-
def
|
21
|
+
def parts
|
22
22
|
if implicit?
|
23
|
-
|
23
|
+
expressions
|
24
24
|
else
|
25
25
|
super
|
26
26
|
end
|
@@ -65,8 +65,8 @@ module Regexp::Expression
|
|
65
65
|
end
|
66
66
|
|
67
67
|
class Comment < Group::Base
|
68
|
-
def
|
69
|
-
text.dup
|
68
|
+
def parts
|
69
|
+
[text.dup]
|
70
70
|
end
|
71
71
|
|
72
72
|
def comment?; true end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
2
|
+
module Shared
|
3
3
|
|
4
4
|
# Test if this expression has the given test_type, which can be either
|
5
5
|
# a symbol or an array of symbols to check against the expression's type.
|
@@ -93,5 +93,14 @@ module Regexp::Expression
|
|
93
93
|
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
94
94
|
end
|
95
95
|
end
|
96
|
+
|
97
|
+
# Deep-compare two expressions for equality.
|
98
|
+
def ==(other)
|
99
|
+
other.class == self.class &&
|
100
|
+
other.to_s == to_s &&
|
101
|
+
other.options == options
|
102
|
+
end
|
103
|
+
alias :=== :==
|
104
|
+
alias :eql? :==
|
96
105
|
end
|
97
106
|
end
|
@@ -1,26 +1,24 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
3
|
+
# call super in #initialize, but raise in #quantifier= and #quantify,
|
4
|
+
# or introduce an Expression::Quantifiable intermediate class.
|
5
|
+
# Or actually allow chaining as a more concise but tricky solution than PR#69.
|
2
6
|
class Quantifier
|
7
|
+
include Regexp::Expression::Shared
|
8
|
+
|
3
9
|
MODES = %i[greedy possessive reluctant]
|
4
10
|
|
5
|
-
attr_reader :
|
11
|
+
attr_reader :min, :max, :mode
|
6
12
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
@text = text
|
10
|
-
@mode = mode
|
11
|
-
@min = min
|
12
|
-
@max = max
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize_copy(orig)
|
16
|
-
@text = orig.text.dup
|
17
|
-
super
|
18
|
-
end
|
13
|
+
def initialize(*args)
|
14
|
+
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
19
15
|
|
20
|
-
|
21
|
-
|
16
|
+
init_from_token_and_options(*args)
|
17
|
+
@mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
|
+
@min, @max = minmax
|
19
|
+
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
|
+
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
22
21
|
end
|
23
|
-
alias :to_str :to_s
|
24
22
|
|
25
23
|
def to_h
|
26
24
|
{
|
@@ -41,13 +39,32 @@ module Regexp::Expression
|
|
41
39
|
end
|
42
40
|
alias :lazy? :reluctant?
|
43
41
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
private
|
43
|
+
|
44
|
+
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
45
|
+
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
|
+
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode` "\
|
48
|
+
"with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode "\
|
49
|
+
"will be derived automatically. \nThis is consistent with how Expression::Base "\
|
50
|
+
"instances are created."
|
51
|
+
@token = token
|
52
|
+
@text = text
|
53
|
+
@min = min
|
54
|
+
@max = max
|
55
|
+
@mode = mode
|
56
|
+
end
|
57
|
+
|
58
|
+
def minmax
|
59
|
+
case token
|
60
|
+
when /zero_or_one/ then [0, 1]
|
61
|
+
when /zero_or_more/ then [0, -1]
|
62
|
+
when /one_or_more/ then [1, -1]
|
63
|
+
when :interval
|
64
|
+
int_min = text[/\{(\d*)/, 1]
|
65
|
+
int_max = text[/,?(\d*)\}/, 1]
|
66
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
67
|
+
end
|
50
68
|
end
|
51
|
-
alias :eq :==
|
52
69
|
end
|
53
70
|
end
|
@@ -39,12 +39,12 @@ module Regexp::Expression
|
|
39
39
|
end
|
40
40
|
alias :ts :starts_at
|
41
41
|
|
42
|
-
def quantify(
|
42
|
+
def quantify(*args)
|
43
43
|
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
44
44
|
target or raise Regexp::Parser::Error,
|
45
45
|
"No valid target found for '#{text}' quantifier"
|
46
46
|
|
47
|
-
target.quantify(
|
47
|
+
target.quantify(*args)
|
48
48
|
end
|
49
49
|
end
|
50
50
|
end
|