regexp_parser 2.2.1 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +70 -6
- data/Gemfile +2 -1
- data/README.md +23 -9
- data/Rakefile +1 -56
- data/lib/regexp_parser/error.rb +1 -1
- data/lib/regexp_parser/expression/base.rb +9 -57
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
- data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +6 -6
- data/lib/regexp_parser/expression/methods/tests.rb +10 -1
- data/lib/regexp_parser/expression/quantifier.rb +40 -23
- data/lib/regexp_parser/expression/sequence.rb +2 -2
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
- data/lib/regexp_parser/expression/shared.rb +81 -0
- data/lib/regexp_parser/expression/subexpression.rb +11 -7
- data/lib/regexp_parser/expression.rb +1 -0
- data/lib/regexp_parser/lexer.rb +1 -1
- data/lib/regexp_parser/parser.rb +12 -60
- data/lib/regexp_parser/scanner/properties/long.csv +18 -0
- data/lib/regexp_parser/scanner/properties/short.csv +4 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +42 -31
- data/lib/regexp_parser/scanner.rb +729 -797
- data/lib/regexp_parser/syntax/any.rb +2 -5
- data/lib/regexp_parser/syntax/base.rb +91 -64
- data/lib/regexp_parser/syntax/token/quantifier.rb +4 -4
- data/lib/regexp_parser/syntax/token/unicode_property.rb +26 -5
- data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b84a4bb274f31b8608c7dc9d55ff6f1b8d92d0d147976f38079ae7701a6debe
|
4
|
+
data.tar.gz: 41db5f094d0beafade30a1fac2707cbc827831e818c485ad35d7173f18c6a91a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dcde6135ac42db609402e47e04ee3be1da8854de286d2baad15dafee04d451814fd7a3bae7adc5440a1fced811e242b69f5fd14bcfc4f3bd5091f86769d56be
|
7
|
+
data.tar.gz: 2660d0fb28a972a1de53b71b16f8591e573d4214724b5eea8a452549598ff5d0fc5b731149e8332f65bce01c812f4d0d72135bba7e3016064d9f05202a8b5580
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,67 @@
|
|
1
|
+
## [Unreleased]
|
2
|
+
|
3
|
+
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Fixed
|
6
|
+
|
7
|
+
- fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
|
8
|
+
- they used to be treated as reluctant or possessive mode indicators
|
9
|
+
- however, Ruby does not support these modes for interval quantifiers
|
10
|
+
- they are now treated as chained quantifiers instead, as Ruby does it
|
11
|
+
- cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
|
12
|
+
- fixed `Expression::Base#nesting_level` for some tree rewrite cases
|
13
|
+
- e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
|
14
|
+
- fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
|
15
|
+
- they raise a `SyntaxError` when used in a Regexp, so they could only be passed as a String
|
16
|
+
- they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
|
17
|
+
|
18
|
+
### Added
|
19
|
+
|
20
|
+
- added `Expression::Base#==` for (deep) comparison of expressions
|
21
|
+
- added `Expression::Base#parts`
|
22
|
+
- returns the text elements and subexpressions of an expression
|
23
|
+
- e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
|
24
|
+
- added `Expression::Base#te` (a.k.a. token end index)
|
25
|
+
- `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
|
26
|
+
- made some `Expression::Base` methods available on `Quantifier` instances, too
|
27
|
+
- `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
|
28
|
+
- `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
|
29
|
+
- `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
|
30
|
+
- this allows a more unified handling with `Expression::Base` instances
|
31
|
+
- allowed `Quantifier#initialize` to take a token and options Hash like other nodes
|
32
|
+
- added a deprecation warning for initializing Quantifiers with 4+ arguments:
|
33
|
+
|
34
|
+
Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
|
35
|
+
is deprecated.
|
36
|
+
|
37
|
+
It will no longer be supported in regexp_parser v3.0.0.
|
38
|
+
|
39
|
+
Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode`
|
40
|
+
with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode
|
41
|
+
will be derived automatically.
|
42
|
+
|
43
|
+
This is consistent with how Expression::Base instances are created.
|
44
|
+
|
45
|
+
|
46
|
+
## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
|
47
|
+
|
48
|
+
### Fixed
|
49
|
+
|
50
|
+
- removed five nonexistent unicode properties from `Syntax#features`
|
51
|
+
- these were never supported by Ruby or the `Regexp::Scanner`
|
52
|
+
- thanks to [Markus Schirp](https://github.com/mbj) for the report
|
53
|
+
|
54
|
+
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
55
|
+
|
56
|
+
### Added
|
57
|
+
|
58
|
+
- improved parsing performance through `Syntax` refactoring
|
59
|
+
- instead of fresh `Syntax` instances, pre-loaded constants are now re-used
|
60
|
+
- this approximately doubles the parsing speed for simple regexps
|
61
|
+
- added methods to `Syntax` classes to show relative feature sets
|
62
|
+
- e.g. `Regexp::Syntax::V3_2_0.added_features`
|
63
|
+
- support for new unicode properties of Ruby 3.2 / Unicode 14.0
|
64
|
+
|
1
65
|
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
2
66
|
|
3
67
|
### Fixed
|
@@ -167,7 +231,7 @@
|
|
167
231
|
|
168
232
|
### Added
|
169
233
|
|
170
|
-
- `Expression#each_expression` and `#traverse` can now be called without a block
|
234
|
+
- `Expression::Base#each_expression` and `#traverse` can now be called without a block
|
171
235
|
* this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
|
172
236
|
* thanks to [Masataka Kuwabara](https://github.com/pocke)
|
173
237
|
|
@@ -193,7 +257,7 @@
|
|
193
257
|
- Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
|
194
258
|
- Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
|
195
259
|
- Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
|
196
|
-
- Fixed `Expression#match` and `#=~` not working with a single argument
|
260
|
+
- Fixed `Expression::Base#match` and `#=~` not working with a single argument
|
197
261
|
|
198
262
|
### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
199
263
|
|
@@ -201,15 +265,15 @@
|
|
201
265
|
|
202
266
|
- Added `#referenced_expression` for backrefs, subexp calls and conditionals
|
203
267
|
* returns the `Group` expression that is being referenced via name or number
|
204
|
-
- Added `Expression#repetitions`
|
268
|
+
- Added `Expression::Base#repetitions`
|
205
269
|
* returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
|
206
270
|
* like `#quantity` but with a more uniform interface
|
207
|
-
- Added `Expression#match_length`
|
271
|
+
- Added `Expression::Base#match_length`
|
208
272
|
* allows inspecting and iterating over the String lengths matched by the Expression
|
209
273
|
|
210
274
|
### Fixed
|
211
275
|
|
212
|
-
- Fixed `Expression#clone` "direction"
|
276
|
+
- Fixed `Expression::Base#clone` "direction"
|
213
277
|
* it used to dup ivars onto the callee, leaving only the clone referencing the original objects
|
214
278
|
* this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
|
215
279
|
- Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
|
@@ -371,7 +435,7 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
371
435
|
- Fixed a thread safety issue (issue #45)
|
372
436
|
- Some public class methods that were only reliable for
|
373
437
|
internal use are now private instance methods (PR #46)
|
374
|
-
- Improved the usefulness of Expression#options (issue #43) -
|
438
|
+
- Improved the usefulness of Expression::Base#options (issue #43) -
|
375
439
|
#options and derived methods such as #i?, #m? and #x? are now
|
376
440
|
defined for all Expressions that are affected by such flags.
|
377
441
|
- Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
data/Gemfile
CHANGED
@@ -5,9 +5,10 @@ gemspec
|
|
5
5
|
group :development, :test do
|
6
6
|
gem 'ice_nine', '~> 0.11.2'
|
7
7
|
gem 'rake', '~> 13.0'
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
8
|
+
gem 'regexp_property_values', '~> 1.3'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'benchmark-ips', '~> 2.1'
|
11
12
|
gem 'gouteur'
|
12
13
|
gem 'rubocop', '~> 1.7'
|
13
14
|
end
|
data/README.md
CHANGED
@@ -157,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
|
|
157
157
|
flavor variations. Syntax only comes into play in the lexer.
|
158
158
|
|
159
159
|
#### Example
|
160
|
-
The following
|
160
|
+
The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
|
161
161
|
checks a few of their implementation features.
|
162
162
|
|
163
163
|
```ruby
|
164
164
|
require 'regexp_parser'
|
165
165
|
|
166
|
-
ruby_20 = Regexp::Syntax.
|
166
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0'
|
167
167
|
ruby_20.implements? :quantifier, :zero_or_one # => true
|
168
168
|
ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
|
169
169
|
ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
|
170
170
|
ruby_20.implements? :conditional, :condition # => true
|
171
171
|
|
172
|
-
ruby_19 = Regexp::Syntax.
|
172
|
+
ruby_19 = Regexp::Syntax.for 'ruby/1.9'
|
173
173
|
ruby_19.implements? :quantifier, :zero_or_one # => true
|
174
174
|
ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
|
175
175
|
ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
|
176
176
|
ruby_19.implements? :conditional, :condition # => false
|
177
177
|
|
178
|
-
ruby_18 = Regexp::Syntax.
|
178
|
+
ruby_18 = Regexp::Syntax.for 'ruby/1.8'
|
179
179
|
ruby_18.implements? :quantifier, :zero_or_one # => true
|
180
180
|
ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
|
181
181
|
ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
|
182
182
|
ruby_18.implements? :conditional, :condition # => false
|
183
183
|
```
|
184
184
|
|
185
|
+
Syntax objects can also be queried about their complete and relative feature sets.
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
require 'regexp_parser'
|
189
|
+
|
190
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
|
191
|
+
ruby_20.added_features # => { conditional: [...], ... }
|
192
|
+
ruby_20.removed_features # => { property: [:newline], ... }
|
193
|
+
ruby_20.features # => { anchor: [...], ... }
|
194
|
+
```
|
185
195
|
|
186
196
|
#### Notes
|
187
197
|
* Variations on a token, for example a named group with angle brackets (< and >)
|
@@ -357,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
367
|
| **POSIX Classes** | `[:alpha:]`, `[:^digit:]` | ✓ |
|
358
368
|
| **Quantifiers** | | ⋱ |
|
359
369
|
|   _**Greedy**_ | `?`, `*`, `+`, `{m,M}` | ✓ |
|
360
|
-
|   _**Reluctant** (Lazy)_ | `??`, `*?`,
|
361
|
-
|   _**Possessive**_ | `?+`, `*+`,
|
370
|
+
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?` \[1\] | ✓ |
|
371
|
+
|   _**Possessive**_ | `?+`, `*+`, `++` \[1\] | ✓ |
|
362
372
|
| **String Escapes** | | ⋱ |
|
363
|
-
|   _**Control** \[
|
373
|
+
|   _**Control** \[2\]_ | `\C-C`, `\cD` | ✓ |
|
364
374
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
365
|
-
|   _**Meta** \[
|
375
|
+
|   _**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
366
376
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
367
377
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
368
378
|
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
@@ -374,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
|
|
374
384
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
375
385
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
376
386
|
|
377
|
-
**\[1\]**:
|
387
|
+
**\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
|
388
|
+
quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
|
389
|
+
[#69](https://github.com/ammar/regexp_parser/pull/69).
|
390
|
+
|
391
|
+
**\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
378
392
|
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
379
393
|
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
380
394
|
|
data/Rakefile
CHANGED
@@ -5,9 +5,7 @@ require 'rake'
|
|
5
5
|
require 'rake/testtask'
|
6
6
|
require 'rspec/core/rake_task'
|
7
7
|
|
8
|
-
|
9
|
-
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
10
|
-
RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
|
8
|
+
Dir['tasks/**/*.rake'].each { |file| load(file) }
|
11
9
|
|
12
10
|
Bundler::GemHelper.install_tasks
|
13
11
|
|
@@ -19,60 +17,7 @@ namespace :test do
|
|
19
17
|
task full: [:'ragel:rb', :spec]
|
20
18
|
end
|
21
19
|
|
22
|
-
namespace :ragel do
|
23
|
-
desc "Process the ragel source files and output ruby code"
|
24
|
-
task :rb do
|
25
|
-
RAGEL_SOURCE_FILES.each do |source_file|
|
26
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
27
|
-
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
28
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
29
|
-
|
30
|
-
contents = File.read(output_file)
|
31
|
-
|
32
|
-
File.open(output_file, 'r+') do |file|
|
33
|
-
contents = "# -*- warn-indent:false; -*-\n" + contents
|
34
|
-
|
35
|
-
file.write(contents)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
desc "Delete the ragel generated source file(s)"
|
41
|
-
task :clean do
|
42
|
-
RAGEL_SOURCE_FILES.each do |file|
|
43
|
-
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
20
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
49
21
|
# latest scanner code is generated and included in the build.
|
50
22
|
desc "Runs ragel:rb before building the gem"
|
51
23
|
task :build => ['ragel:rb']
|
52
|
-
|
53
|
-
namespace :props do
|
54
|
-
desc 'Write new property value hashes for the properties scanner'
|
55
|
-
task :update do
|
56
|
-
require 'regexp_property_values'
|
57
|
-
RegexpPropertyValues.update
|
58
|
-
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
59
|
-
|
60
|
-
write_hash_to_file = ->(hash, path) do
|
61
|
-
File.open(path, 'w') do |f|
|
62
|
-
f.puts "# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT",
|
63
|
-
*hash.sort.map { |pair| pair.join(',') }
|
64
|
-
end
|
65
|
-
puts "Wrote #{hash.count} aliases to `#{path}`"
|
66
|
-
end
|
67
|
-
|
68
|
-
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
69
|
-
[val.identifier, val.full_name.downcase]
|
70
|
-
end
|
71
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
|
72
|
-
|
73
|
-
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
74
|
-
[k.identifier, v.full_name.downcase]
|
75
|
-
end
|
76
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
|
77
|
-
end
|
78
|
-
end
|
data/lib/regexp_parser/error.rb
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Base
|
3
|
-
|
4
|
-
attr_accessor :text, :ts
|
5
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
-
|
7
|
-
attr_accessor :quantifier
|
8
|
-
attr_accessor :options
|
3
|
+
include Regexp::Expression::Shared
|
9
4
|
|
10
5
|
def initialize(token, options = {})
|
11
|
-
|
12
|
-
self.token = token.token
|
13
|
-
self.text = token.text
|
14
|
-
self.ts = token.ts
|
15
|
-
self.level = token.level
|
16
|
-
self.set_level = token.set_level
|
17
|
-
self.conditional_level = token.conditional_level
|
18
|
-
self.nesting_level = 0
|
19
|
-
self.quantifier = nil
|
20
|
-
self.options = options
|
6
|
+
init_from_token_and_options(token, options)
|
21
7
|
end
|
22
8
|
|
23
9
|
def initialize_copy(orig)
|
24
|
-
self.text =
|
25
|
-
self.options =
|
26
|
-
self.quantifier =
|
10
|
+
self.text = orig.text.dup if orig.text
|
11
|
+
self.options = orig.options.dup if orig.options
|
12
|
+
self.quantifier = orig.quantifier.clone if orig.quantifier
|
27
13
|
super
|
28
14
|
end
|
29
15
|
|
@@ -31,48 +17,14 @@ module Regexp::Expression
|
|
31
17
|
::Regexp.new(to_s(format))
|
32
18
|
end
|
33
19
|
|
34
|
-
|
35
|
-
|
36
|
-
def base_length
|
37
|
-
to_s(:base).length
|
38
|
-
end
|
39
|
-
|
40
|
-
def full_length
|
41
|
-
to_s.length
|
42
|
-
end
|
43
|
-
|
44
|
-
def offset
|
45
|
-
[starts_at, full_length]
|
46
|
-
end
|
47
|
-
|
48
|
-
def coded_offset
|
49
|
-
'@%d+%d' % offset
|
50
|
-
end
|
51
|
-
|
52
|
-
def to_s(format = :full)
|
53
|
-
"#{text}#{quantifier_affix(format)}"
|
54
|
-
end
|
55
|
-
|
56
|
-
def quantifier_affix(expression_format)
|
57
|
-
quantifier.to_s if quantified? && expression_format != :base
|
58
|
-
end
|
59
|
-
|
60
|
-
def terminal?
|
61
|
-
!respond_to?(:expressions)
|
62
|
-
end
|
63
|
-
|
64
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
20
|
+
def quantify(*args)
|
21
|
+
self.quantifier = Quantifier.new(*args)
|
66
22
|
end
|
67
23
|
|
68
24
|
def unquantified_clone
|
69
25
|
clone.tap { |exp| exp.quantifier = nil }
|
70
26
|
end
|
71
27
|
|
72
|
-
def quantified?
|
73
|
-
!quantifier.nil?
|
74
|
-
end
|
75
|
-
|
76
28
|
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
29
|
def quantity
|
78
30
|
return [nil,nil] unless quantified?
|
@@ -104,7 +56,7 @@ module Regexp::Expression
|
|
104
56
|
quantified? and quantifier.possessive?
|
105
57
|
end
|
106
58
|
|
107
|
-
def
|
59
|
+
def to_h
|
108
60
|
{
|
109
61
|
type: type,
|
110
62
|
token: token,
|
@@ -118,6 +70,6 @@ module Regexp::Expression
|
|
118
70
|
quantifier: quantified? ? quantifier.to_h : nil,
|
119
71
|
}
|
120
72
|
end
|
121
|
-
alias :
|
73
|
+
alias :attributes :to_h
|
122
74
|
end
|
123
75
|
end
|
@@ -20,8 +20,8 @@ module Regexp::Expression
|
|
20
20
|
self.closed = true
|
21
21
|
end
|
22
22
|
|
23
|
-
def
|
24
|
-
"#{text}#{'^' if negated?}
|
23
|
+
def parts
|
24
|
+
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end # module Regexp::Expression
|
@@ -55,8 +55,8 @@ module Regexp::Expression
|
|
55
55
|
condition.reference
|
56
56
|
end
|
57
57
|
|
58
|
-
def
|
59
|
-
|
58
|
+
def parts
|
59
|
+
[text.dup, condition, *intersperse(branches, '|'), ')']
|
60
60
|
end
|
61
61
|
|
62
62
|
def initialize_copy(orig)
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
|
4
|
+
def parts
|
5
|
+
[text.dup, *expressions, ')']
|
6
6
|
end
|
7
7
|
|
8
8
|
def capturing?; false end
|
@@ -18,9 +18,9 @@ module Regexp::Expression
|
|
18
18
|
super
|
19
19
|
end
|
20
20
|
|
21
|
-
def
|
21
|
+
def parts
|
22
22
|
if implicit?
|
23
|
-
|
23
|
+
expressions
|
24
24
|
else
|
25
25
|
super
|
26
26
|
end
|
@@ -65,8 +65,8 @@ module Regexp::Expression
|
|
65
65
|
end
|
66
66
|
|
67
67
|
class Comment < Group::Base
|
68
|
-
def
|
69
|
-
text.dup
|
68
|
+
def parts
|
69
|
+
[text.dup]
|
70
70
|
end
|
71
71
|
|
72
72
|
def comment?; true end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
2
|
+
module Shared
|
3
3
|
|
4
4
|
# Test if this expression has the given test_type, which can be either
|
5
5
|
# a symbol or an array of symbols to check against the expression's type.
|
@@ -93,5 +93,14 @@ module Regexp::Expression
|
|
93
93
|
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
94
94
|
end
|
95
95
|
end
|
96
|
+
|
97
|
+
# Deep-compare two expressions for equality.
|
98
|
+
def ==(other)
|
99
|
+
other.class == self.class &&
|
100
|
+
other.to_s == to_s &&
|
101
|
+
other.options == options
|
102
|
+
end
|
103
|
+
alias :=== :==
|
104
|
+
alias :eql? :==
|
96
105
|
end
|
97
106
|
end
|
@@ -1,26 +1,24 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
3
|
+
# call super in #initialize, but raise in #quantifier= and #quantify,
|
4
|
+
# or introduce an Expression::Quantifiable intermediate class.
|
5
|
+
# Or actually allow chaining as a more concise but tricky solution than PR#69.
|
2
6
|
class Quantifier
|
7
|
+
include Regexp::Expression::Shared
|
8
|
+
|
3
9
|
MODES = %i[greedy possessive reluctant]
|
4
10
|
|
5
|
-
attr_reader :
|
11
|
+
attr_reader :min, :max, :mode
|
6
12
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
@text = text
|
10
|
-
@mode = mode
|
11
|
-
@min = min
|
12
|
-
@max = max
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize_copy(orig)
|
16
|
-
@text = orig.text.dup
|
17
|
-
super
|
18
|
-
end
|
13
|
+
def initialize(*args)
|
14
|
+
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
19
15
|
|
20
|
-
|
21
|
-
|
16
|
+
init_from_token_and_options(*args)
|
17
|
+
@mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
|
+
@min, @max = minmax
|
19
|
+
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
|
+
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
22
21
|
end
|
23
|
-
alias :to_str :to_s
|
24
22
|
|
25
23
|
def to_h
|
26
24
|
{
|
@@ -41,13 +39,32 @@ module Regexp::Expression
|
|
41
39
|
end
|
42
40
|
alias :lazy? :reluctant?
|
43
41
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
private
|
43
|
+
|
44
|
+
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
45
|
+
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
|
+
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode` "\
|
48
|
+
"with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode "\
|
49
|
+
"will be derived automatically. \nThis is consistent with how Expression::Base "\
|
50
|
+
"instances are created."
|
51
|
+
@token = token
|
52
|
+
@text = text
|
53
|
+
@min = min
|
54
|
+
@max = max
|
55
|
+
@mode = mode
|
56
|
+
end
|
57
|
+
|
58
|
+
def minmax
|
59
|
+
case token
|
60
|
+
when /zero_or_one/ then [0, 1]
|
61
|
+
when /zero_or_more/ then [0, -1]
|
62
|
+
when /one_or_more/ then [1, -1]
|
63
|
+
when :interval
|
64
|
+
int_min = text[/\{(\d*)/, 1]
|
65
|
+
int_max = text[/,?(\d*)\}/, 1]
|
66
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
67
|
+
end
|
50
68
|
end
|
51
|
-
alias :eq :==
|
52
69
|
end
|
53
70
|
end
|
@@ -39,12 +39,12 @@ module Regexp::Expression
|
|
39
39
|
end
|
40
40
|
alias :ts :starts_at
|
41
41
|
|
42
|
-
def quantify(
|
42
|
+
def quantify(*args)
|
43
43
|
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
44
44
|
target or raise Regexp::Parser::Error,
|
45
45
|
"No valid target found for '#{text}' quantifier"
|
46
46
|
|
47
|
-
target.quantify(
|
47
|
+
target.quantify(*args)
|
48
48
|
end
|
49
49
|
end
|
50
50
|
end
|