regexp_parser 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/Gemfile +1 -1
- data/README.md +9 -13
- data/lib/regexp_parser/expression.rb +33 -21
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +4 -0
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +0 -4
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +31 -24
- data/lib/regexp_parser/parser.rb +25 -3
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -2
- data/spec/expression/base_spec.rb +80 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +141 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +97 -0
- data/spec/expression/methods/traverse_spec.rb +140 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/{test/lexer/test_conditionals.rb → spec/lexer/conditionals_spec.rb} +31 -35
- data/spec/lexer/escapes_spec.rb +38 -0
- data/spec/lexer/keep_spec.rb +22 -0
- data/{test/lexer/test_literals.rb → spec/lexer/literals_spec.rb} +20 -24
- data/{test/lexer/test_nesting.rb → spec/lexer/nesting_spec.rb} +11 -13
- data/spec/lexer/refcalls_spec.rb +54 -0
- data/spec/parser/all_spec.rb +31 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/{test/parser/test_anchors.rb → spec/parser/anchors_spec.rb} +7 -10
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +51 -0
- data/spec/parser/escapes_spec.rb +132 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +267 -0
- data/spec/parser/keep_spec.rb +19 -0
- data/spec/parser/posix_classes_spec.rb +27 -0
- data/spec/parser/properties_spec.rb +127 -0
- data/spec/parser/quantifiers_spec.rb +293 -0
- data/spec/parser/refcalls_spec.rb +237 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/{test/parser/test_types.rb → spec/parser/types_spec.rb} +13 -20
- data/spec/scanner/all_spec.rb +18 -0
- data/{test/scanner/test_anchors.rb → spec/scanner/anchors_spec.rb} +8 -10
- data/{test/scanner/test_conditionals.rb → spec/scanner/conditionals_spec.rb} +49 -53
- data/spec/scanner/errors_spec.rb +90 -0
- data/{test/scanner/test_escapes.rb → spec/scanner/escapes_spec.rb} +8 -10
- data/{test/scanner/test_free_space.rb → spec/scanner/free_space_spec.rb} +48 -52
- data/{test/scanner/test_groups.rb → spec/scanner/groups_spec.rb} +33 -41
- data/spec/scanner/keep_spec.rb +33 -0
- data/{test/scanner/test_literals.rb → spec/scanner/literals_spec.rb} +8 -12
- data/{test/scanner/test_meta.rb → spec/scanner/meta_spec.rb} +8 -10
- data/{test/scanner/test_properties.rb → spec/scanner/properties_spec.rb} +14 -19
- data/{test/scanner/test_quantifiers.rb → spec/scanner/quantifiers_spec.rb} +7 -9
- data/{test/scanner/test_refcalls.rb → spec/scanner/refcalls_spec.rb} +9 -9
- data/{test/scanner/test_scripts.rb → spec/scanner/scripts_spec.rb} +8 -12
- data/{test/scanner/test_sets.rb → spec/scanner/sets_spec.rb} +14 -17
- data/spec/scanner/types_spec.rb +29 -0
- data/spec/scanner/unicode_blocks_spec.rb +28 -0
- data/spec/spec_helper.rb +14 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +44 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +38 -0
- data/spec/syntax/versions/1.9.1_spec.rb +23 -0
- data/spec/syntax/versions/1.9.3_spec.rb +22 -0
- data/spec/syntax/versions/2.0.0_spec.rb +28 -0
- data/spec/syntax/versions/2.2.0_spec.rb +22 -0
- data/spec/syntax/versions/aliases_spec.rb +119 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +131 -140
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -134
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 707834a32bc2295b448953730eabddabb11bafc68fdf3148174f7be61d8b1f30
|
4
|
+
data.tar.gz: 72d199d28c342d6aae178a5876a7df6f59abcdf40c6ac4f05ea9dc40d16d9f3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c1402afedc1efb79f633ee93065598b64732519ba587ca3f682eb8bbb4aaa264e31dd916b95f9751f7c6e85e867efa260501b40e55409327efa5b769346a183
|
7
|
+
data.tar.gz: 6335cbc411b08adb64bfca9646eebc3a5c39d4651a2495d34f87fca21927da3a363fc320159a3732cdd9e2d8732986190fcd6c9d523b7308531f91848951ccbd
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,31 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- Added `#referenced_expression` for backrefs, subexp calls and conditionals
|
8
|
+
* returns the `Group` expression that is being referenced via name or number
|
9
|
+
- Added `Expression#repetitions`
|
10
|
+
* returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
|
11
|
+
* like `#quantity` but with a more uniform interface
|
12
|
+
- Added `Expression#match_length`
|
13
|
+
* allows inspecting and iterating over the String lengths matched by the Expression
|
14
|
+
|
15
|
+
### Fixed
|
16
|
+
|
17
|
+
- Fixed `Expression#clone` "direction"
|
18
|
+
* it used to dup ivars onto the callee, leaving only the clone referencing the original objects
|
19
|
+
* this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
|
20
|
+
- Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
|
21
|
+
* the inner `#text` was cloned onto the `Sequence` and thus duplicated
|
22
|
+
* e.g. `Regexp::Parser.parse(/(a|bc)/).clone.to_s # => (aa|bcbc)`
|
23
|
+
- Fixed inconsistent `#to_s` output for `Sequences`
|
24
|
+
* it used to return only the "specific" text, e.g. "|" for an alternation
|
25
|
+
* now it includes nested expressions as it does for all other `Subexpressions`
|
26
|
+
- Fixed quantification of codepoint lists with more than one entry (`\u{62 63 64}+`)
|
27
|
+
* quantifiers apply only to the last entry, so this token is now split up if quantified
|
28
|
+
|
3
29
|
### [1.4.0] - 2019-04-02 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
30
|
|
5
31
|
### Added
|
@@ -75,7 +101,7 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
75
101
|
- Changed `(?m)` and the like to emit as `:options_switch` token (@4ade4d1)
|
76
102
|
* allows differentiating from group-local `:options`, e.g. `(?m:.)`
|
77
103
|
- Changed name of `Backreference::..NestLevel` to `..RecursionLevel` (@4184339)
|
78
|
-
- Changed
|
104
|
+
- Changed `Backreference::Number#number` from `String` to `Integer` (@40a2231)
|
79
105
|
|
80
106
|
### Added
|
81
107
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -391,28 +391,22 @@ To run the tests simply run rake from the root directory, as 'test' is the defau
|
|
391
391
|
|
392
392
|
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
393
393
|
|
394
|
-
The tests use
|
394
|
+
The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
|
395
395
|
|
396
396
|
```
|
397
397
|
bin/test
|
398
398
|
```
|
399
399
|
|
400
|
-
|
400
|
+
You can run a specific test like so:
|
401
401
|
|
402
402
|
```
|
403
|
-
bin/test
|
403
|
+
bin/test spec/scanner/properties_spec.rb
|
404
404
|
```
|
405
405
|
|
406
|
-
|
406
|
+
Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
|
407
407
|
|
408
408
|
```
|
409
|
-
bin/test
|
410
|
-
```
|
411
|
-
|
412
|
-
Note that changes to Ragel files will not be reflected when using `bin/test`, so you might want to run:
|
413
|
-
|
414
|
-
```
|
415
|
-
rake ragel:rb && bin/test test/scanner/test_properties.rb
|
409
|
+
rake ragel:rb && bin/test spec/scanner/properties_spec.rb
|
416
410
|
```
|
417
411
|
|
418
412
|
## Building
|
@@ -440,7 +434,9 @@ Projects using regexp_parser.
|
|
440
434
|
|
441
435
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
442
436
|
|
443
|
-
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
437
|
+
- [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
438
|
+
|
439
|
+
- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) uses regexp_parser to generate examples of postal codes.
|
444
440
|
|
445
441
|
- [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
|
446
442
|
|
@@ -471,4 +467,4 @@ Documentation and books used while working on this project.
|
|
471
467
|
|
472
468
|
---
|
473
469
|
##### Copyright
|
474
|
-
_Copyright (c) 2010-
|
470
|
+
_Copyright (c) 2010-2019 Ammar Ali. See LICENSE file for details._
|
@@ -21,10 +21,10 @@ module Regexp::Expression
|
|
21
21
|
self.options = options
|
22
22
|
end
|
23
23
|
|
24
|
-
def initialize_clone(
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
def initialize_clone(orig)
|
25
|
+
self.text = (orig.text ? orig.text.dup : nil)
|
26
|
+
self.options = (orig.options ? orig.options.dup : nil)
|
27
|
+
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
28
28
|
super
|
29
29
|
end
|
30
30
|
|
@@ -62,15 +62,28 @@ module Regexp::Expression
|
|
62
62
|
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
63
63
|
end
|
64
64
|
|
65
|
+
def unquantified_clone
|
66
|
+
clone.tap { |exp| exp.quantifier = nil }
|
67
|
+
end
|
68
|
+
|
65
69
|
def quantified?
|
66
70
|
!quantifier.nil?
|
67
71
|
end
|
68
72
|
|
73
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
69
74
|
def quantity
|
70
75
|
return [nil,nil] unless quantified?
|
71
76
|
[quantifier.min, quantifier.max]
|
72
77
|
end
|
73
78
|
|
79
|
+
def repetitions
|
80
|
+
return 1..1 unless quantified?
|
81
|
+
min = quantifier.min
|
82
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
83
|
+
# fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
|
84
|
+
(min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
|
85
|
+
end
|
86
|
+
|
74
87
|
def greedy?
|
75
88
|
quantified? and quantifier.greedy?
|
76
89
|
end
|
@@ -101,22 +114,20 @@ module Regexp::Expression
|
|
101
114
|
alias :x? :free_spacing?
|
102
115
|
alias :extended? :free_spacing?
|
103
116
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
alias :d? :default_classes?
|
117
|
+
def default_classes?
|
118
|
+
options[:d] == true
|
119
|
+
end
|
120
|
+
alias :d? :default_classes?
|
109
121
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
122
|
+
def ascii_classes?
|
123
|
+
options[:a] == true
|
124
|
+
end
|
125
|
+
alias :a? :ascii_classes?
|
114
126
|
|
115
|
-
|
116
|
-
|
117
|
-
end
|
118
|
-
alias :u? :unicode_classes?
|
127
|
+
def unicode_classes?
|
128
|
+
options[:u] == true
|
119
129
|
end
|
130
|
+
alias :u? :unicode_classes?
|
120
131
|
|
121
132
|
def matches?(string)
|
122
133
|
Regexp.new(to_s) =~ string ? true : false
|
@@ -161,10 +172,6 @@ module Regexp::Expression
|
|
161
172
|
|
162
173
|
end # module Regexp::Expression
|
163
174
|
|
164
|
-
require 'regexp_parser/expression/methods/tests'
|
165
|
-
require 'regexp_parser/expression/methods/traverse'
|
166
|
-
require 'regexp_parser/expression/methods/strfregexp'
|
167
|
-
|
168
175
|
require 'regexp_parser/expression/quantifier'
|
169
176
|
require 'regexp_parser/expression/subexpression'
|
170
177
|
require 'regexp_parser/expression/sequence'
|
@@ -186,3 +193,8 @@ require 'regexp_parser/expression/classes/set'
|
|
186
193
|
require 'regexp_parser/expression/classes/set/intersection'
|
187
194
|
require 'regexp_parser/expression/classes/set/range'
|
188
195
|
require 'regexp_parser/expression/classes/type'
|
196
|
+
|
197
|
+
require 'regexp_parser/expression/methods/match_length'
|
198
|
+
require 'regexp_parser/expression/methods/strfregexp'
|
199
|
+
require 'regexp_parser/expression/methods/tests'
|
200
|
+
require 'regexp_parser/expression/methods/traverse'
|
@@ -1,9 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Backreference
|
3
|
-
class Base < Regexp::Expression::Base
|
3
|
+
class Base < Regexp::Expression::Base
|
4
|
+
attr_accessor :referenced_expression
|
5
|
+
end
|
4
6
|
|
5
7
|
class Number < Backreference::Base
|
6
8
|
attr_reader :number
|
9
|
+
alias reference number
|
7
10
|
|
8
11
|
def initialize(token, options = {})
|
9
12
|
@number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
|
@@ -13,6 +16,7 @@ module Regexp::Expression
|
|
13
16
|
|
14
17
|
class Name < Backreference::Base
|
15
18
|
attr_reader :name
|
19
|
+
alias reference name
|
16
20
|
|
17
21
|
def initialize(token, options = {})
|
18
22
|
@name = token.text[3..-2]
|
@@ -20,27 +24,31 @@ module Regexp::Expression
|
|
20
24
|
end
|
21
25
|
end
|
22
26
|
|
27
|
+
class NumberRelative < Backreference::Number
|
28
|
+
attr_accessor :effective_number
|
29
|
+
alias reference effective_number
|
30
|
+
end
|
31
|
+
|
23
32
|
class NumberCall < Backreference::Number; end
|
24
|
-
class
|
25
|
-
class NumberCallRelative < Backreference::
|
26
|
-
class NameCall < Backreference::Name; end
|
33
|
+
class NameCall < Backreference::Name; end
|
34
|
+
class NumberCallRelative < Backreference::NumberRelative; end
|
27
35
|
|
28
|
-
class NumberRecursionLevel < Backreference::
|
29
|
-
attr_reader :
|
36
|
+
class NumberRecursionLevel < Backreference::Number
|
37
|
+
attr_reader :recursion_level
|
30
38
|
|
31
39
|
def initialize(token, options = {})
|
32
|
-
@number, @recursion_level = token.text[3..-2].split(/(?=[+-])/).map(&:to_i)
|
33
40
|
super
|
41
|
+
@number, @recursion_level = token.text[3..-2].split(/(?=[+-])/).map(&:to_i)
|
34
42
|
end
|
35
43
|
end
|
36
44
|
|
37
|
-
class NameRecursionLevel < Backreference::
|
38
|
-
attr_reader :
|
45
|
+
class NameRecursionLevel < Backreference::Name
|
46
|
+
attr_reader :recursion_level
|
39
47
|
|
40
48
|
def initialize(token, options = {})
|
49
|
+
super
|
41
50
|
@name, recursion_level = token.text[3..-2].split(/(?=[+-])/)
|
42
51
|
@recursion_level = recursion_level.to_i
|
43
|
-
super
|
44
52
|
end
|
45
53
|
end
|
46
54
|
end
|
@@ -7,6 +7,8 @@ module Regexp::Expression
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class Condition < Regexp::Expression::Base
|
10
|
+
attr_accessor :referenced_expression
|
11
|
+
|
10
12
|
# Name or number of the referenced capturing group that determines state.
|
11
13
|
# Returns a String if reference is by name, Integer if by number.
|
12
14
|
def reference
|
@@ -18,6 +20,8 @@ module Regexp::Expression
|
|
18
20
|
class Branch < Regexp::Expression::Sequence; end
|
19
21
|
|
20
22
|
class Expression < Regexp::Expression::Subexpression
|
23
|
+
attr_accessor :referenced_expression
|
24
|
+
|
21
25
|
def <<(exp)
|
22
26
|
expressions.last << exp
|
23
27
|
end
|
@@ -19,20 +19,22 @@ module Regexp::Expression
|
|
19
19
|
|
20
20
|
class Capture < Group::Base
|
21
21
|
attr_accessor :number, :number_at_level
|
22
|
+
alias identifier number
|
22
23
|
|
23
24
|
def capturing?; true end
|
24
25
|
end
|
25
26
|
|
26
27
|
class Named < Group::Capture
|
27
28
|
attr_reader :name
|
29
|
+
alias identifier name
|
28
30
|
|
29
31
|
def initialize(token, options = {})
|
30
32
|
@name = token.text[3..-2]
|
31
33
|
super
|
32
34
|
end
|
33
35
|
|
34
|
-
def initialize_clone(
|
35
|
-
|
36
|
+
def initialize_clone(orig)
|
37
|
+
@name = orig.name.dup
|
36
38
|
super
|
37
39
|
end
|
38
40
|
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
# Describes which String lengths an expression can match.
#
# The overall length range is a base length range (base_min..base_max, the
# lengths matched by a single repetition) multiplied by a repetition range
# (min_rep..max_rep). Instances are Enumerable over the matchable lengths in
# ascending order.
class Regexp::MatchLength
  include Enumerable

  # Returns the match length of +obj+. Anything that is not already a
  # Regexp::Expression::Base is handed to Regexp::Parser.parse first.
  def self.of(obj)
    exp = obj.is_a?(Regexp::Expression::Base) ? obj : Regexp::Parser.parse(obj)
    exp.match_length
  end

  # exp  - object responding to #repetitions, whose #min and #max are read;
  #        its class is remembered for #inspect.
  # opts - either :base (a fixed length for one repetition), or all of
  #        :base_min, :base_max and :reify (a callable returning a regexp
  #        source String). Raises KeyError if :base is absent and any of the
  #        other three keys is missing.
  def initialize(exp, opts = {})
    self.exp_class = exp.class
    self.min_rep = exp.repetitions.min
    self.max_rep = exp.repetitions.max
    if base = opts[:base]
      self.base_min = base
      self.base_max = base
      self.reify = ->{ '.' * base }
    else
      self.base_min = opts.fetch(:base_min)
      self.base_max = opts.fetch(:base_max)
      self.reify = opts.fetch(:reify)
    end
  end

  # Yields each matchable length in ascending order, stopping after
  # opts[:limit] lengths (default 1000). Returns an Enumerator when called
  # without a block.
  def each(opts = {})
    # Forward opts so that a :limit passed without a block is not lost.
    return enum_for(__method__, opts) unless block_given?
    limit = opts[:limit] || 1000
    yielded = 0
    (min..max).each do |num|
      next unless include?(num)
      yield(num)
      break if (yielded += 1) >= limit
    end
  end

  # Like #each, but without any limit. Does not terminate on its own when
  # #max is infinite.
  def endless_each
    return enum_for(__method__) unless block_given?
    (min..max).each { |num| yield(num) if include?(num) }
  end

  # True if a String of the given length is matched by the test regexp
  # built from #to_re.
  def include?(length)
    test_regexp.match?('X' * length)
  end

  # True if only a single length can be matched.
  def fixed?
    min == max
  end

  # Smallest matchable length.
  def min
    min_rep * base_min
  end

  # Largest matchable length (may be Float::INFINITY).
  def max
    # Zero repetitions, or repetitions of a zero-length base, can only ever
    # match 0 characters. Short-circuit so that multiplying 0 with
    # Float::INFINITY does not yield NaN.
    return 0 if max_rep == 0 || base_max == 0
    max_rep * base_max
  end

  def minmax
    [min, max]
  end

  def inspect
    type = exp_class.name.sub('Regexp::Expression::', '')
    "#<#{self.class}<#{type}> min=#{min} max=#{max}>"
  end

  # Regexp source that matches exactly the Strings whose lengths are
  # matchable. An infinite max_rep is rendered as an open-ended quantifier.
  def to_re
    "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
  end

  private

  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify

  # Memoized, anchored Regexp used by #include?.
  def test_regexp
    @test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
      # Shim for Regexp objects that do not provide #match?.
      regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
    end
  end
end
|
79
|
+
|
80
|
+
# Adds #match_length to the expression classes. Each definition builds a
# MatchLength from the receiver plus the base length(s) of one repetition.
module Regexp::Expression
  MatchLength = Regexp::MatchLength

  # Classes given a base length of 1.
  base_one_classes = [
    CharacterSet,
    CharacterSet::Intersection,
    CharacterSet::IntersectedSequence,
    CharacterSet::Range,
    CharacterType::Base,
    EscapeSequence::Base,
    PosixClass,
    UnicodeProperty::Base,
  ]
  base_one_classes.each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        MatchLength.new(self, base: 1)
      end
    RUBY
  end

  class Literal
    # Base length is the length of the literal's text.
    def match_length
      MatchLength.new(self, base: text.length)
    end
  end

  class Subexpression
    # Base range is the sum of the children's minimum and maximum lengths;
    # the regexp source is the concatenation of the children's sources.
    def match_length
      smallest = map { |child| child.match_length.min }.reduce(0) { |sum, len| sum + len }
      largest = map { |child| child.match_length.max }.reduce(0) { |sum, len| sum + len }
      source = lambda { map { |child| child.match_length.to_re }.join }
      MatchLength.new(self, base_min: smallest, base_max: largest, reify: source)
    end

    # Match length computed on a freshly built Root that receives clones of
    # this expression's children and of its quantifier (if any).
    def inner_match_length
      stand_in = Regexp::Expression::Root.build.tap do |root|
        root.expressions = expressions.map { |child| child.clone }
        root.quantifier = quantifier && quantifier.clone
      end
      stand_in.match_length
    end
  end

  # Branching classes: base range spans the smallest minimum to the largest
  # maximum of the children; sources are joined with '|'.
  branching_classes = [
    Alternation,
    Conditional::Expression,
  ]
  branching_classes.each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        MatchLength.new(self,
                        base_min: map { |exp| exp.match_length.min }.min,
                        base_max: map { |exp| exp.match_length.max }.max,
                        reify: ->{ map { |exp| exp.match_length.to_re }.join('|') })
      end
    RUBY
  end

  # Classes given a base length of 0.
  base_zero_classes = [
    Anchor::Base,
    Assertion::Base,
    Conditional::Condition,
    FreeSpace,
    Keep::Mark,
  ]
  base_zero_classes.each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        MatchLength.new(self, base: 0)
      end
    RUBY
  end

  class Backreference::Base
    # Delegates to an unquantified clone of the referenced expression.
    # Raises ArgumentError when no referenced expression has been set.
    def match_length
      raise ArgumentError, 'Missing referenced_expression - not parsed?' if referenced_expression.nil?

      referenced_expression.unquantified_clone.match_length
    end
  end

  class EscapeSequence::CodepointList
    # Base length is the number of codepoints in the list.
    def match_length
      MatchLength.new(self, base: codepoints.count)
    end
  end

  # Special case: an absence group can match zero or more characters,
  # whatever its content.
  # TODO: in theory, they *can* exclude match lengths with `.`: `(?~.{3})`
  class Group::Absence
    def match_length
      MatchLength.new(
        self,
        base_min: 0,
        base_max: Float::INFINITY,
        reify: lambda { '.*' }
      )
    end
  end
end
|