regexp_parser 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/Gemfile +1 -1
- data/README.md +9 -13
- data/lib/regexp_parser/expression.rb +33 -21
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +4 -0
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +0 -4
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +31 -24
- data/lib/regexp_parser/parser.rb +25 -3
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -2
- data/spec/expression/base_spec.rb +80 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +141 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +97 -0
- data/spec/expression/methods/traverse_spec.rb +140 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/{test/lexer/test_conditionals.rb → spec/lexer/conditionals_spec.rb} +31 -35
- data/spec/lexer/escapes_spec.rb +38 -0
- data/spec/lexer/keep_spec.rb +22 -0
- data/{test/lexer/test_literals.rb → spec/lexer/literals_spec.rb} +20 -24
- data/{test/lexer/test_nesting.rb → spec/lexer/nesting_spec.rb} +11 -13
- data/spec/lexer/refcalls_spec.rb +54 -0
- data/spec/parser/all_spec.rb +31 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/{test/parser/test_anchors.rb → spec/parser/anchors_spec.rb} +7 -10
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +51 -0
- data/spec/parser/escapes_spec.rb +132 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +267 -0
- data/spec/parser/keep_spec.rb +19 -0
- data/spec/parser/posix_classes_spec.rb +27 -0
- data/spec/parser/properties_spec.rb +127 -0
- data/spec/parser/quantifiers_spec.rb +293 -0
- data/spec/parser/refcalls_spec.rb +237 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/{test/parser/test_types.rb → spec/parser/types_spec.rb} +13 -20
- data/spec/scanner/all_spec.rb +18 -0
- data/{test/scanner/test_anchors.rb → spec/scanner/anchors_spec.rb} +8 -10
- data/{test/scanner/test_conditionals.rb → spec/scanner/conditionals_spec.rb} +49 -53
- data/spec/scanner/errors_spec.rb +90 -0
- data/{test/scanner/test_escapes.rb → spec/scanner/escapes_spec.rb} +8 -10
- data/{test/scanner/test_free_space.rb → spec/scanner/free_space_spec.rb} +48 -52
- data/{test/scanner/test_groups.rb → spec/scanner/groups_spec.rb} +33 -41
- data/spec/scanner/keep_spec.rb +33 -0
- data/{test/scanner/test_literals.rb → spec/scanner/literals_spec.rb} +8 -12
- data/{test/scanner/test_meta.rb → spec/scanner/meta_spec.rb} +8 -10
- data/{test/scanner/test_properties.rb → spec/scanner/properties_spec.rb} +14 -19
- data/{test/scanner/test_quantifiers.rb → spec/scanner/quantifiers_spec.rb} +7 -9
- data/{test/scanner/test_refcalls.rb → spec/scanner/refcalls_spec.rb} +9 -9
- data/{test/scanner/test_scripts.rb → spec/scanner/scripts_spec.rb} +8 -12
- data/{test/scanner/test_sets.rb → spec/scanner/sets_spec.rb} +14 -17
- data/spec/scanner/types_spec.rb +29 -0
- data/spec/scanner/unicode_blocks_spec.rb +28 -0
- data/spec/spec_helper.rb +14 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +44 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +38 -0
- data/spec/syntax/versions/1.9.1_spec.rb +23 -0
- data/spec/syntax/versions/1.9.3_spec.rb +22 -0
- data/spec/syntax/versions/2.0.0_spec.rb +28 -0
- data/spec/syntax/versions/2.2.0_spec.rb +22 -0
- data/spec/syntax/versions/aliases_spec.rb +119 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +131 -140
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -134
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 707834a32bc2295b448953730eabddabb11bafc68fdf3148174f7be61d8b1f30
|
4
|
+
data.tar.gz: 72d199d28c342d6aae178a5876a7df6f59abcdf40c6ac4f05ea9dc40d16d9f3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c1402afedc1efb79f633ee93065598b64732519ba587ca3f682eb8bbb4aaa264e31dd916b95f9751f7c6e85e867efa260501b40e55409327efa5b769346a183
|
7
|
+
data.tar.gz: 6335cbc411b08adb64bfca9646eebc3a5c39d4651a2495d34f87fca21927da3a363fc320159a3732cdd9e2d8732986190fcd6c9d523b7308531f91848951ccbd
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,31 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- Added `#referenced_expression` for backrefs, subexp calls and conditionals
|
8
|
+
* returns the `Group` expression that is being referenced via name or number
|
9
|
+
- Added `Expression#repetitions`
|
10
|
+
* returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
|
11
|
+
* like `#quantity` but with a more uniform interface
|
12
|
+
- Added `Expression#match_length`
|
13
|
+
* allows to inspect and iterate over String lengths matched by the Expression
|
14
|
+
|
15
|
+
### Fixed
|
16
|
+
|
17
|
+
- Fixed `Expression#clone` "direction"
|
18
|
+
* it used to dup ivars onto the callee, leaving only the clone referencing the original objects
|
19
|
+
* this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
|
20
|
+
- Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
|
21
|
+
* the inner `#text` was cloned onto the `Sequence` and thus duplicated
|
22
|
+
* e.g. `Regexp::Parser.parse(/(a|bc)/).clone.to_s # => (aa|bcbc)`
|
23
|
+
- Fixed inconsistent `#to_s` output for `Sequences`
|
24
|
+
* it used to return only the "specific" text, e.g. "|" for an alternation
|
25
|
+
* now it includes nested expressions as it does for all other `Subexpressions`
|
26
|
+
- Fixed quantification of codepoint lists with more than one entry (`\u{62 63 64}+`)
|
27
|
+
* quantifiers apply only to the last entry, so this token is now split up if quantified
|
28
|
+
|
3
29
|
### [1.4.0] - 2019-04-02 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
30
|
|
5
31
|
### Added
|
@@ -75,7 +101,7 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
75
101
|
- Changed `(?m)` and the likes to emit as `:options_switch` token (@4ade4d1)
|
76
102
|
* allows differentiating from group-local `:options`, e.g. `(?m:.)`
|
77
103
|
- Changed name of `Backreference::..NestLevel` to `..RecursionLevel` (@4184339)
|
78
|
-
- Changed
|
104
|
+
- Changed `Backreference::Number#number` from `String` to `Integer` (@40a2231)
|
79
105
|
|
80
106
|
### Added
|
81
107
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -391,28 +391,22 @@ To run the tests simply run rake from the root directory, as 'test' is the defau
|
|
391
391
|
|
392
392
|
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
393
393
|
|
394
|
-
The tests use
|
394
|
+
The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
|
395
395
|
|
396
396
|
```
|
397
397
|
bin/test
|
398
398
|
```
|
399
399
|
|
400
|
-
|
400
|
+
You can run a specific test like so:
|
401
401
|
|
402
402
|
```
|
403
|
-
bin/test
|
403
|
+
bin/test spec/scanner/properties_spec.rb
|
404
404
|
```
|
405
405
|
|
406
|
-
|
406
|
+
Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
|
407
407
|
|
408
408
|
```
|
409
|
-
bin/test
|
410
|
-
```
|
411
|
-
|
412
|
-
Note that changes to Ragel files will not be reflected when using `bin/test`, so you might want to run:
|
413
|
-
|
414
|
-
```
|
415
|
-
rake ragel:rb && bin/test test/scanner/test_properties.rb
|
409
|
+
rake ragel:rb && bin/test spec/scanner/properties_spec.rb
|
416
410
|
```
|
417
411
|
|
418
412
|
## Building
|
@@ -440,7 +434,9 @@ Projects using regexp_parser.
|
|
440
434
|
|
441
435
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
442
436
|
|
443
|
-
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
437
|
+
- [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
438
|
+
|
439
|
+
- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) uses regexp_parser to generate examples of postal codes.
|
444
440
|
|
445
441
|
- [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
|
446
442
|
|
@@ -471,4 +467,4 @@ Documentation and books used while working on this project.
|
|
471
467
|
|
472
468
|
---
|
473
469
|
##### Copyright
|
474
|
-
_Copyright (c) 2010-
|
470
|
+
_Copyright (c) 2010-2019 Ammar Ali. See LICENSE file for details._
|
@@ -21,10 +21,10 @@ module Regexp::Expression
|
|
21
21
|
self.options = options
|
22
22
|
end
|
23
23
|
|
24
|
-
def initialize_clone(
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
def initialize_clone(orig)
|
25
|
+
self.text = (orig.text ? orig.text.dup : nil)
|
26
|
+
self.options = (orig.options ? orig.options.dup : nil)
|
27
|
+
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
28
28
|
super
|
29
29
|
end
|
30
30
|
|
@@ -62,15 +62,28 @@ module Regexp::Expression
|
|
62
62
|
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
63
63
|
end
|
64
64
|
|
65
|
+
def unquantified_clone
|
66
|
+
clone.tap { |exp| exp.quantifier = nil }
|
67
|
+
end
|
68
|
+
|
65
69
|
def quantified?
|
66
70
|
!quantifier.nil?
|
67
71
|
end
|
68
72
|
|
73
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
69
74
|
def quantity
|
70
75
|
return [nil,nil] unless quantified?
|
71
76
|
[quantifier.min, quantifier.max]
|
72
77
|
end
|
73
78
|
|
79
|
+
def repetitions
|
80
|
+
return 1..1 unless quantified?
|
81
|
+
min = quantifier.min
|
82
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
83
|
+
# fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
|
84
|
+
(min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
|
85
|
+
end
|
86
|
+
|
74
87
|
def greedy?
|
75
88
|
quantified? and quantifier.greedy?
|
76
89
|
end
|
@@ -101,22 +114,20 @@ module Regexp::Expression
|
|
101
114
|
alias :x? :free_spacing?
|
102
115
|
alias :extended? :free_spacing?
|
103
116
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
alias :d? :default_classes?
|
117
|
+
def default_classes?
|
118
|
+
options[:d] == true
|
119
|
+
end
|
120
|
+
alias :d? :default_classes?
|
109
121
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
122
|
+
def ascii_classes?
|
123
|
+
options[:a] == true
|
124
|
+
end
|
125
|
+
alias :a? :ascii_classes?
|
114
126
|
|
115
|
-
|
116
|
-
|
117
|
-
end
|
118
|
-
alias :u? :unicode_classes?
|
127
|
+
def unicode_classes?
|
128
|
+
options[:u] == true
|
119
129
|
end
|
130
|
+
alias :u? :unicode_classes?
|
120
131
|
|
121
132
|
def matches?(string)
|
122
133
|
Regexp.new(to_s) =~ string ? true : false
|
@@ -161,10 +172,6 @@ module Regexp::Expression
|
|
161
172
|
|
162
173
|
end # module Regexp::Expression
|
163
174
|
|
164
|
-
require 'regexp_parser/expression/methods/tests'
|
165
|
-
require 'regexp_parser/expression/methods/traverse'
|
166
|
-
require 'regexp_parser/expression/methods/strfregexp'
|
167
|
-
|
168
175
|
require 'regexp_parser/expression/quantifier'
|
169
176
|
require 'regexp_parser/expression/subexpression'
|
170
177
|
require 'regexp_parser/expression/sequence'
|
@@ -186,3 +193,8 @@ require 'regexp_parser/expression/classes/set'
|
|
186
193
|
require 'regexp_parser/expression/classes/set/intersection'
|
187
194
|
require 'regexp_parser/expression/classes/set/range'
|
188
195
|
require 'regexp_parser/expression/classes/type'
|
196
|
+
|
197
|
+
require 'regexp_parser/expression/methods/match_length'
|
198
|
+
require 'regexp_parser/expression/methods/strfregexp'
|
199
|
+
require 'regexp_parser/expression/methods/tests'
|
200
|
+
require 'regexp_parser/expression/methods/traverse'
|
@@ -1,9 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Backreference
|
3
|
-
class Base < Regexp::Expression::Base
|
3
|
+
class Base < Regexp::Expression::Base
|
4
|
+
attr_accessor :referenced_expression
|
5
|
+
end
|
4
6
|
|
5
7
|
class Number < Backreference::Base
|
6
8
|
attr_reader :number
|
9
|
+
alias reference number
|
7
10
|
|
8
11
|
def initialize(token, options = {})
|
9
12
|
@number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
|
@@ -13,6 +16,7 @@ module Regexp::Expression
|
|
13
16
|
|
14
17
|
class Name < Backreference::Base
|
15
18
|
attr_reader :name
|
19
|
+
alias reference name
|
16
20
|
|
17
21
|
def initialize(token, options = {})
|
18
22
|
@name = token.text[3..-2]
|
@@ -20,27 +24,31 @@ module Regexp::Expression
|
|
20
24
|
end
|
21
25
|
end
|
22
26
|
|
27
|
+
class NumberRelative < Backreference::Number
|
28
|
+
attr_accessor :effective_number
|
29
|
+
alias reference effective_number
|
30
|
+
end
|
31
|
+
|
23
32
|
class NumberCall < Backreference::Number; end
|
24
|
-
class
|
25
|
-
class NumberCallRelative < Backreference::
|
26
|
-
class NameCall < Backreference::Name; end
|
33
|
+
class NameCall < Backreference::Name; end
|
34
|
+
class NumberCallRelative < Backreference::NumberRelative; end
|
27
35
|
|
28
|
-
class NumberRecursionLevel < Backreference::
|
29
|
-
attr_reader :
|
36
|
+
class NumberRecursionLevel < Backreference::Number
|
37
|
+
attr_reader :recursion_level
|
30
38
|
|
31
39
|
def initialize(token, options = {})
|
32
|
-
@number, @recursion_level = token.text[3..-2].split(/(?=[+-])/).map(&:to_i)
|
33
40
|
super
|
41
|
+
@number, @recursion_level = token.text[3..-2].split(/(?=[+-])/).map(&:to_i)
|
34
42
|
end
|
35
43
|
end
|
36
44
|
|
37
|
-
class NameRecursionLevel < Backreference::
|
38
|
-
attr_reader :
|
45
|
+
class NameRecursionLevel < Backreference::Name
|
46
|
+
attr_reader :recursion_level
|
39
47
|
|
40
48
|
def initialize(token, options = {})
|
49
|
+
super
|
41
50
|
@name, recursion_level = token.text[3..-2].split(/(?=[+-])/)
|
42
51
|
@recursion_level = recursion_level.to_i
|
43
|
-
super
|
44
52
|
end
|
45
53
|
end
|
46
54
|
end
|
@@ -7,6 +7,8 @@ module Regexp::Expression
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class Condition < Regexp::Expression::Base
|
10
|
+
attr_accessor :referenced_expression
|
11
|
+
|
10
12
|
# Name or number of the referenced capturing group that determines state.
|
11
13
|
# Returns a String if reference is by name, Integer if by number.
|
12
14
|
def reference
|
@@ -18,6 +20,8 @@ module Regexp::Expression
|
|
18
20
|
class Branch < Regexp::Expression::Sequence; end
|
19
21
|
|
20
22
|
class Expression < Regexp::Expression::Subexpression
|
23
|
+
attr_accessor :referenced_expression
|
24
|
+
|
21
25
|
def <<(exp)
|
22
26
|
expressions.last << exp
|
23
27
|
end
|
@@ -19,20 +19,22 @@ module Regexp::Expression
|
|
19
19
|
|
20
20
|
class Capture < Group::Base
|
21
21
|
attr_accessor :number, :number_at_level
|
22
|
+
alias identifier number
|
22
23
|
|
23
24
|
def capturing?; true end
|
24
25
|
end
|
25
26
|
|
26
27
|
class Named < Group::Capture
|
27
28
|
attr_reader :name
|
29
|
+
alias identifier name
|
28
30
|
|
29
31
|
def initialize(token, options = {})
|
30
32
|
@name = token.text[3..-2]
|
31
33
|
super
|
32
34
|
end
|
33
35
|
|
34
|
-
def initialize_clone(
|
35
|
-
|
36
|
+
def initialize_clone(orig)
|
37
|
+
@name = orig.name.dup
|
36
38
|
super
|
37
39
|
end
|
38
40
|
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
class Regexp::MatchLength
|
2
|
+
include Enumerable
|
3
|
+
|
4
|
+
def self.of(obj)
|
5
|
+
exp = obj.is_a?(Regexp::Expression::Base) ? obj : Regexp::Parser.parse(obj)
|
6
|
+
exp.match_length
|
7
|
+
end
|
8
|
+
|
9
|
+
def initialize(exp, opts = {})
|
10
|
+
self.exp_class = exp.class
|
11
|
+
self.min_rep = exp.repetitions.min
|
12
|
+
self.max_rep = exp.repetitions.max
|
13
|
+
if base = opts[:base]
|
14
|
+
self.base_min = base
|
15
|
+
self.base_max = base
|
16
|
+
self.reify = ->{ '.' * base }
|
17
|
+
else
|
18
|
+
self.base_min = opts.fetch(:base_min)
|
19
|
+
self.base_max = opts.fetch(:base_max)
|
20
|
+
self.reify = opts.fetch(:reify)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def each(opts = {})
|
25
|
+
return enum_for(__method__) unless block_given?
|
26
|
+
limit = opts[:limit] || 1000
|
27
|
+
yielded = 0
|
28
|
+
(min..max).each do |num|
|
29
|
+
next unless include?(num)
|
30
|
+
yield(num)
|
31
|
+
break if (yielded += 1) >= limit
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def endless_each(&block)
|
36
|
+
return enum_for(__method__) unless block_given?
|
37
|
+
(min..max).each { |num| yield(num) if include?(num) }
|
38
|
+
end
|
39
|
+
|
40
|
+
def include?(length)
|
41
|
+
test_regexp.match?('X' * length)
|
42
|
+
end
|
43
|
+
|
44
|
+
def fixed?
|
45
|
+
min == max
|
46
|
+
end
|
47
|
+
|
48
|
+
def min
|
49
|
+
min_rep * base_min
|
50
|
+
end
|
51
|
+
|
52
|
+
def max
|
53
|
+
max_rep * base_max
|
54
|
+
end
|
55
|
+
|
56
|
+
def minmax
|
57
|
+
[min, max]
|
58
|
+
end
|
59
|
+
|
60
|
+
def inspect
|
61
|
+
type = exp_class.name.sub('Regexp::Expression::', '')
|
62
|
+
"#<#{self.class}<#{type}> min=#{min} max=#{max}>"
|
63
|
+
end
|
64
|
+
|
65
|
+
def to_re
|
66
|
+
"(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
|
72
|
+
|
73
|
+
def test_regexp
|
74
|
+
@test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
|
75
|
+
regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
module Regexp::Expression
|
81
|
+
MatchLength = Regexp::MatchLength
|
82
|
+
|
83
|
+
[
|
84
|
+
CharacterSet,
|
85
|
+
CharacterSet::Intersection,
|
86
|
+
CharacterSet::IntersectedSequence,
|
87
|
+
CharacterSet::Range,
|
88
|
+
CharacterType::Base,
|
89
|
+
EscapeSequence::Base,
|
90
|
+
PosixClass,
|
91
|
+
UnicodeProperty::Base,
|
92
|
+
].each do |klass|
|
93
|
+
klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
94
|
+
def match_length
|
95
|
+
MatchLength.new(self, base: 1)
|
96
|
+
end
|
97
|
+
RUBY
|
98
|
+
end
|
99
|
+
|
100
|
+
class Literal
|
101
|
+
def match_length
|
102
|
+
MatchLength.new(self, base: text.length)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
class Subexpression
|
107
|
+
def match_length
|
108
|
+
MatchLength.new(self,
|
109
|
+
base_min: map { |exp| exp.match_length.min }.inject(0, :+),
|
110
|
+
base_max: map { |exp| exp.match_length.max }.inject(0, :+),
|
111
|
+
reify: ->{ map { |exp| exp.match_length.to_re }.join })
|
112
|
+
end
|
113
|
+
|
114
|
+
def inner_match_length
|
115
|
+
dummy = Regexp::Expression::Root.build
|
116
|
+
dummy.expressions = expressions.map(&:clone)
|
117
|
+
dummy.quantifier = quantifier && quantifier.clone
|
118
|
+
dummy.match_length
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
[
|
123
|
+
Alternation,
|
124
|
+
Conditional::Expression,
|
125
|
+
].each do |klass|
|
126
|
+
klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
127
|
+
def match_length
|
128
|
+
MatchLength.new(self,
|
129
|
+
base_min: map { |exp| exp.match_length.min }.min,
|
130
|
+
base_max: map { |exp| exp.match_length.max }.max,
|
131
|
+
reify: ->{ map { |exp| exp.match_length.to_re }.join('|') })
|
132
|
+
end
|
133
|
+
RUBY
|
134
|
+
end
|
135
|
+
|
136
|
+
[
|
137
|
+
Anchor::Base,
|
138
|
+
Assertion::Base,
|
139
|
+
Conditional::Condition,
|
140
|
+
FreeSpace,
|
141
|
+
Keep::Mark,
|
142
|
+
].each do |klass|
|
143
|
+
klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
144
|
+
def match_length
|
145
|
+
MatchLength.new(self, base: 0)
|
146
|
+
end
|
147
|
+
RUBY
|
148
|
+
end
|
149
|
+
|
150
|
+
class Backreference::Base
|
151
|
+
def match_length
|
152
|
+
if referenced_expression.nil?
|
153
|
+
raise ArgumentError, 'Missing referenced_expression - not parsed?'
|
154
|
+
end
|
155
|
+
referenced_expression.unquantified_clone.match_length
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
class EscapeSequence::CodepointList
|
160
|
+
def match_length
|
161
|
+
MatchLength.new(self, base: codepoints.count)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
# Special case. Absence group can match 0.. chars, irrespective of content.
|
166
|
+
# TODO: in theory, they *can* exclude match lengths with `.`: `(?~.{3})`
|
167
|
+
class Group::Absence
|
168
|
+
def match_length
|
169
|
+
MatchLength.new(self, base_min: 0, base_max: Float::INFINITY, reify: ->{ '.*' })
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|