regexp_parser 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -1
  3. data/Gemfile +1 -1
  4. data/README.md +9 -13
  5. data/lib/regexp_parser/expression.rb +33 -21
  6. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  7. data/lib/regexp_parser/expression/classes/conditional.rb +4 -0
  8. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  9. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  10. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  11. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  12. data/lib/regexp_parser/expression/sequence.rb +0 -4
  13. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  14. data/lib/regexp_parser/lexer.rb +31 -24
  15. data/lib/regexp_parser/parser.rb +25 -3
  16. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  17. data/lib/regexp_parser/version.rb +1 -1
  18. data/regexp_parser.gemspec +2 -2
  19. data/spec/expression/base_spec.rb +80 -0
  20. data/spec/expression/clone_spec.rb +120 -0
  21. data/spec/expression/conditional_spec.rb +89 -0
  22. data/spec/expression/free_space_spec.rb +27 -0
  23. data/spec/expression/methods/match_length_spec.rb +141 -0
  24. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  25. data/spec/expression/methods/tests_spec.rb +97 -0
  26. data/spec/expression/methods/traverse_spec.rb +140 -0
  27. data/spec/expression/subexpression_spec.rb +50 -0
  28. data/spec/expression/to_h_spec.rb +26 -0
  29. data/spec/expression/to_s_spec.rb +100 -0
  30. data/spec/lexer/all_spec.rb +22 -0
  31. data/{test/lexer/test_conditionals.rb → spec/lexer/conditionals_spec.rb} +31 -35
  32. data/spec/lexer/escapes_spec.rb +38 -0
  33. data/spec/lexer/keep_spec.rb +22 -0
  34. data/{test/lexer/test_literals.rb → spec/lexer/literals_spec.rb} +20 -24
  35. data/{test/lexer/test_nesting.rb → spec/lexer/nesting_spec.rb} +11 -13
  36. data/spec/lexer/refcalls_spec.rb +54 -0
  37. data/spec/parser/all_spec.rb +31 -0
  38. data/spec/parser/alternation_spec.rb +88 -0
  39. data/{test/parser/test_anchors.rb → spec/parser/anchors_spec.rb} +7 -10
  40. data/spec/parser/conditionals_spec.rb +179 -0
  41. data/spec/parser/errors_spec.rb +51 -0
  42. data/spec/parser/escapes_spec.rb +132 -0
  43. data/spec/parser/free_space_spec.rb +130 -0
  44. data/spec/parser/groups_spec.rb +267 -0
  45. data/spec/parser/keep_spec.rb +19 -0
  46. data/spec/parser/posix_classes_spec.rb +27 -0
  47. data/spec/parser/properties_spec.rb +127 -0
  48. data/spec/parser/quantifiers_spec.rb +293 -0
  49. data/spec/parser/refcalls_spec.rb +237 -0
  50. data/spec/parser/set/intersections_spec.rb +127 -0
  51. data/spec/parser/set/ranges_spec.rb +111 -0
  52. data/spec/parser/sets_spec.rb +178 -0
  53. data/{test/parser/test_types.rb → spec/parser/types_spec.rb} +13 -20
  54. data/spec/scanner/all_spec.rb +18 -0
  55. data/{test/scanner/test_anchors.rb → spec/scanner/anchors_spec.rb} +8 -10
  56. data/{test/scanner/test_conditionals.rb → spec/scanner/conditionals_spec.rb} +49 -53
  57. data/spec/scanner/errors_spec.rb +90 -0
  58. data/{test/scanner/test_escapes.rb → spec/scanner/escapes_spec.rb} +8 -10
  59. data/{test/scanner/test_free_space.rb → spec/scanner/free_space_spec.rb} +48 -52
  60. data/{test/scanner/test_groups.rb → spec/scanner/groups_spec.rb} +33 -41
  61. data/spec/scanner/keep_spec.rb +33 -0
  62. data/{test/scanner/test_literals.rb → spec/scanner/literals_spec.rb} +8 -12
  63. data/{test/scanner/test_meta.rb → spec/scanner/meta_spec.rb} +8 -10
  64. data/{test/scanner/test_properties.rb → spec/scanner/properties_spec.rb} +14 -19
  65. data/{test/scanner/test_quantifiers.rb → spec/scanner/quantifiers_spec.rb} +7 -9
  66. data/{test/scanner/test_refcalls.rb → spec/scanner/refcalls_spec.rb} +9 -9
  67. data/{test/scanner/test_scripts.rb → spec/scanner/scripts_spec.rb} +8 -12
  68. data/{test/scanner/test_sets.rb → spec/scanner/sets_spec.rb} +14 -17
  69. data/spec/scanner/types_spec.rb +29 -0
  70. data/spec/scanner/unicode_blocks_spec.rb +28 -0
  71. data/spec/spec_helper.rb +14 -0
  72. data/{test → spec}/support/runner.rb +9 -8
  73. data/{test → spec}/support/warning_extractor.rb +5 -7
  74. data/spec/syntax/syntax_spec.rb +44 -0
  75. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  76. data/spec/syntax/versions/1.8.6_spec.rb +38 -0
  77. data/spec/syntax/versions/1.9.1_spec.rb +23 -0
  78. data/spec/syntax/versions/1.9.3_spec.rb +22 -0
  79. data/spec/syntax/versions/2.0.0_spec.rb +28 -0
  80. data/spec/syntax/versions/2.2.0_spec.rb +22 -0
  81. data/spec/syntax/versions/aliases_spec.rb +119 -0
  82. data/spec/token/token_spec.rb +85 -0
  83. metadata +131 -140
  84. data/test/expression/test_all.rb +0 -12
  85. data/test/expression/test_base.rb +0 -90
  86. data/test/expression/test_clone.rb +0 -89
  87. data/test/expression/test_conditionals.rb +0 -113
  88. data/test/expression/test_free_space.rb +0 -35
  89. data/test/expression/test_set.rb +0 -84
  90. data/test/expression/test_strfregexp.rb +0 -230
  91. data/test/expression/test_subexpression.rb +0 -58
  92. data/test/expression/test_tests.rb +0 -99
  93. data/test/expression/test_to_h.rb +0 -59
  94. data/test/expression/test_to_s.rb +0 -104
  95. data/test/expression/test_traverse.rb +0 -161
  96. data/test/helpers.rb +0 -10
  97. data/test/lexer/test_all.rb +0 -41
  98. data/test/lexer/test_keep.rb +0 -24
  99. data/test/lexer/test_refcalls.rb +0 -56
  100. data/test/parser/set/test_intersections.rb +0 -127
  101. data/test/parser/set/test_ranges.rb +0 -111
  102. data/test/parser/test_all.rb +0 -64
  103. data/test/parser/test_alternation.rb +0 -92
  104. data/test/parser/test_conditionals.rb +0 -187
  105. data/test/parser/test_errors.rb +0 -63
  106. data/test/parser/test_escapes.rb +0 -134
  107. data/test/parser/test_free_space.rb +0 -139
  108. data/test/parser/test_groups.rb +0 -289
  109. data/test/parser/test_keep.rb +0 -21
  110. data/test/parser/test_posix_classes.rb +0 -27
  111. data/test/parser/test_properties.rb +0 -134
  112. data/test/parser/test_quantifiers.rb +0 -301
  113. data/test/parser/test_refcalls.rb +0 -186
  114. data/test/parser/test_sets.rb +0 -179
  115. data/test/scanner/test_all.rb +0 -38
  116. data/test/scanner/test_errors.rb +0 -91
  117. data/test/scanner/test_keep.rb +0 -35
  118. data/test/scanner/test_types.rb +0 -35
  119. data/test/scanner/test_unicode_blocks.rb +0 -30
  120. data/test/support/disable_autotest.rb +0 -8
  121. data/test/syntax/test_all.rb +0 -6
  122. data/test/syntax/test_syntax.rb +0 -61
  123. data/test/syntax/test_syntax_token_map.rb +0 -25
  124. data/test/syntax/versions/test_1.8.rb +0 -55
  125. data/test/syntax/versions/test_1.9.1.rb +0 -36
  126. data/test/syntax/versions/test_1.9.3.rb +0 -32
  127. data/test/syntax/versions/test_2.0.0.rb +0 -37
  128. data/test/syntax/versions/test_2.2.0.rb +0 -32
  129. data/test/syntax/versions/test_aliases.rb +0 -129
  130. data/test/syntax/versions/test_all.rb +0 -5
  131. data/test/test_all.rb +0 -5
  132. data/test/token/test_all.rb +0 -2
  133. data/test/token/test_token.rb +0 -107
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0ff2b9541be8d00d5a0f8a355ebb9ab6bc5bc2ac50ffa14df13144bf2d239b42
4
- data.tar.gz: b5d4c720eaa3606a7973b110251a5fb1fe87e11714fed5a195908678098a4cbe
3
+ metadata.gz: 707834a32bc2295b448953730eabddabb11bafc68fdf3148174f7be61d8b1f30
4
+ data.tar.gz: 72d199d28c342d6aae178a5876a7df6f59abcdf40c6ac4f05ea9dc40d16d9f3a
5
5
  SHA512:
6
- metadata.gz: e8759d373fdea7bbd455a5e2ff96ce1a64cb81f35c325fad49a886a99388897486c1904a847a072b14b245e6da0dded81c3ef031e74944b2fe5d8c67a4cffaab
7
- data.tar.gz: 6e39afe8a277eced992c0508a99d022dba939401925e6e3e793cab364d0b3b2143cfade3a433b9d0445c49004dad781dc04f7a99c69229c471dff7095823d065
6
+ metadata.gz: 4c1402afedc1efb79f633ee93065598b64732519ba587ca3f682eb8bbb4aaa264e31dd916b95f9751f7c6e85e867efa260501b40e55409327efa5b769346a183
7
+ data.tar.gz: 6335cbc411b08adb64bfca9646eebc3a5c39d4651a2495d34f87fca21927da3a363fc320159a3732cdd9e2d8732986190fcd6c9d523b7308531f91848951ccbd
data/CHANGELOG.md CHANGED
@@ -1,5 +1,31 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Added
6
+
7
+ - Added `#referenced_expression` for backrefs, subexp calls and conditionals
8
+ * returns the `Group` expression that is being referenced via name or number
9
+ - Added `Expression#repetitions`
10
+ * returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
11
+ * like `#quantity` but with a more uniform interface
12
+ - Added `Expression#match_length`
13
+ * allows inspecting and iterating over the String lengths matched by the Expression
14
+
15
+ ### Fixed
16
+
17
+ - Fixed `Expression#clone` "direction"
18
+ * it used to dup ivars onto the callee, leaving only the clone referencing the original objects
19
+ * this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
20
+ - Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
21
+ * the inner `#text` was cloned onto the `Sequence` and thus duplicated
22
+ * e.g. `Regexp::Parser.parse(/(a|bc)/).clone.to_s # => (aa|bcbc)`
23
+ - Fixed inconsistent `#to_s` output for `Sequences`
24
+ * it used to return only the "specific" text, e.g. "|" for an alternation
25
+ * now it includes nested expressions as it does for all other `Subexpressions`
26
+ - Fixed quantification of codepoint lists with more than one entry (`\u{62 63 64}+`)
27
+ * quantifiers apply only to the last entry, so this token is now split up if quantified
28
+
3
29
  ### [1.4.0] - 2019-04-02 - [Janosch Müller](mailto:janosch84@gmail.com)
4
30
 
5
31
  ### Added
@@ -75,7 +101,7 @@ This release includes several breaking changes, mostly to character sets, #map a
75
101
  - Changed `(?m)` and the likes to emit as `:options_switch` token (@4ade4d1)
76
102
  * allows differentiating from group-local `:options`, e.g. `(?m:.)`
77
103
  - Changed name of `Backreference::..NestLevel` to `..RecursionLevel` (@4184339)
78
- - Changed B`ackreference::Number#number` from `String` to `Integer` (@40a2231)
104
+ - Changed `Backreference::Number#number` from `String` to `Integer` (@40a2231)
79
105
 
80
106
  ### Added
81
107
 
data/Gemfile CHANGED
@@ -5,5 +5,5 @@ gemspec
5
5
  group :development, :test do
6
6
  gem 'rake'
7
7
  gem 'regexp_property_values'
8
- gem 'test-unit'
8
+ gem 'rspec'
9
9
  end
data/README.md CHANGED
@@ -391,28 +391,22 @@ To run the tests simply run rake from the root directory, as 'test' is the defau
391
391
 
392
392
  It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
393
393
 
394
- The tests use Ruby's test/unit. They can also be run with:
394
+ The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
395
395
 
396
396
  ```
397
397
  bin/test
398
398
  ```
399
399
 
400
- The test runner accepts all arguments accepted by test/unit. You can run a specific test like so:
400
+ You can run a specific test like so:
401
401
 
402
402
  ```
403
- bin/test test/scanner/test_properties.rb
403
+ bin/test spec/scanner/properties_spec.rb
404
404
  ```
405
405
 
406
- It is sometimes helpful during development to focus on a specific test case, for example:
406
+ Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
407
407
 
408
408
  ```
409
- bin/test test/expression/test_base.rb -n test_expression_to_re
410
- ```
411
-
412
- Note that changes to Ragel files will not be reflected when using `bin/test`, so you might want to run:
413
-
414
- ```
415
- rake ragel:rb && bin/test test/scanner/test_properties.rb
409
+ rake ragel:rb && bin/test spec/scanner/properties_spec.rb
416
410
  ```
417
411
 
418
412
  ## Building
@@ -440,7 +434,9 @@ Projects using regexp_parser.
440
434
 
441
435
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
442
436
 
443
- - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
437
+ - [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
438
+
439
+ - [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) uses regexp_parser to generate examples of postal codes.
444
440
 
445
441
  - [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
446
442
 
@@ -471,4 +467,4 @@ Documentation and books used while working on this project.
471
467
 
472
468
  ---
473
469
  ##### Copyright
474
- _Copyright (c) 2010-2016 Ammar Ali. See LICENSE file for details._
470
+ _Copyright (c) 2010-2019 Ammar Ali. See LICENSE file for details._
data/lib/regexp_parser/expression.rb CHANGED
@@ -21,10 +21,10 @@ module Regexp::Expression
21
21
  self.options = options
22
22
  end
23
23
 
24
- def initialize_clone(other)
25
- other.text = (text ? text.dup : nil)
26
- other.options = (options ? options.dup : nil)
27
- other.quantifier = (quantifier ? quantifier.clone : nil)
24
+ def initialize_clone(orig)
25
+ self.text = (orig.text ? orig.text.dup : nil)
26
+ self.options = (orig.options ? orig.options.dup : nil)
27
+ self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
28
28
  super
29
29
  end
30
30
 
@@ -62,15 +62,28 @@ module Regexp::Expression
62
62
  self.quantifier = Quantifier.new(token, text, min, max, mode)
63
63
  end
64
64
 
65
+ def unquantified_clone
66
+ clone.tap { |exp| exp.quantifier = nil }
67
+ end
68
+
65
69
  def quantified?
66
70
  !quantifier.nil?
67
71
  end
68
72
 
73
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
69
74
  def quantity
70
75
  return [nil,nil] unless quantified?
71
76
  [quantifier.min, quantifier.max]
72
77
  end
73
78
 
79
+ def repetitions
80
+ return 1..1 unless quantified?
81
+ min = quantifier.min
82
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
83
+ # fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
84
+ (min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
85
+ end
86
+
74
87
  def greedy?
75
88
  quantified? and quantifier.greedy?
76
89
  end
@@ -101,22 +114,20 @@ module Regexp::Expression
101
114
  alias :x? :free_spacing?
102
115
  alias :extended? :free_spacing?
103
116
 
104
- if RUBY_VERSION >= '2.0'
105
- def default_classes?
106
- options[:d] == true
107
- end
108
- alias :d? :default_classes?
117
+ def default_classes?
118
+ options[:d] == true
119
+ end
120
+ alias :d? :default_classes?
109
121
 
110
- def ascii_classes?
111
- options[:a] == true
112
- end
113
- alias :a? :ascii_classes?
122
+ def ascii_classes?
123
+ options[:a] == true
124
+ end
125
+ alias :a? :ascii_classes?
114
126
 
115
- def unicode_classes?
116
- options[:u] == true
117
- end
118
- alias :u? :unicode_classes?
127
+ def unicode_classes?
128
+ options[:u] == true
119
129
  end
130
+ alias :u? :unicode_classes?
120
131
 
121
132
  def matches?(string)
122
133
  Regexp.new(to_s) =~ string ? true : false
@@ -161,10 +172,6 @@ module Regexp::Expression
161
172
 
162
173
  end # module Regexp::Expression
163
174
 
164
- require 'regexp_parser/expression/methods/tests'
165
- require 'regexp_parser/expression/methods/traverse'
166
- require 'regexp_parser/expression/methods/strfregexp'
167
-
168
175
  require 'regexp_parser/expression/quantifier'
169
176
  require 'regexp_parser/expression/subexpression'
170
177
  require 'regexp_parser/expression/sequence'
@@ -186,3 +193,8 @@ require 'regexp_parser/expression/classes/set'
186
193
  require 'regexp_parser/expression/classes/set/intersection'
187
194
  require 'regexp_parser/expression/classes/set/range'
188
195
  require 'regexp_parser/expression/classes/type'
196
+
197
+ require 'regexp_parser/expression/methods/match_length'
198
+ require 'regexp_parser/expression/methods/strfregexp'
199
+ require 'regexp_parser/expression/methods/tests'
200
+ require 'regexp_parser/expression/methods/traverse'
data/lib/regexp_parser/expression/classes/backref.rb CHANGED
@@ -1,9 +1,12 @@
1
1
  module Regexp::Expression
2
2
  module Backreference
3
- class Base < Regexp::Expression::Base; end
3
+ class Base < Regexp::Expression::Base
4
+ attr_accessor :referenced_expression
5
+ end
4
6
 
5
7
  class Number < Backreference::Base
6
8
  attr_reader :number
9
+ alias reference number
7
10
 
8
11
  def initialize(token, options = {})
9
12
  @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
@@ -13,6 +16,7 @@ module Regexp::Expression
13
16
 
14
17
  class Name < Backreference::Base
15
18
  attr_reader :name
19
+ alias reference name
16
20
 
17
21
  def initialize(token, options = {})
18
22
  @name = token.text[3..-2]
@@ -20,27 +24,31 @@ module Regexp::Expression
20
24
  end
21
25
  end
22
26
 
27
+ class NumberRelative < Backreference::Number
28
+ attr_accessor :effective_number
29
+ alias reference effective_number
30
+ end
31
+
23
32
  class NumberCall < Backreference::Number; end
24
- class NumberRelative < Backreference::Number; end
25
- class NumberCallRelative < Backreference::Number; end
26
- class NameCall < Backreference::Name; end
33
+ class NameCall < Backreference::Name; end
34
+ class NumberCallRelative < Backreference::NumberRelative; end
27
35
 
28
- class NumberRecursionLevel < Backreference::Base
29
- attr_reader :number, :recursion_level
36
+ class NumberRecursionLevel < Backreference::Number
37
+ attr_reader :recursion_level
30
38
 
31
39
  def initialize(token, options = {})
32
- @number, @recursion_level = token.text[3..-2].split(/(?=[+-])/).map(&:to_i)
33
40
  super
41
+ @number, @recursion_level = token.text[3..-2].split(/(?=[+-])/).map(&:to_i)
34
42
  end
35
43
  end
36
44
 
37
- class NameRecursionLevel < Backreference::Base
38
- attr_reader :name, :recursion_level
45
+ class NameRecursionLevel < Backreference::Name
46
+ attr_reader :recursion_level
39
47
 
40
48
  def initialize(token, options = {})
49
+ super
41
50
  @name, recursion_level = token.text[3..-2].split(/(?=[+-])/)
42
51
  @recursion_level = recursion_level.to_i
43
- super
44
52
  end
45
53
  end
46
54
  end
data/lib/regexp_parser/expression/classes/conditional.rb CHANGED
@@ -7,6 +7,8 @@ module Regexp::Expression
7
7
  end
8
8
 
9
9
  class Condition < Regexp::Expression::Base
10
+ attr_accessor :referenced_expression
11
+
10
12
  # Name or number of the referenced capturing group that determines state.
11
13
  # Returns a String if reference is by name, Integer if by number.
12
14
  def reference
@@ -18,6 +20,8 @@ module Regexp::Expression
18
20
  class Branch < Regexp::Expression::Sequence; end
19
21
 
20
22
  class Expression < Regexp::Expression::Subexpression
23
+ attr_accessor :referenced_expression
24
+
21
25
  def <<(exp)
22
26
  expressions.last << exp
23
27
  end
data/lib/regexp_parser/expression/classes/group.rb CHANGED
@@ -19,20 +19,22 @@ module Regexp::Expression
19
19
 
20
20
  class Capture < Group::Base
21
21
  attr_accessor :number, :number_at_level
22
+ alias identifier number
22
23
 
23
24
  def capturing?; true end
24
25
  end
25
26
 
26
27
  class Named < Group::Capture
27
28
  attr_reader :name
29
+ alias identifier name
28
30
 
29
31
  def initialize(token, options = {})
30
32
  @name = token.text[3..-2]
31
33
  super
32
34
  end
33
35
 
34
- def initialize_clone(other)
35
- other.instance_variable_set(:@name, name.dup)
36
+ def initialize_clone(orig)
37
+ @name = orig.name.dup
36
38
  super
37
39
  end
38
40
  end
data/lib/regexp_parser/expression/classes/keep.rb CHANGED
@@ -1,7 +1,5 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Keep
4
- class Mark < Regexp::Expression::Base; end
3
+ class Mark < Regexp::Expression::Base; end
5
4
  end
6
-
7
5
  end
data/lib/regexp_parser/expression/methods/match_length.rb ADDED
@@ -0,0 +1,172 @@
1
+ class Regexp::MatchLength
2
+ include Enumerable
3
+
4
+ def self.of(obj)
5
+ exp = obj.is_a?(Regexp::Expression::Base) ? obj : Regexp::Parser.parse(obj)
6
+ exp.match_length
7
+ end
8
+
9
+ def initialize(exp, opts = {})
10
+ self.exp_class = exp.class
11
+ self.min_rep = exp.repetitions.min
12
+ self.max_rep = exp.repetitions.max
13
+ if base = opts[:base]
14
+ self.base_min = base
15
+ self.base_max = base
16
+ self.reify = ->{ '.' * base }
17
+ else
18
+ self.base_min = opts.fetch(:base_min)
19
+ self.base_max = opts.fetch(:base_max)
20
+ self.reify = opts.fetch(:reify)
21
+ end
22
+ end
23
+
24
+ def each(opts = {})
25
+ return enum_for(__method__) unless block_given?
26
+ limit = opts[:limit] || 1000
27
+ yielded = 0
28
+ (min..max).each do |num|
29
+ next unless include?(num)
30
+ yield(num)
31
+ break if (yielded += 1) >= limit
32
+ end
33
+ end
34
+
35
+ def endless_each(&block)
36
+ return enum_for(__method__) unless block_given?
37
+ (min..max).each { |num| yield(num) if include?(num) }
38
+ end
39
+
40
+ def include?(length)
41
+ test_regexp.match?('X' * length)
42
+ end
43
+
44
+ def fixed?
45
+ min == max
46
+ end
47
+
48
+ def min
49
+ min_rep * base_min
50
+ end
51
+
52
+ def max
53
+ max_rep * base_max
54
+ end
55
+
56
+ def minmax
57
+ [min, max]
58
+ end
59
+
60
+ def inspect
61
+ type = exp_class.name.sub('Regexp::Expression::', '')
62
+ "#<#{self.class}<#{type}> min=#{min} max=#{max}>"
63
+ end
64
+
65
+ def to_re
66
+ "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
67
+ end
68
+
69
+ private
70
+
71
+ attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
72
+
73
+ def test_regexp
74
+ @test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
75
+ regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
76
+ end
77
+ end
78
+ end
79
+
80
+ module Regexp::Expression
81
+ MatchLength = Regexp::MatchLength
82
+
83
+ [
84
+ CharacterSet,
85
+ CharacterSet::Intersection,
86
+ CharacterSet::IntersectedSequence,
87
+ CharacterSet::Range,
88
+ CharacterType::Base,
89
+ EscapeSequence::Base,
90
+ PosixClass,
91
+ UnicodeProperty::Base,
92
+ ].each do |klass|
93
+ klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
94
+ def match_length
95
+ MatchLength.new(self, base: 1)
96
+ end
97
+ RUBY
98
+ end
99
+
100
+ class Literal
101
+ def match_length
102
+ MatchLength.new(self, base: text.length)
103
+ end
104
+ end
105
+
106
+ class Subexpression
107
+ def match_length
108
+ MatchLength.new(self,
109
+ base_min: map { |exp| exp.match_length.min }.inject(0, :+),
110
+ base_max: map { |exp| exp.match_length.max }.inject(0, :+),
111
+ reify: ->{ map { |exp| exp.match_length.to_re }.join })
112
+ end
113
+
114
+ def inner_match_length
115
+ dummy = Regexp::Expression::Root.build
116
+ dummy.expressions = expressions.map(&:clone)
117
+ dummy.quantifier = quantifier && quantifier.clone
118
+ dummy.match_length
119
+ end
120
+ end
121
+
122
+ [
123
+ Alternation,
124
+ Conditional::Expression,
125
+ ].each do |klass|
126
+ klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
127
+ def match_length
128
+ MatchLength.new(self,
129
+ base_min: map { |exp| exp.match_length.min }.min,
130
+ base_max: map { |exp| exp.match_length.max }.max,
131
+ reify: ->{ map { |exp| exp.match_length.to_re }.join('|') })
132
+ end
133
+ RUBY
134
+ end
135
+
136
+ [
137
+ Anchor::Base,
138
+ Assertion::Base,
139
+ Conditional::Condition,
140
+ FreeSpace,
141
+ Keep::Mark,
142
+ ].each do |klass|
143
+ klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
144
+ def match_length
145
+ MatchLength.new(self, base: 0)
146
+ end
147
+ RUBY
148
+ end
149
+
150
+ class Backreference::Base
151
+ def match_length
152
+ if referenced_expression.nil?
153
+ raise ArgumentError, 'Missing referenced_expression - not parsed?'
154
+ end
155
+ referenced_expression.unquantified_clone.match_length
156
+ end
157
+ end
158
+
159
+ class EscapeSequence::CodepointList
160
+ def match_length
161
+ MatchLength.new(self, base: codepoints.count)
162
+ end
163
+ end
164
+
165
+ # Special case. Absence group can match 0.. chars, irrespective of content.
166
+ # TODO: in theory, they *can* exclude match lengths with `.`: `(?~.{3})`
167
+ class Group::Absence
168
+ def match_length
169
+ MatchLength.new(self, base_min: 0, base_max: Float::INFINITY, reify: ->{ '.*' })
170
+ end
171
+ end
172
+ end