regexp_parser 2.6.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +5 -10
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -20
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +21 -91
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +5 -0
  18. data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +68 -0
  19. data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
  20. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  21. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  22. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  23. data/lib/regexp_parser/expression/methods/referenced_expressions.rb +28 -0
  24. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  25. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  26. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  27. data/lib/regexp_parser/expression/sequence.rb +5 -10
  28. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  29. data/lib/regexp_parser/expression/shared.rb +37 -20
  30. data/lib/regexp_parser/expression/subexpression.rb +20 -15
  31. data/lib/regexp_parser/expression.rb +37 -31
  32. data/lib/regexp_parser/lexer.rb +76 -36
  33. data/lib/regexp_parser/parser.rb +107 -103
  34. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  35. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  36. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  37. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  38. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  39. data/lib/regexp_parser/scanner/property.rl +2 -2
  40. data/lib/regexp_parser/scanner/scanner.rl +101 -172
  41. data/lib/regexp_parser/scanner.rb +1171 -1365
  42. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  43. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  44. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  45. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  46. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  47. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  48. data/lib/regexp_parser/syntax/token.rb +13 -13
  49. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  50. data/lib/regexp_parser/syntax/versions.rb +3 -1
  51. data/lib/regexp_parser/syntax.rb +1 -1
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +6 -6
  54. data/regexp_parser.gemspec +5 -5
  55. metadata +17 -8
  56. data/CHANGELOG.md +0 -601
  57. data/README.md +0 -503
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cadf1761e17469c6bf76db652a4f6fc97a3d33b7eaa46e6ea16f95ee6661743d
4
- data.tar.gz: 3d6252f67f201b3cb6a3b94721c65b39abfe7b13bf0097fc9144498f6fdf8837
3
+ metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
4
+ data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
5
5
  SHA512:
6
- metadata.gz: 3fb24f56b5d8da354aa5825dc2e9432c7e8bd836c9c2a7009c8883e367fb8ca61020a04854c714cacff913281b1156b4663334696edcb1d7e9239d8c8184d439
7
- data.tar.gz: e793b72a9394e26bf0b9e6cb58c7536b72c30562382713f8b60735969f3b3b9b3aea78bf45efa661397d7141c2684a6df2b32cc8b449c413ea9d11c90c5396db
6
+ metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
7
+ data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
data/Gemfile CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'ice_nine', '~> 0.11.2'
7
- gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.3'
6
+ gem 'leto', '~> 2.1'
7
+ gem 'rake', '~> 13.1'
8
+ gem 'regexp_property_values', '~> 1.5'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
11
  gem 'benchmark-ips', '~> 2.1'
12
- gem 'gouteur'
13
- gem 'rubocop', '~> 1.7'
12
+ gem 'gouteur', '~> 1.1'
13
+ gem 'rubocop', '~> 1.59'
14
14
  end
15
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2022, Ammar Ali
1
+ Copyright (c) 2010, 2012-2024, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
@@ -6,13 +6,6 @@ module Regexp::Expression
6
6
  init_from_token_and_options(token, options)
7
7
  end
8
8
 
9
- def initialize_copy(orig)
10
- self.text = orig.text.dup if orig.text
11
- self.options = orig.options.dup if orig.options
12
- self.quantifier = orig.quantifier.clone if orig.quantifier
13
- super
14
- end
15
-
16
9
  def to_re(format = :full)
17
10
  if set_level > 0
18
11
  warn "Calling #to_re on character set members is deprecated - "\
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
@@ -1,21 +1,13 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :backref, one way or the other, in v3.0.0
3
2
  module Backreference
4
- class Base < Regexp::Expression::Base
5
- attr_accessor :referenced_expression
6
-
7
- def initialize_copy(orig)
8
- self.referenced_expression = orig.referenced_expression.dup
9
- super
10
- end
11
- end
3
+ class Base < Regexp::Expression::Base; end
12
4
 
13
5
  class Number < Backreference::Base
14
6
  attr_reader :number
15
7
  alias reference number
16
8
 
17
9
  def initialize(token, options = {})
18
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
10
+ @number = token.text[/-?\d+/].to_i
19
11
  super
20
12
  end
21
13
  end
@@ -58,4 +50,7 @@ module Regexp::Expression
58
50
  end
59
51
  end
60
52
  end
53
+
54
+ # alias for symmetry between token symbol and Expression class name
55
+ Backref = Backreference
61
56
  end
@@ -1,10 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
9
  complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
15
14
  def complete?
16
15
  count == 2
17
16
  end
18
-
19
- def parts
20
- intersperse(expressions, text.dup)
21
- end
22
17
  end
23
18
  end
24
19
  end
@@ -1,10 +1,7 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  attr_accessor :closed, :negative
4
-
5
- alias :negative? :negative
6
- alias :negated? :negative
7
- alias :closed? :closed
4
+ alias :closed? :closed
8
5
 
9
6
  def initialize(token, options = {})
10
7
  self.negative = false
@@ -19,9 +16,8 @@ module Regexp::Expression
19
16
  def close
20
17
  self.closed = true
21
18
  end
22
-
23
- def parts
24
- ["#{text}#{'^' if negated?}", *expressions, ']']
25
- end
26
19
  end
20
+
21
+ # alias for symmetry between token symbol and Expression class name
22
+ Set = CharacterSet
27
23
  end # module Regexp::Expression
@@ -7,33 +7,24 @@ module Regexp::Expression
7
7
  end
8
8
 
9
9
  class Condition < Regexp::Expression::Base
10
- attr_accessor :referenced_expression
11
-
12
10
  # Name or number of the referenced capturing group that determines state.
13
11
  # Returns a String if reference is by name, Integer if by number.
14
12
  def reference
15
13
  ref = text.tr("'<>()", "")
16
14
  ref =~ /\D/ ? ref : Integer(ref)
17
15
  end
18
-
19
- def initialize_copy(orig)
20
- self.referenced_expression = orig.referenced_expression.dup
21
- super
22
- end
23
16
  end
24
17
 
25
18
  class Branch < Regexp::Expression::Sequence; end
26
19
 
27
20
  class Expression < Regexp::Expression::Subexpression
28
- attr_accessor :referenced_expression
29
-
30
21
  def <<(exp)
31
22
  expressions.last << exp
32
23
  end
33
24
 
34
- def add_sequence(active_opts = {})
25
+ def add_sequence(active_opts = {}, params = { ts: 0 })
35
26
  raise TooManyBranches.new if branches.length == 2
36
- params = { conditional_level: conditional_level + 1 }
27
+ params = params.merge({ conditional_level: conditional_level + 1 })
37
28
  Branch.add_to(self, params, active_opts)
38
29
  end
39
30
  alias :branch :add_sequence
@@ -54,15 +45,6 @@ module Regexp::Expression
54
45
  def reference
55
46
  condition.reference
56
47
  end
57
-
58
- def parts
59
- [text.dup, condition, *intersperse(branches, '|'), ')']
60
- end
61
-
62
- def initialize_copy(orig)
63
- self.referenced_expression = orig.referenced_expression.dup
64
- super
65
- end
66
48
  end
67
49
  end
68
50
  end
@@ -1,100 +1,30 @@
1
1
  module Regexp::Expression
2
- # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
3
2
  module EscapeSequence
4
- class Base < Regexp::Expression::Base
5
- def codepoint
6
- char.ord
7
- end
3
+ Base = Class.new(Regexp::Expression::Base)
8
4
 
9
- if ''.respond_to?(:undump)
10
- def char
11
- %("#{text}").undump
12
- end
13
- else
14
- # poor man's unescape without using eval
15
- require 'yaml'
16
- def char
17
- YAML.load(%Q(---\n"#{text}"\n))
18
- end
19
- end
20
- end
5
+ AsciiEscape = Class.new(Base) # \e
6
+ Backspace = Class.new(Base) # \b
7
+ Bell = Class.new(Base) # \a
8
+ FormFeed = Class.new(Base) # \f
9
+ Newline = Class.new(Base) # \n
10
+ Return = Class.new(Base) # \r
11
+ Tab = Class.new(Base) # \t
12
+ VerticalTab = Class.new(Base) # \v
21
13
 
22
- class Literal < EscapeSequence::Base
23
- def char
24
- text[1..-1]
25
- end
26
- end
14
+ Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
27
15
 
28
- class AsciiEscape < EscapeSequence::Base; end
29
- class Backspace < EscapeSequence::Base; end
30
- class Bell < EscapeSequence::Base; end
31
- class FormFeed < EscapeSequence::Base; end
32
- class Newline < EscapeSequence::Base; end
33
- class Return < EscapeSequence::Base; end
34
- class Tab < EscapeSequence::Base; end
35
- class VerticalTab < EscapeSequence::Base; end
16
+ Octal = Class.new(Base) # e.g. \012
17
+ Hex = Class.new(Base) # e.g. \x0A
18
+ Codepoint = Class.new(Base) # e.g. \u000A
36
19
 
37
- class Hex < EscapeSequence::Base; end
38
- class Codepoint < EscapeSequence::Base; end
20
+ CodepointList = Class.new(Base) # e.g. \u{A B}
39
21
 
40
- class CodepointList < EscapeSequence::Base
41
- def char
42
- raise NoMethodError, 'CodepointList responds only to #chars'
43
- end
44
-
45
- def codepoint
46
- raise NoMethodError, 'CodepointList responds only to #codepoints'
47
- end
48
-
49
- def chars
50
- codepoints.map { |cp| cp.chr('utf-8') }
51
- end
52
-
53
- def codepoints
54
- text.scan(/\h+/).map(&:hex)
55
- end
56
- end
57
-
58
- class Octal < EscapeSequence::Base
59
- def char
60
- text[1..-1].to_i(8).chr('utf-8')
61
- end
62
- end
63
-
64
- class AbstractMetaControlSequence < EscapeSequence::Base
65
- def char
66
- codepoint.chr('utf-8')
67
- end
68
-
69
- private
70
-
71
- def control_sequence_to_s(control_sequence)
72
- five_lsb = control_sequence.unpack('B*').first[-5..-1]
73
- ["000#{five_lsb}"].pack('B*')
74
- end
75
-
76
- def meta_char_to_codepoint(meta_char)
77
- byte_value = meta_char.ord
78
- byte_value < 128 ? byte_value + 128 : byte_value
79
- end
80
- end
81
-
82
- class Control < AbstractMetaControlSequence
83
- def codepoint
84
- control_sequence_to_s(text).ord
85
- end
86
- end
87
-
88
- class Meta < AbstractMetaControlSequence
89
- def codepoint
90
- meta_char_to_codepoint(text[-1])
91
- end
92
- end
93
-
94
- class MetaControl < AbstractMetaControlSequence
95
- def codepoint
96
- meta_char_to_codepoint(control_sequence_to_s(text))
97
- end
98
- end
22
+ AbstractMetaControlSequence = Class.new(Base)
23
+ Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
24
+ Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
25
+ MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
99
26
  end
27
+
28
+ # alias for symmetry between Token::* and Expression::*
29
+ Escape = EscapeSequence
100
30
  end
@@ -5,10 +5,12 @@ module Regexp::Expression
5
5
  end
6
6
  end
7
7
 
8
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
9
10
 
10
11
  class WhiteSpace < Regexp::Expression::FreeSpace
11
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
12
14
  text << exp.text
13
15
  end
14
16
  end
@@ -1,13 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def parts
5
- [text.dup, *expressions, ')']
6
- end
7
-
8
- def capturing?; false end
9
-
10
- def comment?; false end
11
4
  end
12
5
 
13
6
  class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
18
11
  super
19
12
  end
20
13
 
21
- def parts
22
- if implicit?
23
- expressions
24
- else
25
- super
26
- end
27
- end
28
-
29
14
  def implicit?
30
15
  @implicit
31
16
  end
@@ -55,8 +40,6 @@ module Regexp::Expression
55
40
  class Capture < Group::Base
56
41
  attr_accessor :number, :number_at_level
57
42
  alias identifier number
58
-
59
- def capturing?; true end
60
43
  end
61
44
 
62
45
  class Named < Group::Capture
@@ -75,11 +58,6 @@ module Regexp::Expression
75
58
  end
76
59
 
77
60
  class Comment < Group::Base
78
- def parts
79
- [text.dup]
80
- end
81
-
82
- def comment?; true end
83
61
  end
84
62
  end
85
63
 
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
- # TOOD: in regexp_parser v3.0.0 this should possibly be a Subexpression
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
4
  # that contains all expressions to its left.
5
5
  class Mark < Regexp::Expression::Base; end
6
6
  end
@@ -1,11 +1,11 @@
1
1
  module Regexp::Expression
2
2
  class PosixClass < Regexp::Expression::Base
3
- def negative?
4
- type == :nonposixclass
5
- end
6
-
7
3
  def name
8
- token.to_s
4
+ text[/\w+/]
9
5
  end
10
6
  end
7
+
8
+ # alias for symmetry between token symbol and Expression class name
9
+ Posixclass = PosixClass
10
+ Nonposixclass = PosixClass
11
11
  end
@@ -1,17 +1,12 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :property, one way or the other, in v3.0.0
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
- def negative?
6
- type == :nonproperty
7
- end
8
-
9
4
  def name
10
5
  text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
6
  end
12
7
 
13
8
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
9
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
10
  end
16
11
  end
17
12
 
@@ -110,10 +105,15 @@ module Regexp::Expression
110
105
  class Unassigned < Codepoint::Base; end
111
106
  end
112
107
 
113
- class Age < UnicodeProperty::Base; end
114
- class Derived < UnicodeProperty::Base; end
115
- class Emoji < UnicodeProperty::Base; end
116
- class Script < UnicodeProperty::Base; end
117
- class Block < UnicodeProperty::Base; end
108
+ class Age < UnicodeProperty::Base; end
109
+ class Block < UnicodeProperty::Base; end
110
+ class Derived < UnicodeProperty::Base; end
111
+ class Emoji < UnicodeProperty::Base; end
112
+ class Enumerated < UnicodeProperty::Base; end
113
+ class Script < UnicodeProperty::Base; end
118
114
  end
115
+
116
+ # alias for symmetry between token symbol and Expression class name
117
+ Property = UnicodeProperty
118
+ Nonproperty = UnicodeProperty
119
119
  end # module Regexp::Expression
@@ -25,11 +25,9 @@ module Regexp::Expression
25
25
  def token_class
26
26
  if self == Root || self < Sequence
27
27
  nil # no token class because these objects are Parser-generated
28
- # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
- elsif self == Alternation || self == CharacterType::Any
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
30
  Regexp::Syntax::Token::Meta
31
- elsif self <= EscapeSequence::Base
32
- Regexp::Syntax::Token::Escape
33
31
  else
34
32
  Regexp::Syntax::Token.const_get(name.split('::')[2])
35
33
  end
@@ -0,0 +1,5 @@
1
+ Regexp::Expression::EscapeSequence::Base.class_eval do
2
+ def char
3
+ codepoint.chr('utf-8')
4
+ end
5
+ end
@@ -0,0 +1,68 @@
1
+ module Regexp::Expression::EscapeSequence
2
+ AsciiEscape.class_eval { def codepoint; 0x1B end }
3
+ Backspace.class_eval { def codepoint; 0x8 end }
4
+ Bell.class_eval { def codepoint; 0x7 end }
5
+ FormFeed.class_eval { def codepoint; 0xC end }
6
+ Newline.class_eval { def codepoint; 0xA end }
7
+ Return.class_eval { def codepoint; 0xD end }
8
+ Tab.class_eval { def codepoint; 0x9 end }
9
+ VerticalTab.class_eval { def codepoint; 0xB end }
10
+
11
+ Literal.class_eval { def codepoint; text[1].ord end }
12
+
13
+ Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
14
+
15
+ Hex.class_eval { def codepoint; text[/\h+/].hex end }
16
+ Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
17
+
18
+ CodepointList.class_eval do
19
+ # Maybe this should be a unique top-level expression class?
20
+ def char
21
+ raise NoMethodError, 'CodepointList responds only to #chars'
22
+ end
23
+
24
+ def codepoint
25
+ raise NoMethodError, 'CodepointList responds only to #codepoints'
26
+ end
27
+
28
+ def chars
29
+ codepoints.map { |cp| cp.chr('utf-8') }
30
+ end
31
+
32
+ def codepoints
33
+ text.scan(/\h+/).map(&:hex)
34
+ end
35
+ end
36
+
37
+ AbstractMetaControlSequence.class_eval do
38
+ private
39
+
40
+ def control_sequence_to_s(control_sequence)
41
+ five_lsb = control_sequence.unpack('B*').first[-5..-1]
42
+ ["000#{five_lsb}"].pack('B*')
43
+ end
44
+
45
+ def meta_char_to_codepoint(meta_char)
46
+ byte_value = meta_char.ord
47
+ byte_value < 128 ? byte_value + 128 : byte_value
48
+ end
49
+ end
50
+
51
+ Control.class_eval do
52
+ def codepoint
53
+ control_sequence_to_s(text).ord
54
+ end
55
+ end
56
+
57
+ Meta.class_eval do
58
+ def codepoint
59
+ meta_char_to_codepoint(text[-1])
60
+ end
61
+ end
62
+
63
+ MetaControl.class_eval do
64
+ def codepoint
65
+ meta_char_to_codepoint(control_sequence_to_s(text))
66
+ end
67
+ end
68
+ end
@@ -63,16 +63,20 @@ class Regexp::MatchLength
63
63
  end
64
64
 
65
65
  def to_re
66
- "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
66
+ /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
67
67
  end
68
68
 
69
69
  private
70
70
 
71
71
  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
72
72
 
73
- def test_regexp
74
- @test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
75
- regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
73
+ if Regexp.method_defined?(:match?) # ruby >= 2.4
74
+ def test_regexp
75
+ @test_regexp ||= /^#{to_re}$/
76
+ end
77
+ else
78
+ def test_regexp
79
+ @test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
76
80
  end
77
81
  end
78
82
  end
@@ -0,0 +1,20 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def negative?
4
+ false
5
+ end
6
+
7
+ # not an alias so as to respect overrides of #negative?
8
+ def negated?
9
+ negative?
10
+ end
11
+ end
12
+
13
+ Anchor::NonWordBoundary.class_eval { def negative?; true end }
14
+ Assertion::NegativeLookahead.class_eval { def negative?; true end }
15
+ Assertion::NegativeLookbehind.class_eval { def negative?; true end }
16
+ CharacterSet.class_eval { def negative?; negative end }
17
+ CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
18
+ PosixClass.class_eval { def negative?; type == :nonposixclass end }
19
+ UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
20
+ end
@@ -0,0 +1,23 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation
4
+ def parts
5
+ [text.dup]
6
+ end
7
+
8
+ private
9
+
10
+ def intersperse(expressions, separator)
11
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
12
+ end
13
+ end
14
+
15
+ CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
16
+ CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
17
+ Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18
+ Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
19
+ Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
20
+ Group::Comment.class_eval { def parts; [text.dup] end }
21
+ Subexpression.class_eval { def parts; expressions end }
22
+ SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
23
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def inspect
4
+ [
5
+ "#<#{self.class}",
6
+ pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
7
+ ">"
8
+ ].join
9
+ end
10
+
11
+ # Make pretty-print work despite #inspect implementation.
12
+ def pretty_print(q)
13
+ q.pp_object(self)
14
+ end
15
+
16
+ # Called by pretty_print (ruby/pp) and #inspect.
17
+ def pretty_print_instance_variables
18
+ [
19
+ (:@text unless text.to_s.empty?),
20
+ (:@quantifier if quantified?),
21
+ (:@options unless options.empty?),
22
+ (:@expressions unless terminal?),
23
+ ].compact
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,28 @@
1
+ module Regexp::Expression
2
+ module ReferencedExpressions
3
+ attr_accessor :referenced_expressions
4
+
5
+ def referenced_expression
6
+ referenced_expressions && referenced_expressions.first
7
+ end
8
+
9
+ def initialize_copy(orig)
10
+ exp_id = [self.class, self.starts_at]
11
+
12
+ # prevent infinite recursion for recursive subexp calls
13
+ copied = self.class.instance_eval { @copied_ref_exps ||= {} }
14
+ self.referenced_expressions =
15
+ if copied[exp_id]
16
+ orig.referenced_expressions
17
+ else
18
+ copied[exp_id] = true
19
+ orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
20
+ end
21
+ copied.clear
22
+
23
+ super
24
+ end
25
+ end
26
+
27
+ Base.include ReferencedExpressions
28
+ end