regexp_parser 2.6.0 → 2.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +5 -10
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -20
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +21 -91
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +5 -0
  18. data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +68 -0
  19. data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
  20. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  21. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  22. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  23. data/lib/regexp_parser/expression/methods/referenced_expressions.rb +28 -0
  24. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  25. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  26. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  27. data/lib/regexp_parser/expression/sequence.rb +5 -10
  28. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  29. data/lib/regexp_parser/expression/shared.rb +37 -20
  30. data/lib/regexp_parser/expression/subexpression.rb +20 -15
  31. data/lib/regexp_parser/expression.rb +37 -31
  32. data/lib/regexp_parser/lexer.rb +76 -36
  33. data/lib/regexp_parser/parser.rb +107 -103
  34. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  35. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  36. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  37. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  38. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  39. data/lib/regexp_parser/scanner/property.rl +2 -2
  40. data/lib/regexp_parser/scanner/scanner.rl +101 -172
  41. data/lib/regexp_parser/scanner.rb +1171 -1365
  42. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  43. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  44. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  45. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  46. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  47. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  48. data/lib/regexp_parser/syntax/token.rb +13 -13
  49. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  50. data/lib/regexp_parser/syntax/versions.rb +3 -1
  51. data/lib/regexp_parser/syntax.rb +1 -1
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +6 -6
  54. data/regexp_parser.gemspec +5 -5
  55. metadata +17 -8
  56. data/CHANGELOG.md +0 -601
  57. data/README.md +0 -503
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cadf1761e17469c6bf76db652a4f6fc97a3d33b7eaa46e6ea16f95ee6661743d
4
- data.tar.gz: 3d6252f67f201b3cb6a3b94721c65b39abfe7b13bf0097fc9144498f6fdf8837
3
+ metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
4
+ data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
5
5
  SHA512:
6
- metadata.gz: 3fb24f56b5d8da354aa5825dc2e9432c7e8bd836c9c2a7009c8883e367fb8ca61020a04854c714cacff913281b1156b4663334696edcb1d7e9239d8c8184d439
7
- data.tar.gz: e793b72a9394e26bf0b9e6cb58c7536b72c30562382713f8b60735969f3b3b9b3aea78bf45efa661397d7141c2684a6df2b32cc8b449c413ea9d11c90c5396db
6
+ metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
7
+ data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
data/Gemfile CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'ice_nine', '~> 0.11.2'
7
- gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.3'
6
+ gem 'leto', '~> 2.1'
7
+ gem 'rake', '~> 13.1'
8
+ gem 'regexp_property_values', '~> 1.5'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
11
  gem 'benchmark-ips', '~> 2.1'
12
- gem 'gouteur'
13
- gem 'rubocop', '~> 1.7'
12
+ gem 'gouteur', '~> 1.1'
13
+ gem 'rubocop', '~> 1.59'
14
14
  end
15
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2022, Ammar Ali
1
+ Copyright (c) 2010, 2012-2024, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
@@ -6,13 +6,6 @@ module Regexp::Expression
6
6
  init_from_token_and_options(token, options)
7
7
  end
8
8
 
9
- def initialize_copy(orig)
10
- self.text = orig.text.dup if orig.text
11
- self.options = orig.options.dup if orig.options
12
- self.quantifier = orig.quantifier.clone if orig.quantifier
13
- super
14
- end
15
-
16
9
  def to_re(format = :full)
17
10
  if set_level > 0
18
11
  warn "Calling #to_re on character set members is deprecated - "\
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
@@ -1,21 +1,13 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :backref, one way or the other, in v3.0.0
3
2
  module Backreference
4
- class Base < Regexp::Expression::Base
5
- attr_accessor :referenced_expression
6
-
7
- def initialize_copy(orig)
8
- self.referenced_expression = orig.referenced_expression.dup
9
- super
10
- end
11
- end
3
+ class Base < Regexp::Expression::Base; end
12
4
 
13
5
  class Number < Backreference::Base
14
6
  attr_reader :number
15
7
  alias reference number
16
8
 
17
9
  def initialize(token, options = {})
18
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
10
+ @number = token.text[/-?\d+/].to_i
19
11
  super
20
12
  end
21
13
  end
@@ -58,4 +50,7 @@ module Regexp::Expression
58
50
  end
59
51
  end
60
52
  end
53
+
54
+ # alias for symmetry between token symbol and Expression class name
55
+ Backref = Backreference
61
56
  end
@@ -1,10 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
9
  complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
15
14
  def complete?
16
15
  count == 2
17
16
  end
18
-
19
- def parts
20
- intersperse(expressions, text.dup)
21
- end
22
17
  end
23
18
  end
24
19
  end
@@ -1,10 +1,7 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  attr_accessor :closed, :negative
4
-
5
- alias :negative? :negative
6
- alias :negated? :negative
7
- alias :closed? :closed
4
+ alias :closed? :closed
8
5
 
9
6
  def initialize(token, options = {})
10
7
  self.negative = false
@@ -19,9 +16,8 @@ module Regexp::Expression
19
16
  def close
20
17
  self.closed = true
21
18
  end
22
-
23
- def parts
24
- ["#{text}#{'^' if negated?}", *expressions, ']']
25
- end
26
19
  end
20
+
21
+ # alias for symmetry between token symbol and Expression class name
22
+ Set = CharacterSet
27
23
  end # module Regexp::Expression
@@ -7,33 +7,24 @@ module Regexp::Expression
7
7
  end
8
8
 
9
9
  class Condition < Regexp::Expression::Base
10
- attr_accessor :referenced_expression
11
-
12
10
  # Name or number of the referenced capturing group that determines state.
13
11
  # Returns a String if reference is by name, Integer if by number.
14
12
  def reference
15
13
  ref = text.tr("'<>()", "")
16
14
  ref =~ /\D/ ? ref : Integer(ref)
17
15
  end
18
-
19
- def initialize_copy(orig)
20
- self.referenced_expression = orig.referenced_expression.dup
21
- super
22
- end
23
16
  end
24
17
 
25
18
  class Branch < Regexp::Expression::Sequence; end
26
19
 
27
20
  class Expression < Regexp::Expression::Subexpression
28
- attr_accessor :referenced_expression
29
-
30
21
  def <<(exp)
31
22
  expressions.last << exp
32
23
  end
33
24
 
34
- def add_sequence(active_opts = {})
25
+ def add_sequence(active_opts = {}, params = { ts: 0 })
35
26
  raise TooManyBranches.new if branches.length == 2
36
- params = { conditional_level: conditional_level + 1 }
27
+ params = params.merge({ conditional_level: conditional_level + 1 })
37
28
  Branch.add_to(self, params, active_opts)
38
29
  end
39
30
  alias :branch :add_sequence
@@ -54,15 +45,6 @@ module Regexp::Expression
54
45
  def reference
55
46
  condition.reference
56
47
  end
57
-
58
- def parts
59
- [text.dup, condition, *intersperse(branches, '|'), ')']
60
- end
61
-
62
- def initialize_copy(orig)
63
- self.referenced_expression = orig.referenced_expression.dup
64
- super
65
- end
66
48
  end
67
49
  end
68
50
  end
@@ -1,100 +1,30 @@
1
1
  module Regexp::Expression
2
- # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
3
2
  module EscapeSequence
4
- class Base < Regexp::Expression::Base
5
- def codepoint
6
- char.ord
7
- end
3
+ Base = Class.new(Regexp::Expression::Base)
8
4
 
9
- if ''.respond_to?(:undump)
10
- def char
11
- %("#{text}").undump
12
- end
13
- else
14
- # poor man's unescape without using eval
15
- require 'yaml'
16
- def char
17
- YAML.load(%Q(---\n"#{text}"\n))
18
- end
19
- end
20
- end
5
+ AsciiEscape = Class.new(Base) # \e
6
+ Backspace = Class.new(Base) # \b
7
+ Bell = Class.new(Base) # \a
8
+ FormFeed = Class.new(Base) # \f
9
+ Newline = Class.new(Base) # \n
10
+ Return = Class.new(Base) # \r
11
+ Tab = Class.new(Base) # \t
12
+ VerticalTab = Class.new(Base) # \v
21
13
 
22
- class Literal < EscapeSequence::Base
23
- def char
24
- text[1..-1]
25
- end
26
- end
14
+ Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
27
15
 
28
- class AsciiEscape < EscapeSequence::Base; end
29
- class Backspace < EscapeSequence::Base; end
30
- class Bell < EscapeSequence::Base; end
31
- class FormFeed < EscapeSequence::Base; end
32
- class Newline < EscapeSequence::Base; end
33
- class Return < EscapeSequence::Base; end
34
- class Tab < EscapeSequence::Base; end
35
- class VerticalTab < EscapeSequence::Base; end
16
+ Octal = Class.new(Base) # e.g. \012
17
+ Hex = Class.new(Base) # e.g. \x0A
18
+ Codepoint = Class.new(Base) # e.g. \u000A
36
19
 
37
- class Hex < EscapeSequence::Base; end
38
- class Codepoint < EscapeSequence::Base; end
20
+ CodepointList = Class.new(Base) # e.g. \u{A B}
39
21
 
40
- class CodepointList < EscapeSequence::Base
41
- def char
42
- raise NoMethodError, 'CodepointList responds only to #chars'
43
- end
44
-
45
- def codepoint
46
- raise NoMethodError, 'CodepointList responds only to #codepoints'
47
- end
48
-
49
- def chars
50
- codepoints.map { |cp| cp.chr('utf-8') }
51
- end
52
-
53
- def codepoints
54
- text.scan(/\h+/).map(&:hex)
55
- end
56
- end
57
-
58
- class Octal < EscapeSequence::Base
59
- def char
60
- text[1..-1].to_i(8).chr('utf-8')
61
- end
62
- end
63
-
64
- class AbstractMetaControlSequence < EscapeSequence::Base
65
- def char
66
- codepoint.chr('utf-8')
67
- end
68
-
69
- private
70
-
71
- def control_sequence_to_s(control_sequence)
72
- five_lsb = control_sequence.unpack('B*').first[-5..-1]
73
- ["000#{five_lsb}"].pack('B*')
74
- end
75
-
76
- def meta_char_to_codepoint(meta_char)
77
- byte_value = meta_char.ord
78
- byte_value < 128 ? byte_value + 128 : byte_value
79
- end
80
- end
81
-
82
- class Control < AbstractMetaControlSequence
83
- def codepoint
84
- control_sequence_to_s(text).ord
85
- end
86
- end
87
-
88
- class Meta < AbstractMetaControlSequence
89
- def codepoint
90
- meta_char_to_codepoint(text[-1])
91
- end
92
- end
93
-
94
- class MetaControl < AbstractMetaControlSequence
95
- def codepoint
96
- meta_char_to_codepoint(control_sequence_to_s(text))
97
- end
98
- end
22
+ AbstractMetaControlSequence = Class.new(Base)
23
+ Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
24
+ Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
25
+ MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
99
26
  end
27
+
28
+ # alias for symmetry between Token::* and Expression::*
29
+ Escape = EscapeSequence
100
30
  end
@@ -5,10 +5,12 @@ module Regexp::Expression
5
5
  end
6
6
  end
7
7
 
8
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
9
10
 
10
11
  class WhiteSpace < Regexp::Expression::FreeSpace
11
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
12
14
  text << exp.text
13
15
  end
14
16
  end
@@ -1,13 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def parts
5
- [text.dup, *expressions, ')']
6
- end
7
-
8
- def capturing?; false end
9
-
10
- def comment?; false end
11
4
  end
12
5
 
13
6
  class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
18
11
  super
19
12
  end
20
13
 
21
- def parts
22
- if implicit?
23
- expressions
24
- else
25
- super
26
- end
27
- end
28
-
29
14
  def implicit?
30
15
  @implicit
31
16
  end
@@ -55,8 +40,6 @@ module Regexp::Expression
55
40
  class Capture < Group::Base
56
41
  attr_accessor :number, :number_at_level
57
42
  alias identifier number
58
-
59
- def capturing?; true end
60
43
  end
61
44
 
62
45
  class Named < Group::Capture
@@ -75,11 +58,6 @@ module Regexp::Expression
75
58
  end
76
59
 
77
60
  class Comment < Group::Base
78
- def parts
79
- [text.dup]
80
- end
81
-
82
- def comment?; true end
83
61
  end
84
62
  end
85
63
 
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
- # TOOD: in regexp_parser v3.0.0 this should possibly be a Subexpression
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
4
  # that contains all expressions to its left.
5
5
  class Mark < Regexp::Expression::Base; end
6
6
  end
@@ -1,11 +1,11 @@
1
1
  module Regexp::Expression
2
2
  class PosixClass < Regexp::Expression::Base
3
- def negative?
4
- type == :nonposixclass
5
- end
6
-
7
3
  def name
8
- token.to_s
4
+ text[/\w+/]
9
5
  end
10
6
  end
7
+
8
+ # alias for symmetry between token symbol and Expression class name
9
+ Posixclass = PosixClass
10
+ Nonposixclass = PosixClass
11
11
  end
@@ -1,17 +1,12 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :property, one way or the other, in v3.0.0
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
- def negative?
6
- type == :nonproperty
7
- end
8
-
9
4
  def name
10
5
  text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
6
  end
12
7
 
13
8
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
9
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
10
  end
16
11
  end
17
12
 
@@ -110,10 +105,15 @@ module Regexp::Expression
110
105
  class Unassigned < Codepoint::Base; end
111
106
  end
112
107
 
113
- class Age < UnicodeProperty::Base; end
114
- class Derived < UnicodeProperty::Base; end
115
- class Emoji < UnicodeProperty::Base; end
116
- class Script < UnicodeProperty::Base; end
117
- class Block < UnicodeProperty::Base; end
108
+ class Age < UnicodeProperty::Base; end
109
+ class Block < UnicodeProperty::Base; end
110
+ class Derived < UnicodeProperty::Base; end
111
+ class Emoji < UnicodeProperty::Base; end
112
+ class Enumerated < UnicodeProperty::Base; end
113
+ class Script < UnicodeProperty::Base; end
118
114
  end
115
+
116
+ # alias for symmetry between token symbol and Expression class name
117
+ Property = UnicodeProperty
118
+ Nonproperty = UnicodeProperty
119
119
  end # module Regexp::Expression
@@ -25,11 +25,9 @@ module Regexp::Expression
25
25
  def token_class
26
26
  if self == Root || self < Sequence
27
27
  nil # no token class because these objects are Parser-generated
28
- # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
- elsif self == Alternation || self == CharacterType::Any
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
30
  Regexp::Syntax::Token::Meta
31
- elsif self <= EscapeSequence::Base
32
- Regexp::Syntax::Token::Escape
33
31
  else
34
32
  Regexp::Syntax::Token.const_get(name.split('::')[2])
35
33
  end
@@ -0,0 +1,5 @@
1
+ Regexp::Expression::EscapeSequence::Base.class_eval do
2
+ def char
3
+ codepoint.chr('utf-8')
4
+ end
5
+ end
@@ -0,0 +1,68 @@
1
+ module Regexp::Expression::EscapeSequence
2
+ AsciiEscape.class_eval { def codepoint; 0x1B end }
3
+ Backspace.class_eval { def codepoint; 0x8 end }
4
+ Bell.class_eval { def codepoint; 0x7 end }
5
+ FormFeed.class_eval { def codepoint; 0xC end }
6
+ Newline.class_eval { def codepoint; 0xA end }
7
+ Return.class_eval { def codepoint; 0xD end }
8
+ Tab.class_eval { def codepoint; 0x9 end }
9
+ VerticalTab.class_eval { def codepoint; 0xB end }
10
+
11
+ Literal.class_eval { def codepoint; text[1].ord end }
12
+
13
+ Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
14
+
15
+ Hex.class_eval { def codepoint; text[/\h+/].hex end }
16
+ Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
17
+
18
+ CodepointList.class_eval do
19
+ # Maybe this should be a unique top-level expression class?
20
+ def char
21
+ raise NoMethodError, 'CodepointList responds only to #chars'
22
+ end
23
+
24
+ def codepoint
25
+ raise NoMethodError, 'CodepointList responds only to #codepoints'
26
+ end
27
+
28
+ def chars
29
+ codepoints.map { |cp| cp.chr('utf-8') }
30
+ end
31
+
32
+ def codepoints
33
+ text.scan(/\h+/).map(&:hex)
34
+ end
35
+ end
36
+
37
+ AbstractMetaControlSequence.class_eval do
38
+ private
39
+
40
+ def control_sequence_to_s(control_sequence)
41
+ five_lsb = control_sequence.unpack('B*').first[-5..-1]
42
+ ["000#{five_lsb}"].pack('B*')
43
+ end
44
+
45
+ def meta_char_to_codepoint(meta_char)
46
+ byte_value = meta_char.ord
47
+ byte_value < 128 ? byte_value + 128 : byte_value
48
+ end
49
+ end
50
+
51
+ Control.class_eval do
52
+ def codepoint
53
+ control_sequence_to_s(text).ord
54
+ end
55
+ end
56
+
57
+ Meta.class_eval do
58
+ def codepoint
59
+ meta_char_to_codepoint(text[-1])
60
+ end
61
+ end
62
+
63
+ MetaControl.class_eval do
64
+ def codepoint
65
+ meta_char_to_codepoint(control_sequence_to_s(text))
66
+ end
67
+ end
68
+ end
@@ -63,16 +63,20 @@ class Regexp::MatchLength
63
63
  end
64
64
 
65
65
  def to_re
66
- "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
66
+ /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
67
67
  end
68
68
 
69
69
  private
70
70
 
71
71
  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
72
72
 
73
- def test_regexp
74
- @test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
75
- regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
73
+ if Regexp.method_defined?(:match?) # ruby >= 2.4
74
+ def test_regexp
75
+ @test_regexp ||= /^#{to_re}$/
76
+ end
77
+ else
78
+ def test_regexp
79
+ @test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
76
80
  end
77
81
  end
78
82
  end
@@ -0,0 +1,20 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def negative?
4
+ false
5
+ end
6
+
7
+ # not an alias so as to respect overrides of #negative?
8
+ def negated?
9
+ negative?
10
+ end
11
+ end
12
+
13
+ Anchor::NonWordBoundary.class_eval { def negative?; true end }
14
+ Assertion::NegativeLookahead.class_eval { def negative?; true end }
15
+ Assertion::NegativeLookbehind.class_eval { def negative?; true end }
16
+ CharacterSet.class_eval { def negative?; negative end }
17
+ CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
18
+ PosixClass.class_eval { def negative?; type == :nonposixclass end }
19
+ UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
20
+ end
@@ -0,0 +1,23 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation
4
+ def parts
5
+ [text.dup]
6
+ end
7
+
8
+ private
9
+
10
+ def intersperse(expressions, separator)
11
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
12
+ end
13
+ end
14
+
15
+ CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
16
+ CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
17
+ Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18
+ Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
19
+ Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
20
+ Group::Comment.class_eval { def parts; [text.dup] end }
21
+ Subexpression.class_eval { def parts; expressions end }
22
+ SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
23
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def inspect
4
+ [
5
+ "#<#{self.class}",
6
+ pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
7
+ ">"
8
+ ].join
9
+ end
10
+
11
+ # Make pretty-print work despite #inspect implementation.
12
+ def pretty_print(q)
13
+ q.pp_object(self)
14
+ end
15
+
16
+ # Called by pretty_print (ruby/pp) and #inspect.
17
+ def pretty_print_instance_variables
18
+ [
19
+ (:@text unless text.to_s.empty?),
20
+ (:@quantifier if quantified?),
21
+ (:@options unless options.empty?),
22
+ (:@expressions unless terminal?),
23
+ ].compact
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,28 @@
1
+ module Regexp::Expression
2
+ module ReferencedExpressions
3
+ attr_accessor :referenced_expressions
4
+
5
+ def referenced_expression
6
+ referenced_expressions && referenced_expressions.first
7
+ end
8
+
9
+ def initialize_copy(orig)
10
+ exp_id = [self.class, self.starts_at]
11
+
12
+ # prevent infinite recursion for recursive subexp calls
13
+ copied = self.class.instance_eval { @copied_ref_exps ||= {} }
14
+ self.referenced_expressions =
15
+ if copied[exp_id]
16
+ orig.referenced_expressions
17
+ else
18
+ copied[exp_id] = true
19
+ orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
20
+ end
21
+ copied.clear
22
+
23
+ super
24
+ end
25
+ end
26
+
27
+ Base.include ReferencedExpressions
28
+ end