regexp_parser 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,8 @@ module Regexp::Syntax
3
3
  # is useful during development, testing, and should be useful for some types
4
4
  # of transformations as well.
5
5
  class Any < Base
6
- def initialize # rubocop:disable Lint/MissingSuper
7
- @implements = { :* => %i[*] }
8
- end
6
+ implements :*, [:*]
9
7
 
10
- def implements?(_type, _token) true end
11
- def implements!(_type, _token) true end
8
+ def self.implements?(_type, _token) true end
12
9
  end
13
10
  end
@@ -1,7 +1,7 @@
1
1
  module Regexp::Syntax
2
2
  class NotImplementedError < Regexp::Syntax::SyntaxError
3
3
  def initialize(syntax, type, token)
4
- super "#{syntax.class.name} does not implement: [#{type}:#{token}]"
4
+ super "#{syntax} does not implement: [#{type}:#{token}]"
5
5
  end
6
6
  end
7
7
 
@@ -9,85 +9,112 @@ module Regexp::Syntax
9
9
  class Base
10
10
  include Regexp::Syntax::Token
11
11
 
12
- def initialize
13
- @implements = {}
12
+ class << self
13
+ attr_accessor :features
14
14
 
15
- implements Token::Literal::Type, Token::Literal::All
16
- implements Token::FreeSpace::Type, Token::FreeSpace::All
17
- end
15
+ # automatically inherit features through the syntax class hierarchy
16
+ def inherited(subclass)
17
+ super
18
+ subclass.features = features.to_h.map { |k, v| [k, v.dup] }.to_h
19
+ end
18
20
 
19
- def features
20
- @implements
21
- end
21
+ def implements(type, tokens)
22
+ (features[type] ||= []).concat(tokens)
23
+ added_features[type] = tokens
24
+ end
22
25
 
23
- def implementations(type)
24
- @implements[type] ||= []
25
- end
26
+ def excludes(type, tokens)
27
+ tokens.each { |tok| features[type].delete(tok) }
28
+ removed_features[type] = tokens
29
+ end
26
30
 
27
- def implements(type, tokens)
28
- implementations(type).concat(Array(tokens))
29
- end
31
+ def implements?(type, token)
32
+ implementations(type).include?(token)
33
+ end
34
+ alias :check? :implements?
30
35
 
31
- def excludes(type, tokens)
32
- Array(tokens).each { |tok| implementations(type).delete(tok) }
33
- end
36
+ def implementations(type)
37
+ features[type] || []
38
+ end
34
39
 
35
- def implements?(type, token)
36
- implementations(type).include?(token)
37
- end
38
- alias :check? :implements?
40
+ def implements!(type, token)
41
+ raise NotImplementedError.new(self, type, token) unless
42
+ implements?(type, token)
43
+ end
44
+ alias :check! :implements!
39
45
 
40
- def implements!(type, token)
41
- raise NotImplementedError.new(self, type, token) unless
42
- implements?(type, token)
43
- end
44
- alias :check! :implements!
45
-
46
- def normalize(type, token)
47
- case type
48
- when :group
49
- normalize_group(type, token)
50
- when :backref
51
- normalize_backref(type, token)
52
- else
53
- [type, token]
46
+ def added_features
47
+ @added_features ||= {}
54
48
  end
55
- end
56
49
 
57
- def normalize_group(type, token)
58
- case token
59
- when :named_ab, :named_sq
60
- %i[group named]
61
- else
62
- [type, token]
50
+ def removed_features
51
+ @removed_features ||= {}
63
52
  end
53
+
54
+ def normalize(type, token)
55
+ case type
56
+ when :group
57
+ normalize_group(type, token)
58
+ when :backref
59
+ normalize_backref(type, token)
60
+ else
61
+ [type, token]
62
+ end
63
+ end
64
+
65
+ def normalize_group(type, token)
66
+ case token
67
+ when :named_ab, :named_sq
68
+ %i[group named]
69
+ else
70
+ [type, token]
71
+ end
72
+ end
73
+
74
+ def normalize_backref(type, token)
75
+ case token
76
+ when :name_ref_ab, :name_ref_sq
77
+ %i[backref name_ref]
78
+ when :name_call_ab, :name_call_sq
79
+ %i[backref name_call]
80
+ when :name_recursion_ref_ab, :name_recursion_ref_sq
81
+ %i[backref name_recursion_ref]
82
+ when :number_ref_ab, :number_ref_sq
83
+ %i[backref number_ref]
84
+ when :number_call_ab, :number_call_sq
85
+ %i[backref number_call]
86
+ when :number_rel_ref_ab, :number_rel_ref_sq
87
+ %i[backref number_rel_ref]
88
+ when :number_rel_call_ab, :number_rel_call_sq
89
+ %i[backref number_rel_call]
90
+ when :number_recursion_ref_ab, :number_recursion_ref_sq
91
+ %i[backref number_recursion_ref]
92
+ else
93
+ [type, token]
94
+ end
95
+ end
96
+ end
97
+
98
+ # TODO: drop this backwards compatibility code in v3.0.0, do `private :new`
99
+ def initialize
100
+ warn 'Using instances of Regexp::Parser::Syntax is deprecated ' \
101
+ "and will no longer be supported in v3.0.0."
64
102
  end
65
103
 
66
- def normalize_backref(type, token)
67
- case token
68
- when :name_ref_ab, :name_ref_sq
69
- %i[backref name_ref]
70
- when :name_call_ab, :name_call_sq
71
- %i[backref name_call]
72
- when :name_recursion_ref_ab, :name_recursion_ref_sq
73
- %i[backref name_recursion_ref]
74
- when :number_ref_ab, :number_ref_sq
75
- %i[backref number_ref]
76
- when :number_call_ab, :number_call_sq
77
- %i[backref number_call]
78
- when :number_rel_ref_ab, :number_rel_ref_sq
79
- %i[backref number_rel_ref]
80
- when :number_rel_call_ab, :number_rel_call_sq
81
- %i[backref number_rel_call]
82
- when :number_recursion_ref_ab, :number_recursion_ref_sq
83
- %i[backref number_recursion_ref]
104
+ def method_missing(name, *args)
105
+ if self.class.respond_to?(name)
106
+ warn 'Using instances of Regexp::Parser::Syntax is deprecated ' \
107
+ "and will no longer be supported in v3.0.0. Please call "\
108
+ "methods on the class directly, e.g.: #{self.class}.#{name}"
109
+ self.class.send(name, *args)
84
110
  else
85
- [type, token]
111
+ super
86
112
  end
87
113
  end
88
114
 
89
- def self.inspect
90
- "#{super} (feature set of #{ancestors[1].to_s.split('::').last})"
115
+ def respond_to_missing?(name, include_private = false)
116
+ self.class.respond_to?(name) || super
91
117
  end
118
+ # end of backwards compatibility code
92
119
  end
93
120
  end
@@ -1,7 +1,7 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Quantifier
4
- Greedy = %i[
4
+ Greedy = %i[
5
5
  zero_or_one
6
6
  zero_or_more
7
7
  one_or_more
@@ -13,7 +13,7 @@ module Regexp::Syntax
13
13
  one_or_more_reluctant
14
14
  ]
15
15
 
16
- Possessive = %i[
16
+ Possessive = %i[
17
17
  zero_or_one_possessive
18
18
  zero_or_more_possessive
19
19
  one_or_more_possessive
@@ -23,9 +23,9 @@ module Regexp::Syntax
23
23
  IntervalReluctant = %i[interval_reluctant]
24
24
  IntervalPossessive = %i[interval_possessive]
25
25
 
26
- IntervalAll = Interval + IntervalReluctant +
27
- IntervalPossessive
26
+ IntervalAll = Interval + IntervalReluctant + IntervalPossessive
28
27
 
28
+ V1_8_6 = Greedy + Reluctant + Interval + IntervalReluctant
29
29
  All = Greedy + Reluctant + Possessive + IntervalAll
30
30
  Type = :quantifier
31
31
  end
@@ -59,6 +59,8 @@ module Regexp::Syntax
59
59
 
60
60
  Age_V3_1_0 = %i[age=13.0]
61
61
 
62
+ Age_V3_2_0 = %i[age=14.0]
63
+
62
64
  Age = all[:Age_V]
63
65
 
64
66
  Derived_V1_9_0 = %i[
@@ -322,6 +324,14 @@ module Regexp::Syntax
322
324
  yezidi
323
325
  ]
324
326
 
327
+ Script_V3_2_0 = %i[
328
+ cypro_minoan
329
+ old_uyghur
330
+ tangsa
331
+ toto
332
+ vithkuqi
333
+ ]
334
+
325
335
  Script = all[:Script_V]
326
336
 
327
337
  UnicodeBlock_V1_9_0 = %i[
@@ -660,6 +670,21 @@ module Regexp::Syntax
660
670
  in_yezidi
661
671
  ]
662
672
 
673
+ UnicodeBlock_V3_2_0 = %i[
674
+ in_arabic_extended_b
675
+ in_cypro_minoan
676
+ in_ethiopic_extended_b
677
+ in_kana_extended_b
678
+ in_latin_extended_f
679
+ in_latin_extended_g
680
+ in_old_uyghur
681
+ in_tangsa
682
+ in_toto
683
+ in_unified_canadian_aboriginal_syllabics_extended_a
684
+ in_vithkuqi
685
+ in_znamenny_musical_notation
686
+ ]
687
+
663
688
  UnicodeBlock = all[:UnicodeBlock_V]
664
689
 
665
690
  Emoji_V2_5_0 = %i[
@@ -683,6 +708,7 @@ module Regexp::Syntax
683
708
  V2_6_2 = all[:V2_6_2]
684
709
  V2_6_3 = all[:V2_6_3]
685
710
  V3_1_0 = all[:V3_1_0]
711
+ V3_2_0 = all[:V3_2_0]
686
712
 
687
713
  All = all[/^V\d+_\d+_\d+$/]
688
714
 
@@ -17,29 +17,31 @@ module Regexp::Syntax
17
17
 
18
18
  module_function
19
19
 
20
- # Loads and instantiates an instance of the syntax specification class for
21
- # the given syntax version name. The special names 'any' and '*' return an
22
- # instance of Syntax::Any.
20
+ # Returns the syntax specification class for the given syntax
21
+ # version name. The special names 'any' and '*' return Syntax::Any.
22
+ def for(name)
23
+ (@alias_map ||= {})[name] ||= version_class(name)
24
+ end
25
+
23
26
  def new(name)
24
- return Regexp::Syntax::Any.new if ['*', 'any'].include?(name.to_s)
25
- version_class(name).new
27
+ warn 'Regexp::Syntax.new is deprecated in favor of Regexp::Syntax.for. '\
28
+ 'It does not return distinct instances and will be removed in v3.0.0.'
29
+ self.for(name)
26
30
  end
27
31
 
28
32
  def supported?(name)
29
- name =~ VERSION_REGEXP &&
30
- comparable_version(name) >= comparable_version('1.8.6')
33
+ name =~ VERSION_REGEXP && comparable(name) >= comparable('1.8.6')
31
34
  end
32
35
 
33
36
  def version_class(version)
37
+ return Regexp::Syntax::Any if ['*', 'any'].include?(version.to_s)
38
+
34
39
  version =~ VERSION_REGEXP || raise(InvalidVersionNameError, version)
35
- version_const_name = version_const_name(version)
40
+ warn_if_future_version(version)
41
+ version_const_name = "V#{version.to_s.scan(/\d+/).join('_')}"
36
42
  const_get(version_const_name) || raise(UnknownSyntaxNameError, version)
37
43
  end
38
44
 
39
- def version_const_name(version_string)
40
- "V#{version_string.to_s.scan(/\d+/).join('_')}"
41
- end
42
-
43
45
  def const_missing(const_name)
44
46
  if const_name =~ VERSION_CONST_REGEXP
45
47
  return fallback_version_class(const_name)
@@ -48,35 +50,24 @@ module Regexp::Syntax
48
50
  end
49
51
 
50
52
  def fallback_version_class(version)
51
- sorted_versions = (specified_versions + [version])
52
- .sort_by { |name| comparable_version(name) }
53
- return if (version_index = sorted_versions.index(version)) < 1
54
-
55
- next_lower_version = sorted_versions[version_index - 1]
56
- inherit_from_version(next_lower_version, version)
57
- end
58
-
59
- def inherit_from_version(parent_version, new_version)
60
- new_const = version_const_name(new_version)
61
- parent = const_get(version_const_name(parent_version))
62
- const_defined?(new_const) || const_set(new_const, Class.new(parent))
63
- warn_if_future_version(new_const)
64
- const_get(new_const)
53
+ sorted = (specified_versions + [version]).sort_by { |ver| comparable(ver) }
54
+ index = sorted.index(version)
55
+ index > 0 && const_get(sorted[index - 1])
65
56
  end
66
57
 
67
58
  def specified_versions
68
59
  constants.select { |const_name| const_name =~ VERSION_CONST_REGEXP }
69
60
  end
70
61
 
71
- def comparable_version(name)
62
+ def comparable(name)
72
63
  # add .99 to treat versions without a patch value as latest patch version
73
64
  Gem::Version.new((name.to_s.scan(/\d+/) << 99).join('.'))
74
65
  end
75
66
 
76
67
  def warn_if_future_version(const_name)
77
- return if comparable_version(const_name) < comparable_version('4.0.0')
68
+ return if comparable(const_name) < comparable('4.0.0')
78
69
 
79
70
  warn('This library has only been tested up to Ruby 3.x, '\
80
- "but you are running with #{const_get(const_name).inspect}")
71
+ "but you are running with #{const_name}")
81
72
  end
82
73
  end
@@ -1,21 +1,14 @@
1
- module Regexp::Syntax
2
- class V1_8_6 < Regexp::Syntax::Base
3
- def initialize
4
- super
5
-
6
- implements :anchor, Anchor::All
7
- implements :assertion, Assertion::Lookahead
8
- implements :backref, Backreference::V1_8_6
9
- implements :posixclass, PosixClass::Standard
10
- implements :group, Group::V1_8_6
11
- implements :meta, Meta::Extended
12
- implements :set, CharacterSet::All
13
- implements :type, CharacterType::Extended
14
- implements :escape,
15
- Escape::Basic + Escape::ASCII + Escape::Meta + Escape::Control
16
- implements :quantifier,
17
- Quantifier::Greedy + Quantifier::Reluctant +
18
- Quantifier::Interval + Quantifier::IntervalReluctant
19
- end
20
- end
1
+ class Regexp::Syntax::V1_8_6 < Regexp::Syntax::Base
2
+ implements :anchor, Anchor::All
3
+ implements :assertion, Assertion::Lookahead
4
+ implements :backref, Backreference::V1_8_6
5
+ implements :escape, Escape::Basic + Escape::ASCII + Escape::Meta + Escape::Control
6
+ implements :free_space, FreeSpace::All
7
+ implements :group, Group::V1_8_6
8
+ implements :literal, Literal::All
9
+ implements :meta, Meta::Extended
10
+ implements :posixclass, PosixClass::Standard
11
+ implements :quantifier, Quantifier::V1_8_6
12
+ implements :set, CharacterSet::All
13
+ implements :type, CharacterType::Extended
21
14
  end
@@ -1,18 +1,11 @@
1
- module Regexp::Syntax
2
- class V1_9_1 < Regexp::Syntax::V1_8_6
3
- def initialize
4
- super
5
-
6
- implements :assertion, Assertion::Lookbehind
7
- implements :backref, Backreference::V1_9_1 + SubexpressionCall::All
8
- implements :posixclass, PosixClass::Extensions
9
- implements :nonposixclass, PosixClass::All
10
- implements :escape, Escape::Unicode + Escape::Hex + Escape::Octal
11
- implements :type, CharacterType::Hex
12
- implements :property, UnicodeProperty::V1_9_0
13
- implements :nonproperty, UnicodeProperty::V1_9_0
14
- implements :quantifier,
15
- Quantifier::Possessive + Quantifier::IntervalPossessive
16
- end
17
- end
1
+ class Regexp::Syntax::V1_9_1 < Regexp::Syntax::V1_8_6
2
+ implements :assertion, Assertion::Lookbehind
3
+ implements :backref, Backreference::V1_9_1 + SubexpressionCall::All
4
+ implements :escape, Escape::Unicode + Escape::Hex + Escape::Octal
5
+ implements :posixclass, PosixClass::Extensions
6
+ implements :nonposixclass, PosixClass::All
7
+ implements :property, UnicodeProperty::V1_9_0
8
+ implements :nonproperty, UnicodeProperty::V1_9_0
9
+ implements :quantifier, Quantifier::Possessive + Quantifier::IntervalPossessive
10
+ implements :type, CharacterType::Hex
18
11
  end
@@ -1,11 +1,4 @@
1
- module Regexp::Syntax
2
- class V1_9_3 < Regexp::Syntax::V1_9_1
3
- def initialize
4
- super
5
-
6
- # these were added with update of Oniguruma to Unicode 6.0
7
- implements :property, UnicodeProperty::V1_9_3
8
- implements :nonproperty, UnicodeProperty::V1_9_3
9
- end
10
- end
1
+ class Regexp::Syntax::V1_9_3 < Regexp::Syntax::V1_9_1
2
+ implements :property, UnicodeProperty::V1_9_3
3
+ implements :nonproperty, UnicodeProperty::V1_9_3
11
4
  end
@@ -1,17 +1,10 @@
1
- module Regexp::Syntax
2
- # use the last 1.9 release as the base
3
- class V2_0_0 < Regexp::Syntax::V1_9
4
- def initialize
5
- super
1
+ class Regexp::Syntax::V2_0_0 < Regexp::Syntax::V1_9_3
2
+ implements :keep, Keep::All
3
+ implements :conditional, Conditional::All
4
+ implements :property, UnicodeProperty::V2_0_0
5
+ implements :nonproperty, UnicodeProperty::V2_0_0
6
+ implements :type, CharacterType::Clustered
6
7
 
7
- implements :keep, Keep::All
8
- implements :conditional, Conditional::All
9
- implements :property, UnicodeProperty::V2_0_0
10
- implements :nonproperty, UnicodeProperty::V2_0_0
11
- implements :type, CharacterType::Clustered
12
-
13
- excludes :property, :newline
14
- excludes :nonproperty, :newline
15
- end
16
- end
8
+ excludes :property, %i[newline]
9
+ excludes :nonproperty, %i[newline]
17
10
  end
@@ -1,10 +1,4 @@
1
- module Regexp::Syntax
2
- class V2_2_0 < Regexp::Syntax::V2_1
3
- def initialize
4
- super
5
-
6
- implements :property, UnicodeProperty::V2_2_0
7
- implements :nonproperty, UnicodeProperty::V2_2_0
8
- end
9
- end
1
+ class Regexp::Syntax::V2_2_0 < Regexp::Syntax::V2_0_0
2
+ implements :property, UnicodeProperty::V2_2_0
3
+ implements :nonproperty, UnicodeProperty::V2_2_0
10
4
  end
@@ -1,10 +1,4 @@
1
- module Regexp::Syntax
2
- class V2_3_0 < Regexp::Syntax::V2_2
3
- def initialize
4
- super
5
-
6
- implements :property, UnicodeProperty::V2_3_0
7
- implements :nonproperty, UnicodeProperty::V2_3_0
8
- end
9
- end
1
+ class Regexp::Syntax::V2_3_0 < Regexp::Syntax::V2_2_0
2
+ implements :property, UnicodeProperty::V2_3_0
3
+ implements :nonproperty, UnicodeProperty::V2_3_0
10
4
  end
@@ -1,10 +1,4 @@
1
- module Regexp::Syntax
2
- class V2_4_0 < Regexp::Syntax::V2_3
3
- def initialize
4
- super
5
-
6
- implements :property, UnicodeProperty::V2_4_0
7
- implements :nonproperty, UnicodeProperty::V2_4_0
8
- end
9
- end
1
+ class Regexp::Syntax::V2_4_0 < Regexp::Syntax::V2_3_0
2
+ implements :property, UnicodeProperty::V2_4_0
3
+ implements :nonproperty, UnicodeProperty::V2_4_0
10
4
  end
@@ -1,9 +1,3 @@
1
- module Regexp::Syntax
2
- class V2_4_1 < Regexp::Syntax::V2_4_0
3
- def initialize
4
- super
5
-
6
- implements :group, Group::V2_4_1
7
- end
8
- end
1
+ class Regexp::Syntax::V2_4_1 < Regexp::Syntax::V2_4_0
2
+ implements :group, Group::V2_4_1
9
3
  end
@@ -1,10 +1,4 @@
1
- module Regexp::Syntax
2
- class V2_5_0 < Regexp::Syntax::V2_4
3
- def initialize
4
- super
5
-
6
- implements :property, UnicodeProperty::V2_5_0
7
- implements :nonproperty, UnicodeProperty::V2_5_0
8
- end
9
- end
1
+ class Regexp::Syntax::V2_5_0 < Regexp::Syntax::V2_4_1
2
+ implements :property, UnicodeProperty::V2_5_0
3
+ implements :nonproperty, UnicodeProperty::V2_5_0
10
4
  end
@@ -1,10 +1,4 @@
1
- module Regexp::Syntax
2
- class V2_6_0 < Regexp::Syntax::V2_5
3
- def initialize
4
- super
5
-
6
- implements :property, UnicodeProperty::V2_6_0
7
- implements :nonproperty, UnicodeProperty::V2_6_0
8
- end
9
- end
1
+ class Regexp::Syntax::V2_6_0 < Regexp::Syntax::V2_5_0
2
+ implements :property, UnicodeProperty::V2_6_0
3
+ implements :nonproperty, UnicodeProperty::V2_6_0
10
4
  end
@@ -1,10 +1,4 @@
1
- module Regexp::Syntax
2
- class V2_6_2 < Regexp::Syntax::V2_6_0
3
- def initialize
4
- super
5
-
6
- implements :property, UnicodeProperty::V2_6_2
7
- implements :nonproperty, UnicodeProperty::V2_6_2
8
- end
9
- end
1
+ class Regexp::Syntax::V2_6_2 < Regexp::Syntax::V2_6_0
2
+ implements :property, UnicodeProperty::V2_6_2
3
+ implements :nonproperty, UnicodeProperty::V2_6_2
10
4
  end
@@ -1,10 +1,4 @@
1
- module Regexp::Syntax
2
- class V2_6_3 < Regexp::Syntax::V2_6_2
3
- def initialize
4
- super
5
-
6
- implements :property, UnicodeProperty::V2_6_3
7
- implements :nonproperty, UnicodeProperty::V2_6_3
8
- end
9
- end
1
+ class Regexp::Syntax::V2_6_3 < Regexp::Syntax::V2_6_2
2
+ implements :property, UnicodeProperty::V2_6_3
3
+ implements :nonproperty, UnicodeProperty::V2_6_3
10
4
  end
@@ -1,10 +1,4 @@
1
- module Regexp::Syntax
2
- class V3_1_0 < Regexp::Syntax::V2_6_3
3
- def initialize
4
- super
5
-
6
- implements :property, UnicodeProperty::V3_1_0
7
- implements :nonproperty, UnicodeProperty::V3_1_0
8
- end
9
- end
1
+ class Regexp::Syntax::V3_1_0 < Regexp::Syntax::V2_6_3
2
+ implements :property, UnicodeProperty::V3_1_0
3
+ implements :nonproperty, UnicodeProperty::V3_1_0
10
4
  end
@@ -0,0 +1,4 @@
1
+ class Regexp::Syntax::V3_2_0 < Regexp::Syntax::V3_1_0
2
+ implements :property, UnicodeProperty::V3_2_0
3
+ implements :nonproperty, UnicodeProperty::V3_2_0
4
+ end
@@ -1,4 +1,4 @@
1
- # Ruby 1.8.x is no longer a supported runtime,
1
+ # Ruby 1.x is no longer a supported runtime,
2
2
  # but its regex features are still recognized.
3
3
  #
4
4
  # Aliases for the latest patch version are provided as 'ruby/n.n',
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.2.1'
3
+ VERSION = '2.3.0'
4
4
  end
5
5
  end