regexp_parser 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -0
  3. data/Gemfile +5 -1
  4. data/README.md +15 -21
  5. data/Rakefile +11 -17
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  15. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  16. data/lib/regexp_parser/expression/classes/group.rb +6 -1
  17. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  18. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  19. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  20. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  21. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  22. data/lib/regexp_parser/expression/sequence.rb +3 -10
  23. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  24. data/lib/regexp_parser/expression.rb +7 -130
  25. data/lib/regexp_parser/lexer.rb +7 -5
  26. data/lib/regexp_parser/parser.rb +282 -334
  27. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  28. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  29. data/lib/regexp_parser/scanner/scanner.rl +64 -87
  30. data/lib/regexp_parser/scanner.rb +1024 -1073
  31. data/lib/regexp_parser/syntax/any.rb +2 -4
  32. data/lib/regexp_parser/syntax/base.rb +10 -10
  33. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  36. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  37. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  38. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  39. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  41. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  42. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  44. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  45. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  46. data/lib/regexp_parser/syntax/token.rb +45 -0
  47. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  48. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  49. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  50. data/lib/regexp_parser/syntax.rb +8 -6
  51. data/lib/regexp_parser/token.rb +9 -20
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +0 -2
  54. data/spec/expression/clone_spec.rb +36 -4
  55. data/spec/expression/free_space_spec.rb +2 -2
  56. data/spec/expression/methods/match_length_spec.rb +2 -2
  57. data/spec/lexer/nesting_spec.rb +2 -2
  58. data/spec/lexer/refcalls_spec.rb +5 -0
  59. data/spec/parser/all_spec.rb +2 -2
  60. data/spec/parser/escapes_spec.rb +43 -31
  61. data/spec/parser/properties_spec.rb +6 -4
  62. data/spec/parser/refcalls_spec.rb +5 -0
  63. data/spec/parser/set/ranges_spec.rb +26 -16
  64. data/spec/scanner/escapes_spec.rb +29 -20
  65. data/spec/scanner/refcalls_spec.rb +19 -0
  66. data/spec/scanner/sets_spec.rb +66 -23
  67. data/spec/spec_helper.rb +13 -1
  68. data/spec/support/capturing_stderr.rb +9 -0
  69. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  70. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  71. data/spec/syntax/versions/aliases_spec.rb +1 -0
  72. metadata +27 -26
  73. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  74. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  75. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  76. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  77. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  78. data/spec/support/runner.rb +0 -42
  79. data/spec/support/warning_extractor.rb +0 -60
@@ -1,129 +1,6 @@
1
- module Regexp::Expression
2
-
3
- class Base
4
- attr_accessor :type, :token
5
- attr_accessor :text, :ts
6
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
7
-
8
- attr_accessor :quantifier
9
- attr_accessor :options
10
-
11
- def initialize(token, options = {})
12
- self.type = token.type
13
- self.token = token.token
14
- self.text = token.text
15
- self.ts = token.ts
16
- self.level = token.level
17
- self.set_level = token.set_level
18
- self.conditional_level = token.conditional_level
19
- self.nesting_level = 0
20
- self.quantifier = nil
21
- self.options = options
22
- end
23
-
24
- def initialize_clone(orig)
25
- self.text = (orig.text ? orig.text.dup : nil)
26
- self.options = (orig.options ? orig.options.dup : nil)
27
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
28
- super
29
- end
30
-
31
- def to_re(format = :full)
32
- ::Regexp.new(to_s(format))
33
- end
34
-
35
- alias :starts_at :ts
36
-
37
- def base_length
38
- to_s(:base).length
39
- end
40
-
41
- def full_length
42
- to_s.length
43
- end
44
-
45
- def offset
46
- [starts_at, full_length]
47
- end
48
-
49
- def coded_offset
50
- '@%d+%d' % offset
51
- end
52
-
53
- def to_s(format = :full)
54
- "#{text}#{quantifier_affix(format)}"
55
- end
56
-
57
- def quantifier_affix(expression_format)
58
- quantifier.to_s if quantified? && expression_format != :base
59
- end
60
-
61
- def terminal?
62
- !respond_to?(:expressions)
63
- end
64
-
65
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
66
- self.quantifier = Quantifier.new(token, text, min, max, mode)
67
- end
68
-
69
- def unquantified_clone
70
- clone.tap { |exp| exp.quantifier = nil }
71
- end
72
-
73
- def quantified?
74
- !quantifier.nil?
75
- end
76
-
77
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
78
- def quantity
79
- return [nil,nil] unless quantified?
80
- [quantifier.min, quantifier.max]
81
- end
82
-
83
- def repetitions
84
- return 1..1 unless quantified?
85
- min = quantifier.min
86
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
87
- range = min..max
88
- # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
89
- if RUBY_VERSION.to_f < 2.7
90
- range.define_singleton_method(:minmax) { [min, max] }
91
- end
92
- range
93
- end
94
-
95
- def greedy?
96
- quantified? and quantifier.greedy?
97
- end
98
-
99
- def reluctant?
100
- quantified? and quantifier.reluctant?
101
- end
102
- alias :lazy? :reluctant?
103
-
104
- def possessive?
105
- quantified? and quantifier.possessive?
106
- end
107
-
108
- def attributes
109
- {
110
- type: type,
111
- token: token,
112
- text: to_s(:base),
113
- starts_at: ts,
114
- length: full_length,
115
- level: level,
116
- set_level: set_level,
117
- conditional_level: conditional_level,
118
- options: options,
119
- quantifier: quantified? ? quantifier.to_h : nil,
120
- }
121
- end
122
- alias :to_h :attributes
123
- end
124
-
125
- end # module Regexp::Expression
1
+ require 'regexp_parser/error'
126
2
 
3
+ require 'regexp_parser/expression/base'
127
4
  require 'regexp_parser/expression/quantifier'
128
5
  require 'regexp_parser/expression/subexpression'
129
6
  require 'regexp_parser/expression/sequence'
@@ -131,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
131
8
 
132
9
  require 'regexp_parser/expression/classes/alternation'
133
10
  require 'regexp_parser/expression/classes/anchor'
134
- require 'regexp_parser/expression/classes/backref'
11
+ require 'regexp_parser/expression/classes/backreference'
12
+ require 'regexp_parser/expression/classes/character_set'
13
+ require 'regexp_parser/expression/classes/character_set/intersection'
14
+ require 'regexp_parser/expression/classes/character_set/range'
135
15
  require 'regexp_parser/expression/classes/conditional'
136
- require 'regexp_parser/expression/classes/escape'
16
+ require 'regexp_parser/expression/classes/escape_sequence'
137
17
  require 'regexp_parser/expression/classes/free_space'
138
18
  require 'regexp_parser/expression/classes/group'
139
19
  require 'regexp_parser/expression/classes/keep'
@@ -141,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
141
21
  require 'regexp_parser/expression/classes/posix_class'
142
22
  require 'regexp_parser/expression/classes/property'
143
23
  require 'regexp_parser/expression/classes/root'
144
- require 'regexp_parser/expression/classes/set'
145
- require 'regexp_parser/expression/classes/set/intersection'
146
- require 'regexp_parser/expression/classes/set/range'
147
24
  require 'regexp_parser/expression/classes/type'
148
25
 
149
26
  require 'regexp_parser/expression/methods/match'
@@ -4,12 +4,14 @@
4
4
  # given syntax flavor.
5
5
  class Regexp::Lexer
6
6
 
7
- OPENING_TOKENS = [
8
- :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
9
- :atomic, :options, :options_switch, :named, :absence
7
+ OPENING_TOKENS = %i[
8
+ capture passive lookahead nlookahead lookbehind nlookbehind
9
+ atomic options options_switch named absence
10
10
  ].freeze
11
11
 
12
- CLOSING_TOKENS = [:close].freeze
12
+ CLOSING_TOKENS = %i[close].freeze
13
+
14
+ CONDITION_TOKENS = %i[condition condition_close].freeze
13
15
 
14
16
  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
15
17
  new.lex(input, syntax, options: options, &block)
@@ -40,7 +42,7 @@ class Regexp::Lexer
40
42
  nesting, set_nesting, conditional_nesting)
41
43
 
42
44
  current = merge_condition(current) if type == :conditional and
43
- [:condition, :condition_close].include?(token)
45
+ CONDITION_TOKENS.include?(token)
44
46
 
45
47
  last.next = current if last
46
48
  current.previous = last if last