regexp_parser 1.7.1 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +157 -1
  3. data/Gemfile +6 -1
  4. data/LICENSE +1 -1
  5. data/README.md +38 -32
  6. data/Rakefile +18 -27
  7. data/lib/regexp_parser/error.rb +4 -0
  8. data/lib/regexp_parser/expression/base.rb +123 -0
  9. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  10. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  14. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  15. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  16. data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
  17. data/lib/regexp_parser/expression/classes/group.rb +28 -3
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -17
  21. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  22. data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
  23. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  24. data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
  25. data/lib/regexp_parser/expression/quantifier.rb +11 -2
  26. data/lib/regexp_parser/expression/sequence.rb +3 -20
  27. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  28. data/lib/regexp_parser/expression.rb +7 -139
  29. data/lib/regexp_parser/lexer.rb +13 -11
  30. data/lib/regexp_parser/parser.rb +325 -344
  31. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  32. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  33. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  34. data/lib/regexp_parser/scanner/property.rl +2 -2
  35. data/lib/regexp_parser/scanner/scanner.rl +235 -255
  36. data/lib/regexp_parser/scanner.rb +1324 -1387
  37. data/lib/regexp_parser/syntax/any.rb +4 -6
  38. data/lib/regexp_parser/syntax/base.rb +13 -15
  39. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  41. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  42. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  44. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  45. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  46. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  47. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  48. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  49. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  50. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  51. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  52. data/lib/regexp_parser/syntax/token.rb +45 -0
  53. data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
  54. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  55. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  56. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  57. data/lib/regexp_parser/syntax.rb +8 -6
  58. data/lib/regexp_parser/token.rb +9 -20
  59. data/lib/regexp_parser/version.rb +1 -1
  60. data/lib/regexp_parser.rb +0 -2
  61. data/regexp_parser.gemspec +20 -22
  62. metadata +34 -165
  63. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  64. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  65. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  66. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  67. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  68. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  69. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  70. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  71. data/spec/expression/base_spec.rb +0 -94
  72. data/spec/expression/clone_spec.rb +0 -120
  73. data/spec/expression/conditional_spec.rb +0 -89
  74. data/spec/expression/free_space_spec.rb +0 -27
  75. data/spec/expression/methods/match_length_spec.rb +0 -161
  76. data/spec/expression/methods/match_spec.rb +0 -25
  77. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  78. data/spec/expression/methods/tests_spec.rb +0 -99
  79. data/spec/expression/methods/traverse_spec.rb +0 -161
  80. data/spec/expression/options_spec.rb +0 -128
  81. data/spec/expression/root_spec.rb +0 -9
  82. data/spec/expression/sequence_spec.rb +0 -9
  83. data/spec/expression/subexpression_spec.rb +0 -50
  84. data/spec/expression/to_h_spec.rb +0 -26
  85. data/spec/expression/to_s_spec.rb +0 -100
  86. data/spec/lexer/all_spec.rb +0 -22
  87. data/spec/lexer/conditionals_spec.rb +0 -53
  88. data/spec/lexer/delimiters_spec.rb +0 -68
  89. data/spec/lexer/escapes_spec.rb +0 -14
  90. data/spec/lexer/keep_spec.rb +0 -10
  91. data/spec/lexer/literals_spec.rb +0 -89
  92. data/spec/lexer/nesting_spec.rb +0 -99
  93. data/spec/lexer/refcalls_spec.rb +0 -55
  94. data/spec/parser/all_spec.rb +0 -43
  95. data/spec/parser/alternation_spec.rb +0 -88
  96. data/spec/parser/anchors_spec.rb +0 -17
  97. data/spec/parser/conditionals_spec.rb +0 -179
  98. data/spec/parser/errors_spec.rb +0 -30
  99. data/spec/parser/escapes_spec.rb +0 -121
  100. data/spec/parser/free_space_spec.rb +0 -130
  101. data/spec/parser/groups_spec.rb +0 -108
  102. data/spec/parser/keep_spec.rb +0 -6
  103. data/spec/parser/posix_classes_spec.rb +0 -8
  104. data/spec/parser/properties_spec.rb +0 -115
  105. data/spec/parser/quantifiers_spec.rb +0 -52
  106. data/spec/parser/refcalls_spec.rb +0 -112
  107. data/spec/parser/set/intersections_spec.rb +0 -127
  108. data/spec/parser/set/ranges_spec.rb +0 -111
  109. data/spec/parser/sets_spec.rb +0 -178
  110. data/spec/parser/types_spec.rb +0 -18
  111. data/spec/scanner/all_spec.rb +0 -18
  112. data/spec/scanner/anchors_spec.rb +0 -21
  113. data/spec/scanner/conditionals_spec.rb +0 -128
  114. data/spec/scanner/delimiters_spec.rb +0 -52
  115. data/spec/scanner/errors_spec.rb +0 -67
  116. data/spec/scanner/escapes_spec.rb +0 -53
  117. data/spec/scanner/free_space_spec.rb +0 -133
  118. data/spec/scanner/groups_spec.rb +0 -52
  119. data/spec/scanner/keep_spec.rb +0 -10
  120. data/spec/scanner/literals_spec.rb +0 -49
  121. data/spec/scanner/meta_spec.rb +0 -18
  122. data/spec/scanner/properties_spec.rb +0 -64
  123. data/spec/scanner/quantifiers_spec.rb +0 -20
  124. data/spec/scanner/refcalls_spec.rb +0 -36
  125. data/spec/scanner/sets_spec.rb +0 -102
  126. data/spec/scanner/types_spec.rb +0 -14
  127. data/spec/spec_helper.rb +0 -15
  128. data/spec/support/runner.rb +0 -42
  129. data/spec/support/shared_examples.rb +0 -77
  130. data/spec/support/warning_extractor.rb +0 -60
  131. data/spec/syntax/syntax_spec.rb +0 -48
  132. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  133. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  134. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  135. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  136. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  137. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  138. data/spec/syntax/versions/aliases_spec.rb +0 -37
  139. data/spec/token/token_spec.rb +0 -85
@@ -1,88 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Alternation parsing') do
4
- let(:root) { RP.parse('(ab??|cd*|ef+)*|(gh|ij|kl)?') }
5
-
6
- specify('parse alternation root') do
7
- e = root[0]
8
- expect(e).to be_a(Alternation)
9
- end
10
-
11
- specify('parse alternation alts') do
12
- alts = root[0].alternatives
13
-
14
- expect(alts[0]).to be_a(Alternative)
15
- expect(alts[1]).to be_a(Alternative)
16
-
17
- expect(alts[0][0]).to be_a(Group::Capture)
18
- expect(alts[1][0]).to be_a(Group::Capture)
19
-
20
- expect(alts.length).to eq 2
21
- end
22
-
23
- specify('parse alternation nested') do
24
- e = root[0].alternatives[0][0][0]
25
-
26
- expect(e).to be_a(Alternation)
27
- end
28
-
29
- specify('parse alternation nested sequence') do
30
- alts = root[0][0]
31
- nested = alts[0][0][0]
32
-
33
- expect(nested).to be_a(Alternative)
34
-
35
- expect(nested[0]).to be_a(Literal)
36
- expect(nested[1]).to be_a(Literal)
37
- expect(nested.expressions.length).to eq 2
38
- end
39
-
40
- specify('parse alternation nested groups') do
41
- root = RP.parse('(i|ey|([ougfd]+)|(ney))')
42
-
43
- alts = root[0][0].alternatives
44
- expect(alts.length).to eq 4
45
- end
46
-
47
- specify('parse alternation grouped alts') do
48
- root = RP.parse('ca((n)|(t)|(ll)|(b))')
49
-
50
- alts = root[1][0].alternatives
51
-
52
- expect(alts.length).to eq 4
53
-
54
- expect(alts[0]).to be_a(Alternative)
55
- expect(alts[1]).to be_a(Alternative)
56
- expect(alts[2]).to be_a(Alternative)
57
- expect(alts[3]).to be_a(Alternative)
58
- end
59
-
60
- specify('parse alternation nested grouped alts') do
61
- root = RP.parse('ca((n|t)|(ll|b))')
62
-
63
- alts = root[1][0].alternatives
64
-
65
- expect(alts.length).to eq 2
66
-
67
- expect(alts[0]).to be_a(Alternative)
68
- expect(alts[1]).to be_a(Alternative)
69
-
70
- subalts = root[1][0][0][0][0].alternatives
71
-
72
- expect(alts.length).to eq 2
73
-
74
- expect(subalts[0]).to be_a(Alternative)
75
- expect(subalts[1]).to be_a(Alternative)
76
- end
77
-
78
- specify('parse alternation continues after nesting') do
79
- root = RP.parse(/a|(b)c/)
80
-
81
- seq = root[0][1].expressions
82
-
83
- expect(seq.length).to eq 2
84
-
85
- expect(seq[0]).to be_a(Group::Capture)
86
- expect(seq[1]).to be_a(Literal)
87
- end
88
- end
@@ -1,17 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Anchor parsing') do
4
- include_examples 'parse', /^a/, 0 => [:anchor, :bol, Anchor::BOL]
5
- include_examples 'parse', /a$/, 1 => [:anchor, :eol, Anchor::EOL]
6
-
7
- include_examples 'parse', /\Aa/, 0 => [:anchor, :bos, Anchor::BOS]
8
- include_examples 'parse', /a\z/, 1 => [:anchor, :eos, Anchor::EOS]
9
- include_examples 'parse', /a\Z/, 1 => [:anchor, :eos_ob_eol, Anchor::EOSobEOL]
10
-
11
- include_examples 'parse', /a\b/, 1 => [:anchor, :word_boundary, Anchor::WordBoundary]
12
- include_examples 'parse', /a\B/, 1 => [:anchor, :nonword_boundary, Anchor::NonWordBoundary]
13
-
14
- include_examples 'parse', /a\G/, 1 => [:anchor, :match_start, Anchor::MatchStart]
15
-
16
- include_examples 'parse', /\\A/, 0 => [:escape, :backslash, EscapeSequence::Literal]
17
- end
@@ -1,179 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Conditional parsing') do
4
- specify('parse conditional') do
5
- regexp = /(?<A>a)(?(<A>)T|F)/
6
-
7
- root = RP.parse(regexp, 'ruby/2.0')
8
- exp = root[1]
9
-
10
- expect(exp).to be_a(Conditional::Expression)
11
-
12
- expect(exp.type).to eq :conditional
13
- expect(exp.token).to eq :open
14
- expect(exp.to_s).to eq '(?(<A>)T|F)'
15
- expect(exp.reference).to eq 'A'
16
- end
17
-
18
- specify('parse conditional condition') do
19
- regexp = /(?<A>a)(?(<A>)T|F)/
20
-
21
- root = RP.parse(regexp, 'ruby/2.0')
22
- exp = root[1].condition
23
-
24
- expect(exp).to be_a(Conditional::Condition)
25
-
26
- expect(exp.type).to eq :conditional
27
- expect(exp.token).to eq :condition
28
- expect(exp.to_s).to eq '(<A>)'
29
- expect(exp.reference).to eq 'A'
30
- expect(exp.referenced_expression.to_s).to eq '(?<A>a)'
31
- end
32
-
33
- specify('parse conditional condition with number ref') do
34
- regexp = /(a)(?(1)T|F)/
35
-
36
- root = RP.parse(regexp, 'ruby/2.0')
37
- exp = root[1].condition
38
-
39
- expect(exp).to be_a(Conditional::Condition)
40
-
41
- expect(exp.type).to eq :conditional
42
- expect(exp.token).to eq :condition
43
- expect(exp.to_s).to eq '(1)'
44
- expect(exp.reference).to eq 1
45
- expect(exp.referenced_expression.to_s).to eq '(a)'
46
- end
47
-
48
- specify('parse conditional nested groups') do
49
- regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
50
-
51
- root = RP.parse(regexp, 'ruby/2.0')
52
-
53
- expect(root.to_s).to eq regexp.source
54
-
55
- group = root.first
56
- expect(group).to be_instance_of(Group::Capture)
57
-
58
- alt = group.first
59
- expect(alt).to be_instance_of(Alternation)
60
- expect(alt.length).to eq 3
61
-
62
- expect(alt.map(&:first)).to all(be_a Group::Capture)
63
-
64
- subgroup = alt[2].first
65
- conditional = subgroup.first
66
-
67
- expect(conditional).to be_instance_of(Conditional::Expression)
68
- expect(conditional.length).to eq 3
69
-
70
- expect(conditional[0]).to be_instance_of(Conditional::Condition)
71
- expect(conditional[0].to_s).to eq '(2)'
72
-
73
- condition = conditional.condition
74
- expect(condition).to be_instance_of(Conditional::Condition)
75
- expect(condition.to_s).to eq '(2)'
76
-
77
- branches = conditional.branches
78
- expect(branches.length).to eq 2
79
- expect(branches).to be_instance_of(Array)
80
- end
81
-
82
- specify('parse conditional nested') do
83
- regexp = /(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/
84
-
85
- root = RP.parse(regexp, 'ruby/2.0')
86
-
87
- expect(root.to_s).to eq regexp.source
88
-
89
- {
90
- 1 => [2, root[1]],
91
- 2 => [2, root[1][1][0]],
92
- 3 => [2, root[1][1][0][2][0]],
93
- 4 => [1, root[1][2][0]],
94
- 5 => [2, root[1][2][0][1][0]]
95
- }.each do |index, example|
96
- branch_count, exp = example
97
-
98
- expect(exp).to be_instance_of(Conditional::Expression)
99
- expect(exp.condition.to_s).to eq "(#{index})"
100
- expect(exp.branches.length).to eq branch_count
101
- end
102
- end
103
-
104
- specify('parse conditional nested alternation') do
105
- regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
106
-
107
- root = RP.parse(regexp, 'ruby/2.0')
108
-
109
- expect(root.to_s).to eq regexp.source
110
-
111
- expect(root.first).to be_instance_of(Alternation)
112
-
113
- [
114
- [3, 'b|c|d', root[0][0][1][1][0][0]],
115
- [3, 'e|f|g', root[0][0][1][2][0][0]],
116
- [3, 'i|j|k', root[0][0][3][1][0][0]],
117
- [3, 'l|m|n', root[0][0][3][2][0][0]]
118
- ].each do |example|
119
- alt_count, alt_text, exp = example
120
-
121
- expect(exp).to be_instance_of(Alternation)
122
- expect(exp.to_s).to eq alt_text
123
- expect(exp.alternatives.length).to eq alt_count
124
- end
125
- end
126
-
127
- specify('parse conditional extra separator') do
128
- regexp = /(?<A>a)(?(<A>)T|)/
129
-
130
- root = RP.parse(regexp, 'ruby/2.0')
131
- branches = root[1].branches
132
-
133
- expect(branches.length).to eq 2
134
-
135
- seq_1, seq_2 = branches
136
-
137
- [seq_1, seq_2].each do |seq|
138
- expect(seq).to be_a(Sequence)
139
-
140
- expect(seq.type).to eq :expression
141
- expect(seq.token).to eq :sequence
142
- end
143
-
144
- expect(seq_1.to_s).to eq 'T'
145
- expect(seq_2.to_s).to eq ''
146
- end
147
-
148
- specify('parse conditional quantified') do
149
- regexp = /(foo)(?(1)\d|(\w)){42}/
150
-
151
- root = RP.parse(regexp, 'ruby/2.0')
152
- conditional = root[1]
153
-
154
- expect(conditional).to be_quantified
155
- expect(conditional.quantifier.to_s).to eq '{42}'
156
- expect(conditional.to_s).to eq '(?(1)\\d|(\\w)){42}'
157
- expect(conditional.branches.any?(&:quantified?)).to be false
158
- end
159
-
160
- specify('parse conditional branch content quantified') do
161
- regexp = /(foo)(?(1)\d{23}|(\w){42})/
162
-
163
- root = RP.parse(regexp, 'ruby/2.0')
164
- conditional = root[1]
165
-
166
- expect(conditional).not_to be_quantified
167
- expect(conditional.branches.any?(&:quantified?)).to be false
168
- expect(conditional.branches[0][0]).to be_quantified
169
- expect(conditional.branches[0][0].quantifier.to_s).to eq '{23}'
170
- expect(conditional.branches[1][0]).to be_quantified
171
- expect(conditional.branches[1][0].quantifier.to_s).to eq '{42}'
172
- end
173
-
174
- specify('parse conditional excessive branches') do
175
- regexp = '(?<A>a)(?(<A>)T|F|X)'
176
-
177
- expect { RP.parse(regexp, 'ruby/2.0') }.to raise_error(Conditional::TooManyBranches)
178
- end
179
- end
@@ -1,30 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Parsing errors') do
4
- let(:parser) { Regexp::Parser.new }
5
- before { parser.parse(/foo/) } # initializes ivars
6
-
7
- it('raises UnknownTokenTypeError for unknown token types') do
8
- expect { parser.send(:parse_token, Regexp::Token.new(:foo, :bar)) }
9
- .to raise_error(Regexp::Parser::UnknownTokenTypeError)
10
- end
11
-
12
- RSpec.shared_examples 'UnknownTokenError' do |type, token|
13
- it "raises for unkown tokens of type #{type}" do
14
- expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
15
- .to raise_error(Regexp::Parser::UnknownTokenError)
16
- end
17
- end
18
-
19
- include_examples 'UnknownTokenError', :anchor
20
- include_examples 'UnknownTokenError', :backref
21
- include_examples 'UnknownTokenError', :conditional
22
- include_examples 'UnknownTokenError', :free_space
23
- include_examples 'UnknownTokenError', :group
24
- include_examples 'UnknownTokenError', :meta
25
- include_examples 'UnknownTokenError', :nonproperty
26
- include_examples 'UnknownTokenError', :property
27
- include_examples 'UnknownTokenError', :quantifier
28
- include_examples 'UnknownTokenError', :set
29
- include_examples 'UnknownTokenError', :type
30
- end
@@ -1,121 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('EscapeSequence parsing') do
4
- include_examples 'parse', /a\ac/, 1 => [:escape, :bell, EscapeSequence::Bell]
5
- include_examples 'parse', /a\ec/, 1 => [:escape, :escape, EscapeSequence::AsciiEscape]
6
- include_examples 'parse', /a\fc/, 1 => [:escape, :form_feed, EscapeSequence::FormFeed]
7
- include_examples 'parse', /a\nc/, 1 => [:escape, :newline, EscapeSequence::Newline]
8
- include_examples 'parse', /a\rc/, 1 => [:escape, :carriage, EscapeSequence::Return]
9
- include_examples 'parse', /a\tc/, 1 => [:escape, :tab, EscapeSequence::Tab]
10
- include_examples 'parse', /a\vc/, 1 => [:escape, :vertical_tab, EscapeSequence::VerticalTab]
11
-
12
- # meta character escapes
13
- include_examples 'parse', /a\.c/, 1 => [:escape, :dot, EscapeSequence::Literal]
14
- include_examples 'parse', /a\?c/, 1 => [:escape, :zero_or_one, EscapeSequence::Literal]
15
- include_examples 'parse', /a\*c/, 1 => [:escape, :zero_or_more, EscapeSequence::Literal]
16
- include_examples 'parse', /a\+c/, 1 => [:escape, :one_or_more, EscapeSequence::Literal]
17
- include_examples 'parse', /a\|c/, 1 => [:escape, :alternation, EscapeSequence::Literal]
18
- include_examples 'parse', /a\(c/, 1 => [:escape, :group_open, EscapeSequence::Literal]
19
- include_examples 'parse', /a\)c/, 1 => [:escape, :group_close, EscapeSequence::Literal]
20
- include_examples 'parse', /a\{c/, 1 => [:escape, :interval_open, EscapeSequence::Literal]
21
- include_examples 'parse', /a\}c/, 1 => [:escape, :interval_close, EscapeSequence::Literal]
22
-
23
- # unicode escapes
24
- include_examples 'parse', /a\u0640/, 1 => [:escape, :codepoint, EscapeSequence::Codepoint]
25
- include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
26
- include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
27
-
28
- # hex escapes
29
- include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
30
-
31
- # octal escapes
32
- include_examples 'parse', /a\177/n, 1 => [:escape, :octal, EscapeSequence::Octal]
33
-
34
- specify('parse chars and codepoints') do
35
- root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
36
-
37
- expect(root[0].char).to eq "\n"
38
- expect(root[0].codepoint).to eq 10
39
-
40
- expect(root[1].char).to eq '?'
41
- expect(root[1].codepoint).to eq 63
42
-
43
- expect(root[2].char).to eq 'A'
44
- expect(root[2].codepoint).to eq 65
45
-
46
- expect(root[3].char).to eq 'B'
47
- expect(root[3].codepoint).to eq 66
48
-
49
- expect(root[4].char).to eq 'C'
50
- expect(root[4].codepoint).to eq 67
51
-
52
- expect(root[5].chars).to eq %w[D E]
53
- expect(root[5].codepoints).to eq [68, 69]
54
-
55
- expect { root[5].char }.to raise_error(/#chars/)
56
- expect { root[5].codepoint }.to raise_error(/#codepoints/)
57
- end
58
-
59
- specify('parse escape control sequence lower') do
60
- root = RP.parse(/a\\\c2b/)
61
-
62
- expect(root[2]).to be_instance_of(EscapeSequence::Control)
63
- expect(root[2].text).to eq '\\c2'
64
- expect(root[2].char).to eq "\x12"
65
- expect(root[2].codepoint).to eq 18
66
- end
67
-
68
- specify('parse escape control sequence upper') do
69
- root = RP.parse(/\d\\\C-C\w/)
70
-
71
- expect(root[2]).to be_instance_of(EscapeSequence::Control)
72
- expect(root[2].text).to eq '\\C-C'
73
- expect(root[2].char).to eq "\x03"
74
- expect(root[2].codepoint).to eq 3
75
- end
76
-
77
- specify('parse escape meta sequence') do
78
- root = RP.parse(/\Z\\\M-Z/n)
79
-
80
- expect(root[2]).to be_instance_of(EscapeSequence::Meta)
81
- expect(root[2].text).to eq '\\M-Z'
82
- expect(root[2].char).to eq "\u00DA"
83
- expect(root[2].codepoint).to eq 218
84
- end
85
-
86
- specify('parse escape meta control sequence') do
87
- root = RP.parse(/\A\\\M-\C-X/n)
88
-
89
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
90
- expect(root[2].text).to eq '\\M-\\C-X'
91
- expect(root[2].char).to eq "\u0098"
92
- expect(root[2].codepoint).to eq 152
93
- end
94
-
95
- specify('parse lower c meta control sequence') do
96
- root = RP.parse(/\A\\\M-\cX/n)
97
-
98
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
99
- expect(root[2].text).to eq '\\M-\\cX'
100
- expect(root[2].char).to eq "\u0098"
101
- expect(root[2].codepoint).to eq 152
102
- end
103
-
104
- specify('parse escape reverse meta control sequence') do
105
- root = RP.parse(/\A\\\C-\M-X/n)
106
-
107
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
108
- expect(root[2].text).to eq '\\C-\\M-X'
109
- expect(root[2].char).to eq "\u0098"
110
- expect(root[2].codepoint).to eq 152
111
- end
112
-
113
- specify('parse escape reverse lower c meta control sequence') do
114
- root = RP.parse(/\A\\\c\M-X/n)
115
-
116
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
117
- expect(root[2].text).to eq '\\c\\M-X'
118
- expect(root[2].char).to eq "\u0098"
119
- expect(root[2].codepoint).to eq 152
120
- end
121
- end
@@ -1,130 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('FreeSpace parsing') do
4
- specify('parse free space spaces') do
5
- regexp = /a ? b * c + d{2,4}/x
6
- root = RP.parse(regexp)
7
-
8
- 0.upto(6) do |i|
9
- if i.odd?
10
- expect(root[i]).to be_instance_of(WhiteSpace)
11
- expect(root[i].text).to eq ' '
12
- else
13
- expect(root[i]).to be_instance_of(Literal)
14
- expect(root[i]).to be_quantified
15
- end
16
- end
17
- end
18
-
19
- specify('parse non free space literals') do
20
- regexp = /a b c d/
21
- root = RP.parse(regexp)
22
-
23
- expect(root.first).to be_instance_of(Literal)
24
- expect(root.first.text).to eq 'a b c d'
25
- end
26
-
27
- specify('parse free space comments') do
28
- regexp = /
29
- a ? # One letter
30
- b {2,5} # Another one
31
- [c-g] + # A set
32
- (h|i|j) | # A group
33
- klm *
34
- nop +
35
- /x
36
-
37
- root = RP.parse(regexp)
38
-
39
- alt = root.first
40
- expect(alt).to be_instance_of(Alternation)
41
-
42
- alt_1 = alt.alternatives.first
43
- expect(alt_1).to be_instance_of(Alternative)
44
- expect(alt_1.length).to eq 15
45
-
46
- [0, 2, 4, 6, 8, 12, 14].each do |i|
47
- expect(alt_1[i]).to be_instance_of(WhiteSpace)
48
- end
49
-
50
- [3, 7, 11].each { |i| expect(alt_1[i].class).to eq Comment }
51
-
52
- alt_2 = alt.alternatives.last
53
- expect(alt_2).to be_instance_of(Alternative)
54
- expect(alt_2.length).to eq 7
55
-
56
- [0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
57
-
58
- expect(alt_2[1]).to be_instance_of(Comment)
59
- end
60
-
61
- specify('parse free space nested comments') do
62
- regexp = /
63
- # Group one
64
- (
65
- abc # Comment one
66
- \d? # Optional \d
67
- )+
68
-
69
- # Group two
70
- (
71
- def # Comment two
72
- \s? # Optional \s
73
- )?
74
- /x
75
-
76
- root = RP.parse(regexp)
77
-
78
- top_comment_1 = root[1]
79
- expect(top_comment_1).to be_instance_of(Comment)
80
- expect(top_comment_1.text).to eq "# Group one\n"
81
- expect(top_comment_1.starts_at).to eq 7
82
-
83
- top_comment_2 = root[5]
84
- expect(top_comment_2).to be_instance_of(Comment)
85
- expect(top_comment_2.text).to eq "# Group two\n"
86
- expect(top_comment_2.starts_at).to eq 95
87
-
88
- [3, 7].each do |g,|
89
- group = root[g]
90
-
91
- [3, 7].each do |c|
92
- comment = group[c]
93
- expect(comment).to be_instance_of(Comment)
94
- expect(comment.text.length).to eq 14
95
- end
96
- end
97
- end
98
-
99
- specify('parse free space quantifiers') do
100
- regexp = /
101
- a
102
- # comment 1
103
- ?
104
- (
105
- b # comment 2
106
- # comment 3
107
- +
108
- )
109
- # comment 4
110
- *
111
- /x
112
-
113
- root = RP.parse(regexp)
114
-
115
- literal_1 = root[1]
116
- expect(literal_1).to be_instance_of(Literal)
117
- expect(literal_1).to be_quantified
118
- expect(literal_1.quantifier.token).to eq :zero_or_one
119
-
120
- group = root[5]
121
- expect(group).to be_instance_of(Group::Capture)
122
- expect(group).to be_quantified
123
- expect(group.quantifier.token).to eq :zero_or_more
124
-
125
- literal_2 = group[1]
126
- expect(literal_2).to be_instance_of(Literal)
127
- expect(literal_2).to be_quantified
128
- expect(literal_2.quantifier.token).to eq :one_or_more
129
- end
130
- end
@@ -1,108 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Group parsing') do
4
- include_examples 'parse', /(?=abc)(?!def)/,
5
- 0 => [:assertion, :lookahead, Assertion::Lookahead],
6
- 1 => [:assertion, :nlookahead, Assertion::NegativeLookahead]
7
-
8
- include_examples 'parse', /(?<=abc)(?<!def)/,
9
- 0 => [:assertion, :lookbehind, Assertion::Lookbehind],
10
- 1 => [:assertion, :nlookbehind, Assertion::NegativeLookbehind]
11
-
12
- include_examples 'parse', /a(?# is for apple)b(?# for boy)c(?# cat)/,
13
- 1 => [:group, :comment, Group::Comment],
14
- 3 => [:group, :comment, Group::Comment],
15
- 5 => [:group, :comment, Group::Comment]
16
-
17
- if ruby_version_at_least('2.4.1')
18
- include_examples 'parse', 'a(?~b)c(?~d)e',
19
- 1 => [:group, :absence, Group::Absence],
20
- 3 => [:group, :absence, Group::Absence]
21
- end
22
-
23
- include_examples 'parse', /(?m:a)/,
24
- 0 => [:group, :options, Group::Options, options: { m: true }, option_changes: { m: true }]
25
-
26
- # self-defeating group option
27
- include_examples 'parse', /(?m-m:a)/,
28
- 0 => [:group, :options, Group::Options, options: {}, option_changes: { m: false }]
29
-
30
- # activate one option in nested group
31
- include_examples 'parse', /(?x-mi:a(?m:b))/,
32
- 0 => [:group, :options, Group::Options, options: { x: true }, option_changes: { i: false, m: false, x: true }],
33
- [0, 1] => [:group, :options, Group::Options, options: { m: true, x: true }, option_changes: { m: true }]
34
-
35
- # deactivate one option in nested group
36
- include_examples 'parse', /(?ix-m:a(?-i:b))/,
37
- 0 => [:group, :options, Group::Options, options: { i: true, x: true }, option_changes: { i: true, m: false, x: true }],
38
- [0, 1] => [:group, :options, Group::Options, options: { x: true }, option_changes: { i: false }]
39
-
40
- # invert all options in nested group
41
- include_examples 'parse', /(?xi-m:a(?m-ix:b))/,
42
- 0 => [:group, :options, Group::Options, options: { i: true, x: true }, option_changes: { i: true, m: false, x: true }],
43
- [0, 1] => [:group, :options, Group::Options, options: { m: true }, option_changes: { i: false, m: true, x: false }]
44
-
45
- # nested options affect literal subexpressions
46
- include_examples 'parse', /(?x-mi:a(?m:b))/,
47
- [0, 0] => [:literal, :literal, Literal, text: 'a', options: { x: true }],
48
- [0, 1, 0] => [:literal, :literal, Literal, text: 'b', options: { m: true, x: true }]
49
-
50
- # option switching group
51
- include_examples 'parse', /a(?i-m)b/m,
52
- 0 => [:literal, :literal, Literal, text: 'a', options: { m: true }],
53
- 1 => [:group, :options_switch, Group::Options, options: { i: true }, option_changes: { i: true, m: false }],
54
- 2 => [:literal, :literal, Literal, text: 'b', options: { i: true }]
55
-
56
- # option switch in group
57
- include_examples 'parse', /(a(?i-m)b)c/m,
58
- 0 => [:group, :capture, Group::Capture, options: { m: true }],
59
- [0, 0] => [:literal, :literal, Literal, text: 'a', options: { m: true }],
60
- [0, 1] => [:group, :options_switch, Group::Options, options: { i: true }, option_changes: { i: true, m: false }],
61
- [0, 2] => [:literal, :literal, Literal, text: 'b', options: { i: true }],
62
- 1 => [:literal, :literal, Literal, text: 'c', options: { m: true }]
63
-
64
- # nested option switch in group
65
- include_examples 'parse', /((?i-m)(a(?-i)b))/m,
66
- [0, 1] => [:group, :capture, Group::Capture, options: { i: true }],
67
- [0, 1, 0] => [:literal, :literal, Literal, text: 'a', options: { i: true }],
68
- [0, 1, 1] => [:group, :options_switch, Group::Options, options: {}, option_changes: { i: false }],
69
- [0, 1, 2] => [:literal, :literal, Literal, text: 'b', options: {}]
70
-
71
- # options dau
72
- include_examples 'parse', /(?dua:abc)/,
73
- 0 => [:group, :options, Group::Options, options: { a: true }, option_changes: { a: true }]
74
-
75
- # nested options dau
76
- include_examples 'parse', /(?u:a(?d:b))/,
77
- 0 => [:group, :options, Group::Options, options: { u: true }, option_changes: { u: true }],
78
- [0, 1] => [:group, :options, Group::Options, options: { d: true }, option_changes: { d: true, u: false }],
79
- [0, 1, 0] => [:literal, :literal, Literal, text: 'b', options: { d: true }]
80
-
81
- # nested options da
82
- include_examples 'parse', /(?di-xm:a(?da-x:b))/,
83
- 0 => [:group, :options, Group::Options, options: { d: true, i:true }],
84
- [0, 1] => [:group, :options, Group::Options, options: { a: true, i: true }, option_changes: { a: true, d: false, x: false}],
85
- [0, 1, 0] => [:literal, :literal, Literal, text: 'b', options: { a: true, i: true }]
86
-
87
- specify('parse group number') do
88
- root = RP.parse(/(a)(?=b)((?:c)(d|(e)))/)
89
-
90
- expect(root[0].number).to eq 1
91
- expect(root[1]).not_to respond_to(:number)
92
- expect(root[2].number).to eq 2
93
- expect(root[2][0]).not_to respond_to(:number)
94
- expect(root[2][1].number).to eq 3
95
- expect(root[2][1][0][1][0].number).to eq 4
96
- end
97
-
98
- specify('parse group number at level') do
99
- root = RP.parse(/(a)(?=b)((?:c)(d|(e)))/)
100
-
101
- expect(root[0].number_at_level).to eq 1
102
- expect(root[1]).not_to respond_to(:number_at_level)
103
- expect(root[2].number_at_level).to eq 2
104
- expect(root[2][0]).not_to respond_to(:number_at_level)
105
- expect(root[2][1].number_at_level).to eq 1
106
- expect(root[2][1][0][1][0].number_at_level).to eq 1
107
- end
108
- end
@@ -1,6 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Keep parsing') do
4
- include_examples 'parse', /ab\Kcd/, 1 => [:keep, :mark, Keep::Mark, text: '\K']
5
- include_examples 'parse', /(a\K)/, [0, 1] => [:keep, :mark, Keep::Mark, text: '\K']
6
- end
@@ -1,8 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('PosixClass parsing') do
4
- include_examples 'parse', /[[:word:]]/, [0, 0] => [:posixclass, :word, PosixClass,
5
- name: 'word', text: '[:word:]', negative?: false]
6
- include_examples 'parse', /[[:^word:]]/, [0, 0] => [:nonposixclass, :word, PosixClass,
7
- name: 'word', text: '[:^word:]', negative?: true]
8
- end