regexp_parser 2.2.0 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -2
  3. data/LICENSE +1 -1
  4. data/README.md +2 -2
  5. data/Rakefile +5 -8
  6. data/lib/regexp_parser/expression/classes/escape_sequence.rb +12 -7
  7. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  8. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  9. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  10. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  11. data/lib/regexp_parser/scanner.rb +126 -124
  12. data/lib/regexp_parser/syntax/base.rb +3 -5
  13. data/lib/regexp_parser/syntax/token/backreference.rb +7 -2
  14. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  15. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  16. data/lib/regexp_parser/version.rb +1 -1
  17. data/regexp_parser.gemspec +20 -22
  18. metadata +11 -143
  19. data/lib/regexp_parser/scanner/properties/long.yml +0 -607
  20. data/lib/regexp_parser/scanner/properties/short.yml +0 -245
  21. data/spec/expression/base_spec.rb +0 -104
  22. data/spec/expression/clone_spec.rb +0 -152
  23. data/spec/expression/conditional_spec.rb +0 -89
  24. data/spec/expression/free_space_spec.rb +0 -27
  25. data/spec/expression/methods/match_length_spec.rb +0 -161
  26. data/spec/expression/methods/match_spec.rb +0 -25
  27. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  28. data/spec/expression/methods/tests_spec.rb +0 -99
  29. data/spec/expression/methods/traverse_spec.rb +0 -161
  30. data/spec/expression/options_spec.rb +0 -128
  31. data/spec/expression/subexpression_spec.rb +0 -50
  32. data/spec/expression/to_h_spec.rb +0 -26
  33. data/spec/expression/to_s_spec.rb +0 -108
  34. data/spec/lexer/all_spec.rb +0 -22
  35. data/spec/lexer/conditionals_spec.rb +0 -53
  36. data/spec/lexer/delimiters_spec.rb +0 -68
  37. data/spec/lexer/escapes_spec.rb +0 -14
  38. data/spec/lexer/keep_spec.rb +0 -10
  39. data/spec/lexer/literals_spec.rb +0 -64
  40. data/spec/lexer/nesting_spec.rb +0 -99
  41. data/spec/lexer/refcalls_spec.rb +0 -60
  42. data/spec/parser/all_spec.rb +0 -43
  43. data/spec/parser/alternation_spec.rb +0 -88
  44. data/spec/parser/anchors_spec.rb +0 -17
  45. data/spec/parser/conditionals_spec.rb +0 -179
  46. data/spec/parser/errors_spec.rb +0 -30
  47. data/spec/parser/escapes_spec.rb +0 -133
  48. data/spec/parser/free_space_spec.rb +0 -130
  49. data/spec/parser/groups_spec.rb +0 -108
  50. data/spec/parser/keep_spec.rb +0 -6
  51. data/spec/parser/options_spec.rb +0 -28
  52. data/spec/parser/posix_classes_spec.rb +0 -8
  53. data/spec/parser/properties_spec.rb +0 -117
  54. data/spec/parser/quantifiers_spec.rb +0 -68
  55. data/spec/parser/refcalls_spec.rb +0 -117
  56. data/spec/parser/set/intersections_spec.rb +0 -127
  57. data/spec/parser/set/ranges_spec.rb +0 -121
  58. data/spec/parser/sets_spec.rb +0 -178
  59. data/spec/parser/types_spec.rb +0 -18
  60. data/spec/scanner/all_spec.rb +0 -18
  61. data/spec/scanner/anchors_spec.rb +0 -21
  62. data/spec/scanner/conditionals_spec.rb +0 -128
  63. data/spec/scanner/delimiters_spec.rb +0 -52
  64. data/spec/scanner/errors_spec.rb +0 -67
  65. data/spec/scanner/escapes_spec.rb +0 -73
  66. data/spec/scanner/free_space_spec.rb +0 -165
  67. data/spec/scanner/groups_spec.rb +0 -61
  68. data/spec/scanner/keep_spec.rb +0 -10
  69. data/spec/scanner/literals_spec.rb +0 -39
  70. data/spec/scanner/meta_spec.rb +0 -18
  71. data/spec/scanner/options_spec.rb +0 -36
  72. data/spec/scanner/properties_spec.rb +0 -64
  73. data/spec/scanner/quantifiers_spec.rb +0 -25
  74. data/spec/scanner/refcalls_spec.rb +0 -55
  75. data/spec/scanner/sets_spec.rb +0 -151
  76. data/spec/scanner/types_spec.rb +0 -14
  77. data/spec/spec_helper.rb +0 -28
  78. data/spec/support/capturing_stderr.rb +0 -9
  79. data/spec/support/shared_examples.rb +0 -77
  80. data/spec/syntax/syntax_spec.rb +0 -48
  81. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  82. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  83. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  84. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  85. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  86. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  87. data/spec/syntax/versions/aliases_spec.rb +0 -38
  88. data/spec/token/token_spec.rb +0 -85
@@ -1,60 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('RefCall lexing') do
4
- # Traditional numerical group back-reference
5
- include_examples 'lex', '(abc)\1',
6
- 3 => [:backref, :number, '\1', 5, 7, 0, 0, 0]
7
-
8
- # Group back-references, named, numbered, and relative
9
- include_examples 'lex', '(?<X>abc)\k<X>',
10
- 3 => [:backref, :name_ref, '\k<X>', 9, 14, 0, 0, 0]
11
- include_examples 'lex', "(?<X>abc)\\k'X'",
12
- 3 => [:backref, :name_ref, "\\k'X'", 9, 14, 0, 0, 0]
13
-
14
- include_examples 'lex', '(abc)\k<1>',
15
- 3 => [:backref, :number_ref, '\k<1>', 5, 10, 0, 0, 0]
16
- include_examples 'lex', "(abc)\\k'1'",
17
- 3 => [:backref, :number_ref, "\\k'1'", 5, 10, 0, 0, 0]
18
-
19
- include_examples 'lex', '(abc)\k<-1>',
20
- 3 => [:backref, :number_rel_ref, '\k<-1>', 5, 11, 0, 0, 0]
21
- include_examples 'lex', "(abc)\\k'-1'",
22
- 3 => [:backref, :number_rel_ref, "\\k'-1'", 5, 11, 0, 0, 0]
23
-
24
- # Sub-expression invocation, named, numbered, and relative
25
- include_examples 'lex', '(?<X>abc)\g<X>',
26
- 3 => [:backref, :name_call, '\g<X>', 9, 14, 0, 0, 0]
27
- include_examples 'lex', "(?<X>abc)\\g'X'",
28
- 3 => [:backref, :name_call, "\\g'X'", 9, 14, 0, 0, 0]
29
-
30
- include_examples 'lex', '(abc)\g<1>',
31
- 3 => [:backref, :number_call, '\g<1>', 5, 10, 0, 0, 0]
32
- include_examples 'lex', "(abc)\\g'1'",
33
- 3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
34
-
35
- include_examples 'lex', '\g<0>',
36
- 0 => [:backref, :number_call, '\g<0>', 0, 5, 0, 0, 0]
37
- include_examples 'lex', "\\g'0'",
38
- 0 => [:backref, :number_call, "\\g'0'", 0, 5, 0, 0, 0]
39
-
40
- include_examples 'lex', '(abc)\g<-1>',
41
- 3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
42
- include_examples 'lex', "(abc)\\g'-1'",
43
- 3 => [:backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0]
44
-
45
- include_examples 'lex', '(abc)\g<+1>',
46
- 3 => [:backref, :number_rel_call, '\g<+1>', 5, 11, 0, 0, 0]
47
- include_examples 'lex', "(abc)\\g'+1'",
48
- 3 => [:backref, :number_rel_call, "\\g'+1'", 5, 11, 0, 0, 0]
49
-
50
- # Group back-references, with nesting level
51
- include_examples 'lex', '(?<X>abc)\k<X-0>',
52
- 3 => [:backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0]
53
- include_examples 'lex', "(?<X>abc)\\k'X-0'",
54
- 3 => [:backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0]
55
-
56
- include_examples 'lex', '(abc)\k<1-0>',
57
- 3 => [:backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0]
58
- include_examples 'lex', "(abc)\\k'1-0'",
59
- 3 => [:backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0]
60
- end
@@ -1,43 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe(Regexp::Parser) do
4
- specify('parse returns a root expression') do
5
- expect(RP.parse('abc')).to be_instance_of(Root)
6
- end
7
-
8
- specify('parse can be called with block') do
9
- expect(RP.parse('abc') { |root| root.class }).to eq Root
10
- end
11
-
12
- specify('parse root contains expressions') do
13
- root = RP.parse(/^a.c+[^one]{2,3}\b\d\\\C-C$/)
14
- expect(root.expressions).to all(be_a Regexp::Expression::Base)
15
- end
16
-
17
- specify('parse root options mi') do
18
- root = RP.parse(/[abc]/mi, 'ruby/1.8')
19
-
20
- expect(root.m?).to be true
21
- expect(root.i?).to be true
22
- expect(root.x?).to be false
23
- end
24
-
25
- specify('parse node types') do
26
- root = RP.parse('^(one){2,3}([^d\\]efm-qz\\,\\-]*)(ghi)+$')
27
-
28
- expect(root[1][0]).to be_a(Literal)
29
- expect(root[1]).to be_quantified
30
- expect(root[2][0]).to be_a(CharacterSet)
31
- expect(root[2]).not_to be_quantified
32
- expect(root[3]).to be_a(Group::Capture)
33
- expect(root[3]).to be_quantified
34
- end
35
-
36
- specify('parse no quantifier target raises error') do
37
- expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
38
- end
39
-
40
- specify('parse sequence no quantifier target raises error') do
41
- expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
42
- end
43
- end
@@ -1,88 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Alternation parsing') do
4
- let(:root) { RP.parse('(ab??|cd*|ef+)*|(gh|ij|kl)?') }
5
-
6
- specify('parse alternation root') do
7
- e = root[0]
8
- expect(e).to be_a(Alternation)
9
- end
10
-
11
- specify('parse alternation alts') do
12
- alts = root[0].alternatives
13
-
14
- expect(alts[0]).to be_a(Alternative)
15
- expect(alts[1]).to be_a(Alternative)
16
-
17
- expect(alts[0][0]).to be_a(Group::Capture)
18
- expect(alts[1][0]).to be_a(Group::Capture)
19
-
20
- expect(alts.length).to eq 2
21
- end
22
-
23
- specify('parse alternation nested') do
24
- e = root[0].alternatives[0][0][0]
25
-
26
- expect(e).to be_a(Alternation)
27
- end
28
-
29
- specify('parse alternation nested sequence') do
30
- alts = root[0][0]
31
- nested = alts[0][0][0]
32
-
33
- expect(nested).to be_a(Alternative)
34
-
35
- expect(nested[0]).to be_a(Literal)
36
- expect(nested[1]).to be_a(Literal)
37
- expect(nested.expressions.length).to eq 2
38
- end
39
-
40
- specify('parse alternation nested groups') do
41
- root = RP.parse('(i|ey|([ougfd]+)|(ney))')
42
-
43
- alts = root[0][0].alternatives
44
- expect(alts.length).to eq 4
45
- end
46
-
47
- specify('parse alternation grouped alts') do
48
- root = RP.parse('ca((n)|(t)|(ll)|(b))')
49
-
50
- alts = root[1][0].alternatives
51
-
52
- expect(alts.length).to eq 4
53
-
54
- expect(alts[0]).to be_a(Alternative)
55
- expect(alts[1]).to be_a(Alternative)
56
- expect(alts[2]).to be_a(Alternative)
57
- expect(alts[3]).to be_a(Alternative)
58
- end
59
-
60
- specify('parse alternation nested grouped alts') do
61
- root = RP.parse('ca((n|t)|(ll|b))')
62
-
63
- alts = root[1][0].alternatives
64
-
65
- expect(alts.length).to eq 2
66
-
67
- expect(alts[0]).to be_a(Alternative)
68
- expect(alts[1]).to be_a(Alternative)
69
-
70
- subalts = root[1][0][0][0][0].alternatives
71
-
72
- expect(alts.length).to eq 2
73
-
74
- expect(subalts[0]).to be_a(Alternative)
75
- expect(subalts[1]).to be_a(Alternative)
76
- end
77
-
78
- specify('parse alternation continues after nesting') do
79
- root = RP.parse(/a|(b)c/)
80
-
81
- seq = root[0][1].expressions
82
-
83
- expect(seq.length).to eq 2
84
-
85
- expect(seq[0]).to be_a(Group::Capture)
86
- expect(seq[1]).to be_a(Literal)
87
- end
88
- end
@@ -1,17 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Anchor parsing') do
4
- include_examples 'parse', /^a/, 0 => [:anchor, :bol, Anchor::BOL]
5
- include_examples 'parse', /a$/, 1 => [:anchor, :eol, Anchor::EOL]
6
-
7
- include_examples 'parse', /\Aa/, 0 => [:anchor, :bos, Anchor::BOS]
8
- include_examples 'parse', /a\z/, 1 => [:anchor, :eos, Anchor::EOS]
9
- include_examples 'parse', /a\Z/, 1 => [:anchor, :eos_ob_eol, Anchor::EOSobEOL]
10
-
11
- include_examples 'parse', /a\b/, 1 => [:anchor, :word_boundary, Anchor::WordBoundary]
12
- include_examples 'parse', /a\B/, 1 => [:anchor, :nonword_boundary, Anchor::NonWordBoundary]
13
-
14
- include_examples 'parse', /a\G/, 1 => [:anchor, :match_start, Anchor::MatchStart]
15
-
16
- include_examples 'parse', /\\A/, 0 => [:escape, :backslash, EscapeSequence::Literal]
17
- end
@@ -1,179 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Conditional parsing') do
4
- specify('parse conditional') do
5
- regexp = /(?<A>a)(?(<A>)T|F)/
6
-
7
- root = RP.parse(regexp, 'ruby/2.0')
8
- exp = root[1]
9
-
10
- expect(exp).to be_a(Conditional::Expression)
11
-
12
- expect(exp.type).to eq :conditional
13
- expect(exp.token).to eq :open
14
- expect(exp.to_s).to eq '(?(<A>)T|F)'
15
- expect(exp.reference).to eq 'A'
16
- end
17
-
18
- specify('parse conditional condition') do
19
- regexp = /(?<A>a)(?(<A>)T|F)/
20
-
21
- root = RP.parse(regexp, 'ruby/2.0')
22
- exp = root[1].condition
23
-
24
- expect(exp).to be_a(Conditional::Condition)
25
-
26
- expect(exp.type).to eq :conditional
27
- expect(exp.token).to eq :condition
28
- expect(exp.to_s).to eq '(<A>)'
29
- expect(exp.reference).to eq 'A'
30
- expect(exp.referenced_expression.to_s).to eq '(?<A>a)'
31
- end
32
-
33
- specify('parse conditional condition with number ref') do
34
- regexp = /(a)(?(1)T|F)/
35
-
36
- root = RP.parse(regexp, 'ruby/2.0')
37
- exp = root[1].condition
38
-
39
- expect(exp).to be_a(Conditional::Condition)
40
-
41
- expect(exp.type).to eq :conditional
42
- expect(exp.token).to eq :condition
43
- expect(exp.to_s).to eq '(1)'
44
- expect(exp.reference).to eq 1
45
- expect(exp.referenced_expression.to_s).to eq '(a)'
46
- end
47
-
48
- specify('parse conditional nested groups') do
49
- regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
50
-
51
- root = RP.parse(regexp, 'ruby/2.0')
52
-
53
- expect(root.to_s).to eq regexp.source
54
-
55
- group = root.first
56
- expect(group).to be_instance_of(Group::Capture)
57
-
58
- alt = group.first
59
- expect(alt).to be_instance_of(Alternation)
60
- expect(alt.length).to eq 3
61
-
62
- expect(alt.map(&:first)).to all(be_a Group::Capture)
63
-
64
- subgroup = alt[2].first
65
- conditional = subgroup.first
66
-
67
- expect(conditional).to be_instance_of(Conditional::Expression)
68
- expect(conditional.length).to eq 3
69
-
70
- expect(conditional[0]).to be_instance_of(Conditional::Condition)
71
- expect(conditional[0].to_s).to eq '(2)'
72
-
73
- condition = conditional.condition
74
- expect(condition).to be_instance_of(Conditional::Condition)
75
- expect(condition.to_s).to eq '(2)'
76
-
77
- branches = conditional.branches
78
- expect(branches.length).to eq 2
79
- expect(branches).to be_instance_of(Array)
80
- end
81
-
82
- specify('parse conditional nested') do
83
- regexp = /(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/
84
-
85
- root = RP.parse(regexp, 'ruby/2.0')
86
-
87
- expect(root.to_s).to eq regexp.source
88
-
89
- {
90
- 1 => [2, root[1]],
91
- 2 => [2, root[1][1][0]],
92
- 3 => [2, root[1][1][0][2][0]],
93
- 4 => [1, root[1][2][0]],
94
- 5 => [2, root[1][2][0][1][0]]
95
- }.each do |index, example|
96
- branch_count, exp = example
97
-
98
- expect(exp).to be_instance_of(Conditional::Expression)
99
- expect(exp.condition.to_s).to eq "(#{index})"
100
- expect(exp.branches.length).to eq branch_count
101
- end
102
- end
103
-
104
- specify('parse conditional nested alternation') do
105
- regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
106
-
107
- root = RP.parse(regexp, 'ruby/2.0')
108
-
109
- expect(root.to_s).to eq regexp.source
110
-
111
- expect(root.first).to be_instance_of(Alternation)
112
-
113
- [
114
- [3, 'b|c|d', root[0][0][1][1][0][0]],
115
- [3, 'e|f|g', root[0][0][1][2][0][0]],
116
- [3, 'i|j|k', root[0][0][3][1][0][0]],
117
- [3, 'l|m|n', root[0][0][3][2][0][0]]
118
- ].each do |example|
119
- alt_count, alt_text, exp = example
120
-
121
- expect(exp).to be_instance_of(Alternation)
122
- expect(exp.to_s).to eq alt_text
123
- expect(exp.alternatives.length).to eq alt_count
124
- end
125
- end
126
-
127
- specify('parse conditional extra separator') do
128
- regexp = /(?<A>a)(?(<A>)T|)/
129
-
130
- root = RP.parse(regexp, 'ruby/2.0')
131
- branches = root[1].branches
132
-
133
- expect(branches.length).to eq 2
134
-
135
- seq_1, seq_2 = branches
136
-
137
- [seq_1, seq_2].each do |seq|
138
- expect(seq).to be_a(Sequence)
139
-
140
- expect(seq.type).to eq :expression
141
- expect(seq.token).to eq :sequence
142
- end
143
-
144
- expect(seq_1.to_s).to eq 'T'
145
- expect(seq_2.to_s).to eq ''
146
- end
147
-
148
- specify('parse conditional quantified') do
149
- regexp = /(foo)(?(1)\d|(\w)){42}/
150
-
151
- root = RP.parse(regexp, 'ruby/2.0')
152
- conditional = root[1]
153
-
154
- expect(conditional).to be_quantified
155
- expect(conditional.quantifier.to_s).to eq '{42}'
156
- expect(conditional.to_s).to eq '(?(1)\\d|(\\w)){42}'
157
- expect(conditional.branches.any?(&:quantified?)).to be false
158
- end
159
-
160
- specify('parse conditional branch content quantified') do
161
- regexp = /(foo)(?(1)\d{23}|(\w){42})/
162
-
163
- root = RP.parse(regexp, 'ruby/2.0')
164
- conditional = root[1]
165
-
166
- expect(conditional).not_to be_quantified
167
- expect(conditional.branches.any?(&:quantified?)).to be false
168
- expect(conditional.branches[0][0]).to be_quantified
169
- expect(conditional.branches[0][0].quantifier.to_s).to eq '{23}'
170
- expect(conditional.branches[1][0]).to be_quantified
171
- expect(conditional.branches[1][0].quantifier.to_s).to eq '{42}'
172
- end
173
-
174
- specify('parse conditional excessive branches') do
175
- regexp = '(?<A>a)(?(<A>)T|F|X)'
176
-
177
- expect { RP.parse(regexp, 'ruby/2.0') }.to raise_error(Conditional::TooManyBranches)
178
- end
179
- end
@@ -1,30 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('Parsing errors') do
4
- let(:parser) { Regexp::Parser.new }
5
- before { parser.parse(/foo/) } # initializes ivars
6
-
7
- it('raises UnknownTokenTypeError for unknown token types') do
8
- expect { parser.send(:parse_token, Regexp::Token.new(:foo, :bar)) }
9
- .to raise_error(Regexp::Parser::UnknownTokenTypeError)
10
- end
11
-
12
- RSpec.shared_examples 'UnknownTokenError' do |type|
13
- it "raises for unkown tokens of type #{type}" do
14
- expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
15
- .to raise_error(Regexp::Parser::UnknownTokenError)
16
- end
17
- end
18
-
19
- include_examples 'UnknownTokenError', :anchor
20
- include_examples 'UnknownTokenError', :backref
21
- include_examples 'UnknownTokenError', :conditional
22
- include_examples 'UnknownTokenError', :free_space
23
- include_examples 'UnknownTokenError', :group
24
- include_examples 'UnknownTokenError', :meta
25
- include_examples 'UnknownTokenError', :nonproperty
26
- include_examples 'UnknownTokenError', :property
27
- include_examples 'UnknownTokenError', :quantifier
28
- include_examples 'UnknownTokenError', :set
29
- include_examples 'UnknownTokenError', :type
30
- end
@@ -1,133 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('EscapeSequence parsing') do
4
- include_examples 'parse', /a\ac/, 1 => [:escape, :bell, EscapeSequence::Bell]
5
- include_examples 'parse', /a\ec/, 1 => [:escape, :escape, EscapeSequence::AsciiEscape]
6
- include_examples 'parse', /a\fc/, 1 => [:escape, :form_feed, EscapeSequence::FormFeed]
7
- include_examples 'parse', /a\nc/, 1 => [:escape, :newline, EscapeSequence::Newline]
8
- include_examples 'parse', /a\rc/, 1 => [:escape, :carriage, EscapeSequence::Return]
9
- include_examples 'parse', /a\tc/, 1 => [:escape, :tab, EscapeSequence::Tab]
10
- include_examples 'parse', /a\vc/, 1 => [:escape, :vertical_tab, EscapeSequence::VerticalTab]
11
-
12
- # meta character escapes
13
- include_examples 'parse', /a\.c/, 1 => [:escape, :dot, EscapeSequence::Literal]
14
- include_examples 'parse', /a\?c/, 1 => [:escape, :zero_or_one, EscapeSequence::Literal]
15
- include_examples 'parse', /a\*c/, 1 => [:escape, :zero_or_more, EscapeSequence::Literal]
16
- include_examples 'parse', /a\+c/, 1 => [:escape, :one_or_more, EscapeSequence::Literal]
17
- include_examples 'parse', /a\|c/, 1 => [:escape, :alternation, EscapeSequence::Literal]
18
- include_examples 'parse', /a\(c/, 1 => [:escape, :group_open, EscapeSequence::Literal]
19
- include_examples 'parse', /a\)c/, 1 => [:escape, :group_close, EscapeSequence::Literal]
20
- include_examples 'parse', /a\{c/, 1 => [:escape, :interval_open, EscapeSequence::Literal]
21
- include_examples 'parse', /a\}c/, 1 => [:escape, :interval_close, EscapeSequence::Literal]
22
-
23
- # unicode escapes
24
- include_examples 'parse', /a\u0640/, 1 => [:escape, :codepoint, EscapeSequence::Codepoint]
25
- include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
26
- include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
27
-
28
- # hex escapes
29
- include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
30
-
31
- # octal escapes
32
- include_examples 'parse', /a\177/n, 1 => [:escape, :octal, EscapeSequence::Octal]
33
-
34
- specify('parse chars and codepoints') do
35
- root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
36
-
37
- expect(root[0].char).to eq "\n"
38
- expect(root[0].codepoint).to eq 10
39
-
40
- expect(root[1].char).to eq '?'
41
- expect(root[1].codepoint).to eq 63
42
-
43
- expect(root[2].char).to eq 'A'
44
- expect(root[2].codepoint).to eq 65
45
-
46
- expect(root[3].char).to eq 'B'
47
- expect(root[3].codepoint).to eq 66
48
-
49
- expect(root[4].char).to eq 'C'
50
- expect(root[4].codepoint).to eq 67
51
-
52
- expect(root[5].chars).to eq %w[D E]
53
- expect(root[5].codepoints).to eq [68, 69]
54
-
55
- expect { root[5].char }.to raise_error(/#chars/)
56
- expect { root[5].codepoint }.to raise_error(/#codepoints/)
57
- end
58
-
59
- # Meta/control escapes
60
- #
61
- # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
62
- # escapes can only be set with the Regexp::new constructor.
63
- # In Regexp literals, these escapes are now pre-processed to hex escapes.
64
- #
65
- # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
66
- def parse_meta_control(regexp_body)
67
- regexp = Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n')
68
- RP.parse(regexp)
69
- end
70
-
71
- specify('parse escape control sequence lower') do
72
- root = parse_meta_control('a\\\\\c2b')
73
-
74
- expect(root[2]).to be_instance_of(EscapeSequence::Control)
75
- expect(root[2].text).to eq '\\c2'
76
- expect(root[2].char).to eq "\x12"
77
- expect(root[2].codepoint).to eq 18
78
- end
79
-
80
- specify('parse escape control sequence upper') do
81
- root = parse_meta_control('\d\C-C\w')
82
-
83
- expect(root[1]).to be_instance_of(EscapeSequence::Control)
84
- expect(root[1].text).to eq '\\C-C'
85
- expect(root[1].char).to eq "\x03"
86
- expect(root[1].codepoint).to eq 3
87
- end
88
-
89
- specify('parse escape meta sequence') do
90
- root = parse_meta_control('\Z\M-Z')
91
-
92
- expect(root[1]).to be_instance_of(EscapeSequence::Meta)
93
- expect(root[1].text).to eq '\\M-Z'
94
- expect(root[1].char).to eq "\u00DA"
95
- expect(root[1].codepoint).to eq 218
96
- end
97
-
98
- specify('parse escape meta control sequence') do
99
- root = parse_meta_control('\A\M-\C-X')
100
-
101
- expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
102
- expect(root[1].text).to eq '\\M-\\C-X'
103
- expect(root[1].char).to eq "\u0098"
104
- expect(root[1].codepoint).to eq 152
105
- end
106
-
107
- specify('parse lower c meta control sequence') do
108
- root = parse_meta_control('\A\M-\cX')
109
-
110
- expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
111
- expect(root[1].text).to eq '\\M-\\cX'
112
- expect(root[1].char).to eq "\u0098"
113
- expect(root[1].codepoint).to eq 152
114
- end
115
-
116
- specify('parse escape reverse meta control sequence') do
117
- root = parse_meta_control('\A\C-\M-X')
118
-
119
- expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
120
- expect(root[1].text).to eq '\\C-\\M-X'
121
- expect(root[1].char).to eq "\u0098"
122
- expect(root[1].codepoint).to eq 152
123
- end
124
-
125
- specify('parse escape reverse lower c meta control sequence') do
126
- root = parse_meta_control('\A\c\M-X')
127
-
128
- expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
129
- expect(root[1].text).to eq '\\c\\M-X'
130
- expect(root[1].char).to eq "\u0098"
131
- expect(root[1].codepoint).to eq 152
132
- end
133
- end
@@ -1,130 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe('FreeSpace parsing') do
4
- specify('parse free space spaces') do
5
- regexp = /a ? b * c + d{2,4}/x
6
- root = RP.parse(regexp)
7
-
8
- 0.upto(6) do |i|
9
- if i.odd?
10
- expect(root[i]).to be_instance_of(WhiteSpace)
11
- expect(root[i].text).to eq ' '
12
- else
13
- expect(root[i]).to be_instance_of(Literal)
14
- expect(root[i]).to be_quantified
15
- end
16
- end
17
- end
18
-
19
- specify('parse non free space literals') do
20
- regexp = /a b c d/
21
- root = RP.parse(regexp)
22
-
23
- expect(root.first).to be_instance_of(Literal)
24
- expect(root.first.text).to eq 'a b c d'
25
- end
26
-
27
- specify('parse free space comments') do
28
- regexp = /
29
- a ? # One letter
30
- b {2,5} # Another one
31
- [c-g] + # A set
32
- (h|i|j) | # A group
33
- klm *
34
- nop +
35
- /x
36
-
37
- root = RP.parse(regexp)
38
-
39
- alt = root.first
40
- expect(alt).to be_instance_of(Alternation)
41
-
42
- alt_1 = alt.alternatives.first
43
- expect(alt_1).to be_instance_of(Alternative)
44
- expect(alt_1.length).to eq 15
45
-
46
- [0, 2, 4, 6, 8, 12, 14].each do |i|
47
- expect(alt_1[i]).to be_instance_of(WhiteSpace)
48
- end
49
-
50
- [3, 7, 11].each { |i| expect(alt_1[i].class).to eq Comment }
51
-
52
- alt_2 = alt.alternatives.last
53
- expect(alt_2).to be_instance_of(Alternative)
54
- expect(alt_2.length).to eq 7
55
-
56
- [0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
57
-
58
- expect(alt_2[1]).to be_instance_of(Comment)
59
- end
60
-
61
- specify('parse free space nested comments') do
62
- regexp = /
63
- # Group one
64
- (
65
- abc # Comment one
66
- \d? # Optional \d
67
- )+
68
-
69
- # Group two
70
- (
71
- def # Comment two
72
- \s? # Optional \s
73
- )?
74
- /x
75
-
76
- root = RP.parse(regexp)
77
-
78
- top_comment_1 = root[1]
79
- expect(top_comment_1).to be_instance_of(Comment)
80
- expect(top_comment_1.text).to eq "# Group one\n"
81
- expect(top_comment_1.starts_at).to eq 7
82
-
83
- top_comment_2 = root[5]
84
- expect(top_comment_2).to be_instance_of(Comment)
85
- expect(top_comment_2.text).to eq "# Group two\n"
86
- expect(top_comment_2.starts_at).to eq 95
87
-
88
- [3, 7].each do |g,|
89
- group = root[g]
90
-
91
- [3, 7].each do |c|
92
- comment = group[c]
93
- expect(comment).to be_instance_of(Comment)
94
- expect(comment.text.length).to eq 14
95
- end
96
- end
97
- end
98
-
99
- specify('parse free space quantifiers') do
100
- regexp = /
101
- a
102
- # comment 1
103
- ?
104
- (
105
- b # comment 2
106
- # comment 3
107
- +
108
- )
109
- # comment 4
110
- *
111
- /x
112
-
113
- root = RP.parse(regexp)
114
-
115
- literal_1 = root[1]
116
- expect(literal_1).to be_instance_of(Literal)
117
- expect(literal_1).to be_quantified
118
- expect(literal_1.quantifier.token).to eq :zero_or_one
119
-
120
- group = root[5]
121
- expect(group).to be_instance_of(Group::Capture)
122
- expect(group).to be_quantified
123
- expect(group.quantifier.token).to eq :zero_or_more
124
-
125
- literal_2 = group[1]
126
- expect(literal_2).to be_instance_of(Literal)
127
- expect(literal_2).to be_quantified
128
- expect(literal_2.quantifier.token).to eq :one_or_more
129
- end
130
- end