hoozuki 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.md +18 -5
- data/Rakefile +12 -2
- data/lib/hoozuki/automaton/dfa/builder.rb +79 -0
- data/lib/hoozuki/automaton/dfa.rb +4 -41
- data/lib/hoozuki/automaton/nfa.rb +29 -108
- data/lib/hoozuki/automaton/state_id.rb +2 -1
- data/lib/hoozuki/instruction/char.rb +13 -0
- data/lib/hoozuki/instruction/jmp.rb +13 -0
- data/lib/hoozuki/instruction/match.rb +8 -0
- data/lib/hoozuki/instruction/split.rb +14 -0
- data/lib/hoozuki/instruction.rb +6 -0
- data/lib/hoozuki/node/choice.rb +13 -1
- data/lib/hoozuki/node/concatenation.rb +16 -1
- data/lib/hoozuki/node/epsilon.rb +8 -1
- data/lib/hoozuki/node/literal.rb +9 -1
- data/lib/hoozuki/node/repetition.rb +55 -1
- data/lib/hoozuki/parser.rb +888 -76
- data/lib/hoozuki/parser.y +128 -0
- data/lib/hoozuki/version.rb +2 -2
- data/lib/hoozuki/vm/compiler.rb +129 -0
- data/lib/hoozuki/vm/evaluator.rb +39 -0
- data/lib/hoozuki/vm.rb +4 -0
- data/lib/hoozuki.rb +22 -15
- data/spec/hoozuki/automaton/dfa/builder_spec.rb +79 -0
- data/spec/hoozuki/automaton/dfa_spec.rb +149 -0
- data/spec/hoozuki/automaton/nfa_spec.rb +168 -0
- data/spec/hoozuki/instruction_spec.rb +88 -0
- data/spec/hoozuki/node_spec.rb +110 -0
- data/spec/hoozuki/parser_spec.rb +168 -0
- data/spec/hoozuki/vm/compiler_spec.rb +219 -0
- data/spec/hoozuki/vm/evaluator_spec.rb +260 -0
- data/spec/hoozuki_spec.rb +186 -2
- metadata +20 -2
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Hoozuki::Automaton::NFA do
|
|
4
|
+
describe '.from_node' do
|
|
5
|
+
let(:state) { Hoozuki::Automaton::StateID.new(0) }
|
|
6
|
+
|
|
7
|
+
context 'with nil node' do
|
|
8
|
+
it 'raises ArgumentError' do
|
|
9
|
+
expect { described_class.from_node(nil, state) }.to raise_error(ArgumentError, 'Node cannot be nil')
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
context 'with Literal node' do
|
|
14
|
+
it 'creates NFA from literal node' do
|
|
15
|
+
node = Hoozuki::Node::Literal.new('a')
|
|
16
|
+
nfa = described_class.from_node(node, state)
|
|
17
|
+
|
|
18
|
+
expect(nfa).to be_a(described_class)
|
|
19
|
+
expect(nfa.start).to be_a(Hoozuki::Automaton::StateID)
|
|
20
|
+
expect(nfa.accept).to be_an(Array)
|
|
21
|
+
expect(nfa.accept.length).to eq(1)
|
|
22
|
+
expect(nfa.transitions.size).to eq(1)
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
context 'with Epsilon node' do
|
|
27
|
+
it 'creates NFA from epsilon node' do
|
|
28
|
+
node = Hoozuki::Node::Epsilon.new
|
|
29
|
+
nfa = described_class.from_node(node, state)
|
|
30
|
+
|
|
31
|
+
expect(nfa).to be_a(described_class)
|
|
32
|
+
expect(nfa.accept.length).to eq(1)
|
|
33
|
+
expect(nfa.transitions.size).to eq(1)
|
|
34
|
+
expect(nfa.transitions.first[1]).to be_nil
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
context 'with Concatenation node' do
|
|
39
|
+
it 'creates NFA from concatenation node' do
|
|
40
|
+
node = Hoozuki::Node::Concatenation.new([
|
|
41
|
+
Hoozuki::Node::Literal.new('a'),
|
|
42
|
+
Hoozuki::Node::Literal.new('b')
|
|
43
|
+
])
|
|
44
|
+
nfa = described_class.from_node(node, state)
|
|
45
|
+
|
|
46
|
+
expect(nfa).to be_a(described_class)
|
|
47
|
+
expect(nfa.accept.length).to eq(1)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
context 'with Choice node' do
|
|
52
|
+
it 'creates NFA from choice node' do
|
|
53
|
+
node = Hoozuki::Node::Choice.new([
|
|
54
|
+
Hoozuki::Node::Literal.new('a'),
|
|
55
|
+
Hoozuki::Node::Literal.new('b')
|
|
56
|
+
])
|
|
57
|
+
nfa = described_class.from_node(node, state)
|
|
58
|
+
|
|
59
|
+
expect(nfa).to be_a(described_class)
|
|
60
|
+
expect(nfa.accept.length).to eq(2)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
context 'with Repetition node' do
|
|
65
|
+
it 'creates NFA from zero-or-more repetition' do
|
|
66
|
+
node = Hoozuki::Node::Repetition.new(
|
|
67
|
+
Hoozuki::Node::Literal.new('a'),
|
|
68
|
+
:zero_or_more
|
|
69
|
+
)
|
|
70
|
+
nfa = described_class.from_node(node, state)
|
|
71
|
+
|
|
72
|
+
expect(nfa).to be_a(described_class)
|
|
73
|
+
expect(nfa.accept.length).to eq(2)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
it 'creates NFA from one-or-more repetition' do
|
|
77
|
+
node = Hoozuki::Node::Repetition.new(
|
|
78
|
+
Hoozuki::Node::Literal.new('a'),
|
|
79
|
+
:one_or_more
|
|
80
|
+
)
|
|
81
|
+
nfa = described_class.from_node(node, state)
|
|
82
|
+
|
|
83
|
+
expect(nfa).to be_a(described_class)
|
|
84
|
+
expect(nfa.accept.length).to eq(1)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it 'creates NFA from optional repetition' do
|
|
88
|
+
node = Hoozuki::Node::Repetition.new(
|
|
89
|
+
Hoozuki::Node::Literal.new('a'),
|
|
90
|
+
:optional
|
|
91
|
+
)
|
|
92
|
+
nfa = described_class.from_node(node, state)
|
|
93
|
+
|
|
94
|
+
expect(nfa).to be_a(described_class)
|
|
95
|
+
expect(nfa.accept.length).to eq(2)
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
context 'with complex node' do
|
|
100
|
+
it 'delegates to node.to_nfa' do
|
|
101
|
+
node = Hoozuki::Node::Literal.new('x')
|
|
102
|
+
expect(node).to receive(:to_nfa).with(state).and_call_original
|
|
103
|
+
|
|
104
|
+
described_class.from_node(node, state)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
describe '#epsilon_closure' do
|
|
110
|
+
it 'returns set containing start state with no epsilon transitions' do
|
|
111
|
+
nfa = described_class.new(0, [1])
|
|
112
|
+
nfa.add_transition(0, 'a', 1)
|
|
113
|
+
|
|
114
|
+
closure = nfa.epsilon_closure(Set.new([0]))
|
|
115
|
+
expect(closure).to include(0)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it 'follows epsilon transitions' do
|
|
119
|
+
nfa = described_class.new(0, [2])
|
|
120
|
+
nfa.add_epsilon_transition(0, 1)
|
|
121
|
+
nfa.add_epsilon_transition(1, 2)
|
|
122
|
+
|
|
123
|
+
closure = nfa.epsilon_closure(Set.new([0]))
|
|
124
|
+
expect(closure).to include(0, 1, 2)
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
it 'handles multiple starting states' do
|
|
128
|
+
nfa = described_class.new(0, [2])
|
|
129
|
+
nfa.add_epsilon_transition(0, 1)
|
|
130
|
+
nfa.add_epsilon_transition(2, 3)
|
|
131
|
+
|
|
132
|
+
closure = nfa.epsilon_closure(Set.new([0, 2]))
|
|
133
|
+
expect(closure).to include(0, 1, 2, 3)
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
describe '#add_transition' do
|
|
138
|
+
it 'adds a labeled transition' do
|
|
139
|
+
nfa = described_class.new(0, [1])
|
|
140
|
+
nfa.add_transition(0, 'a', 1)
|
|
141
|
+
|
|
142
|
+
expect(nfa.transitions).to include([0, 'a', 1])
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
describe '#add_epsilon_transition' do
|
|
147
|
+
it 'adds an epsilon transition' do
|
|
148
|
+
nfa = described_class.new(0, [1])
|
|
149
|
+
nfa.add_epsilon_transition(0, 1)
|
|
150
|
+
|
|
151
|
+
expect(nfa.transitions).to include([0, nil, 1])
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
describe '#merge_transitions' do
|
|
156
|
+
it 'merges transitions from another NFA' do
|
|
157
|
+
nfa1 = described_class.new(0, [1])
|
|
158
|
+
nfa1.add_transition(0, 'a', 1)
|
|
159
|
+
|
|
160
|
+
nfa2 = described_class.new(2, [3])
|
|
161
|
+
nfa2.add_transition(2, 'b', 3)
|
|
162
|
+
|
|
163
|
+
nfa1.merge_transitions(nfa2)
|
|
164
|
+
|
|
165
|
+
expect(nfa1.transitions).to include([0, 'a', 1], [2, 'b', 3])
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Hoozuki::Instruction do
|
|
4
|
+
describe Hoozuki::Instruction::Char do
|
|
5
|
+
describe '#initialize' do
|
|
6
|
+
it 'sets the character' do
|
|
7
|
+
instruction = described_class.new('a')
|
|
8
|
+
expect(instruction.char).to eq('a')
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it 'handles multibyte characters' do
|
|
12
|
+
instruction = described_class.new('あ')
|
|
13
|
+
expect(instruction.char).to eq('あ')
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
describe '#char=' do
|
|
18
|
+
it 'allows updating the character' do
|
|
19
|
+
instruction = described_class.new('a')
|
|
20
|
+
instruction.char = 'b'
|
|
21
|
+
expect(instruction.char).to eq('b')
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
describe Hoozuki::Instruction::Jmp do
|
|
27
|
+
describe '#initialize' do
|
|
28
|
+
it 'sets the target' do
|
|
29
|
+
instruction = described_class.new(5)
|
|
30
|
+
expect(instruction.target).to eq(5)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it 'handles zero target' do
|
|
34
|
+
instruction = described_class.new(0)
|
|
35
|
+
expect(instruction.target).to eq(0)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
describe '#target=' do
|
|
40
|
+
it 'allows updating the target' do
|
|
41
|
+
instruction = described_class.new(5)
|
|
42
|
+
instruction.target = 10
|
|
43
|
+
expect(instruction.target).to eq(10)
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
describe Hoozuki::Instruction::Match do
|
|
49
|
+
describe '#initialize' do
|
|
50
|
+
it 'creates a Match instruction' do
|
|
51
|
+
instruction = described_class.new
|
|
52
|
+
expect(instruction).to be_a(described_class)
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
describe Hoozuki::Instruction::Split do
|
|
58
|
+
describe '#initialize' do
|
|
59
|
+
it 'sets left and right targets' do
|
|
60
|
+
instruction = described_class.new(3, 7)
|
|
61
|
+
expect(instruction.left).to eq(3)
|
|
62
|
+
expect(instruction.right).to eq(7)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
it 'handles zero targets' do
|
|
66
|
+
instruction = described_class.new(0, 0)
|
|
67
|
+
expect(instruction.left).to eq(0)
|
|
68
|
+
expect(instruction.right).to eq(0)
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
describe '#left=' do
|
|
73
|
+
it 'allows updating the left target' do
|
|
74
|
+
instruction = described_class.new(3, 7)
|
|
75
|
+
instruction.left = 5
|
|
76
|
+
expect(instruction.left).to eq(5)
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
describe '#right=' do
|
|
81
|
+
it 'allows updating the right target' do
|
|
82
|
+
instruction = described_class.new(3, 7)
|
|
83
|
+
instruction.right = 10
|
|
84
|
+
expect(instruction.right).to eq(10)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe 'Hoozuki::Node' do
|
|
4
|
+
let(:state) { Hoozuki::Automaton::StateID.new(0) }
|
|
5
|
+
|
|
6
|
+
describe Hoozuki::Node::Literal do
|
|
7
|
+
describe '#to_nfa' do
|
|
8
|
+
it 'creates an NFA with a single transition' do
|
|
9
|
+
node = described_class.new('a')
|
|
10
|
+
nfa = node.to_nfa(state)
|
|
11
|
+
|
|
12
|
+
expect(nfa).to be_a(Hoozuki::Automaton::NFA)
|
|
13
|
+
expect(nfa.start).to be_a(Hoozuki::Automaton::StateID)
|
|
14
|
+
expect(nfa.accept).to be_an(Array)
|
|
15
|
+
expect(nfa.accept.length).to eq(1)
|
|
16
|
+
expect(nfa.transitions.size).to eq(1)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'creates correct transition' do
|
|
20
|
+
node = described_class.new('b')
|
|
21
|
+
nfa = node.to_nfa(state)
|
|
22
|
+
|
|
23
|
+
transition = nfa.transitions.first
|
|
24
|
+
expect(transition[1]).to eq('b')
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
describe Hoozuki::Node::Epsilon do
|
|
30
|
+
describe '#to_nfa' do
|
|
31
|
+
it 'creates an NFA with epsilon transition' do
|
|
32
|
+
node = described_class.new
|
|
33
|
+
nfa = node.to_nfa(state)
|
|
34
|
+
|
|
35
|
+
expect(nfa).to be_a(Hoozuki::Automaton::NFA)
|
|
36
|
+
expect(nfa.transitions.size).to eq(1)
|
|
37
|
+
transition = nfa.transitions.first
|
|
38
|
+
expect(transition[1]).to be_nil
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
describe Hoozuki::Node::Repetition do
|
|
44
|
+
describe '#to_nfa' do
|
|
45
|
+
context 'with zero_or_more' do
|
|
46
|
+
it 'creates correct NFA structure' do
|
|
47
|
+
child = Hoozuki::Node::Literal.new('a')
|
|
48
|
+
node = described_class.new(child, :zero_or_more)
|
|
49
|
+
nfa = node.to_nfa(state)
|
|
50
|
+
|
|
51
|
+
expect(nfa).to be_a(Hoozuki::Automaton::NFA)
|
|
52
|
+
expect(nfa.accept.length).to eq(2)
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
context 'with one_or_more' do
|
|
57
|
+
it 'creates correct NFA structure' do
|
|
58
|
+
child = Hoozuki::Node::Literal.new('a')
|
|
59
|
+
node = described_class.new(child, :one_or_more)
|
|
60
|
+
nfa = node.to_nfa(state)
|
|
61
|
+
|
|
62
|
+
expect(nfa).to be_a(Hoozuki::Automaton::NFA)
|
|
63
|
+
expect(nfa.accept.length).to eq(1)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
context 'with optional' do
|
|
68
|
+
it 'creates correct NFA structure' do
|
|
69
|
+
child = Hoozuki::Node::Literal.new('a')
|
|
70
|
+
node = described_class.new(child, :optional)
|
|
71
|
+
nfa = node.to_nfa(state)
|
|
72
|
+
|
|
73
|
+
expect(nfa).to be_a(Hoozuki::Automaton::NFA)
|
|
74
|
+
expect(nfa.accept.length).to eq(2)
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
describe Hoozuki::Node::Choice do
|
|
81
|
+
describe '#to_nfa' do
|
|
82
|
+
it 'creates NFA with branches' do
|
|
83
|
+
left = Hoozuki::Node::Literal.new('a')
|
|
84
|
+
right = Hoozuki::Node::Literal.new('b')
|
|
85
|
+
node = described_class.new([left, right])
|
|
86
|
+
nfa = node.to_nfa(state)
|
|
87
|
+
|
|
88
|
+
expect(nfa).to be_a(Hoozuki::Automaton::NFA)
|
|
89
|
+
expect(nfa.accept.length).to eq(2)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
describe Hoozuki::Node::Concatenation do
|
|
95
|
+
describe '#to_nfa' do
|
|
96
|
+
it 'creates NFA with sequential states' do
|
|
97
|
+
children = [
|
|
98
|
+
Hoozuki::Node::Literal.new('a'),
|
|
99
|
+
Hoozuki::Node::Literal.new('b'),
|
|
100
|
+
Hoozuki::Node::Literal.new('c')
|
|
101
|
+
]
|
|
102
|
+
node = described_class.new(children)
|
|
103
|
+
nfa = node.to_nfa(state)
|
|
104
|
+
|
|
105
|
+
expect(nfa).to be_a(Hoozuki::Automaton::NFA)
|
|
106
|
+
expect(nfa.accept.length).to eq(1)
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Hoozuki::Parser do
|
|
4
|
+
describe '#parse' do
|
|
5
|
+
context 'with a single literal' do
|
|
6
|
+
it 'returns a Literal node' do
|
|
7
|
+
result = described_class.new.parse('a')
|
|
8
|
+
expect(result).to be_a(Hoozuki::Node::Literal)
|
|
9
|
+
expect(result.value).to eq('a')
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
context 'with concatenation' do
|
|
14
|
+
it 'returns a Concatenation node' do
|
|
15
|
+
result = described_class.new.parse('abc')
|
|
16
|
+
expect(result).to be_a(Hoozuki::Node::Concatenation)
|
|
17
|
+
expect(result.children.length).to eq(3)
|
|
18
|
+
expect(result.children.map(&:value)).to eq(%w[a b c])
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it 'handles multibyte characters' do
|
|
22
|
+
result = described_class.new.parse('あいう')
|
|
23
|
+
expect(result).to be_a(Hoozuki::Node::Concatenation)
|
|
24
|
+
expect(result.children.map(&:value)).to eq(%w[あ い う])
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
context 'with alternation' do
|
|
29
|
+
it 'returns a Choice node' do
|
|
30
|
+
result = described_class.new.parse('a|b')
|
|
31
|
+
expect(result).to be_a(Hoozuki::Node::Choice)
|
|
32
|
+
expect(result.children.length).to eq(2)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it 'parses choice with concatenation' do
|
|
36
|
+
result = described_class.new.parse('cat|dog')
|
|
37
|
+
expect(result).to be_a(Hoozuki::Node::Choice)
|
|
38
|
+
expect(result.children[0]).to be_a(Hoozuki::Node::Concatenation)
|
|
39
|
+
expect(result.children[1]).to be_a(Hoozuki::Node::Concatenation)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
it 'parses multiple choices' do
|
|
43
|
+
result = described_class.new.parse('a|b|c')
|
|
44
|
+
expect(result).to be_a(Hoozuki::Node::Choice)
|
|
45
|
+
expect(result.children.length).to eq(3)
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
context 'with zero or more quantifier' do
|
|
50
|
+
it 'returns a Repetition node with zero_or_more' do
|
|
51
|
+
result = described_class.new.parse('a*')
|
|
52
|
+
expect(result).to be_a(Hoozuki::Node::Repetition)
|
|
53
|
+
expect(result.zero_or_more?).to be true
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
context 'with one or more quantifier' do
|
|
58
|
+
it 'returns a Repetition node with one_or_more' do
|
|
59
|
+
result = described_class.new.parse('a+')
|
|
60
|
+
expect(result).to be_a(Hoozuki::Node::Repetition)
|
|
61
|
+
expect(result.one_or_more?).to be true
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
context 'with optional quantifier' do
|
|
66
|
+
it 'returns a Repetition node with optional' do
|
|
67
|
+
result = described_class.new.parse('a?')
|
|
68
|
+
expect(result).to be_a(Hoozuki::Node::Repetition)
|
|
69
|
+
expect(result.optional?).to be true
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
context 'with grouping' do
|
|
74
|
+
it 'returns correct structure' do
|
|
75
|
+
result = described_class.new.parse('(ab)')
|
|
76
|
+
expect(result).to be_a(Hoozuki::Node::Concatenation)
|
|
77
|
+
expect(result.children.length).to eq(2)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it 'parses nested groups' do
|
|
81
|
+
result = described_class.new.parse('((a))')
|
|
82
|
+
expect(result).to be_a(Hoozuki::Node::Literal)
|
|
83
|
+
expect(result.value).to eq('a')
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
it 'parses group with empty alternative' do
|
|
87
|
+
result = described_class.new.parse('(a|)')
|
|
88
|
+
expect(result).to be_a(Hoozuki::Node::Choice)
|
|
89
|
+
expect(result.children.last).to be_a(Hoozuki::Node::Epsilon)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
it 'parses group repetition' do
|
|
93
|
+
result = described_class.new.parse('(ab)*')
|
|
94
|
+
expect(result).to be_a(Hoozuki::Node::Repetition)
|
|
95
|
+
expect(result.child).to be_a(Hoozuki::Node::Concatenation)
|
|
96
|
+
expect(result.zero_or_more?).to be true
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
context 'with escaped characters' do
|
|
101
|
+
it 'treats escaped special characters as literals' do
|
|
102
|
+
result = described_class.new.parse('\\*')
|
|
103
|
+
expect(result).to be_a(Hoozuki::Node::Literal)
|
|
104
|
+
expect(result.value).to eq('*')
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
it 'escapes opening parenthesis' do
|
|
108
|
+
result = described_class.new.parse('\\(')
|
|
109
|
+
expect(result).to be_a(Hoozuki::Node::Literal)
|
|
110
|
+
expect(result.value).to eq('(')
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
it 'escapes closing parenthesis' do
|
|
114
|
+
result = described_class.new.parse('\\)')
|
|
115
|
+
expect(result).to be_a(Hoozuki::Node::Literal)
|
|
116
|
+
expect(result.value).to eq(')')
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
it 'escapes pipe character' do
|
|
120
|
+
result = described_class.new.parse('\\|')
|
|
121
|
+
expect(result).to be_a(Hoozuki::Node::Literal)
|
|
122
|
+
expect(result.value).to eq('|')
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
it 'escapes backslash' do
|
|
126
|
+
result = described_class.new.parse('\\\\')
|
|
127
|
+
expect(result).to be_a(Hoozuki::Node::Literal)
|
|
128
|
+
expect(result.value).to eq('\\')
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
it 'raises error for incomplete escape' do
|
|
132
|
+
expect { described_class.new.parse('\\') }.to raise_error
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
context 'with error cases' do
|
|
137
|
+
it 'raises error for unmatched opening parenthesis' do
|
|
138
|
+
expect { described_class.new.parse('(abc') }.to raise_error
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
it 'raises error for unmatched closing parenthesis' do
|
|
142
|
+
expect { described_class.new.parse('abc)') }.to raise_error
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
it 'raises error for nested unmatched parentheses' do
|
|
146
|
+
expect { described_class.new.parse('((a)') }.to raise_error
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
context 'with empty alternation' do
|
|
151
|
+
it 'returns a Choice with Epsilon' do
|
|
152
|
+
result = described_class.new.parse('a|')
|
|
153
|
+
expect(result).to be_a(Hoozuki::Node::Choice)
|
|
154
|
+
expect(result.children.length).to eq(2)
|
|
155
|
+
expect(result.children.last).to be_a(Hoozuki::Node::Epsilon)
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
context 'with complex pattern' do
|
|
160
|
+
it 'parses correctly' do
|
|
161
|
+
result = described_class.new.parse('a(b|c)*d')
|
|
162
|
+
expect(result).to be_a(Hoozuki::Node::Concatenation)
|
|
163
|
+
expect(result.children.length).to eq(3)
|
|
164
|
+
expect(result.children[1]).to be_a(Hoozuki::Node::Repetition)
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
end
|