hoozuki 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Hoozuki::Automaton::NFA do
4
+ describe '.from_node' do
5
+ let(:state) { Hoozuki::Automaton::StateID.new(0) }
6
+
7
+ context 'with nil node' do
8
+ it 'raises ArgumentError' do
9
+ expect { described_class.from_node(nil, state) }.to raise_error(ArgumentError, 'Node cannot be nil')
10
+ end
11
+ end
12
+
13
+ context 'with Literal node' do
14
+ it 'creates NFA from literal node' do
15
+ node = Hoozuki::Node::Literal.new('a')
16
+ nfa = described_class.from_node(node, state)
17
+
18
+ expect(nfa).to be_a(described_class)
19
+ expect(nfa.start).to be_a(Hoozuki::Automaton::StateID)
20
+ expect(nfa.accept).to be_an(Array)
21
+ expect(nfa.accept.length).to eq(1)
22
+ expect(nfa.transitions.size).to eq(1)
23
+ end
24
+ end
25
+
26
+ context 'with Epsilon node' do
27
+ it 'creates NFA from epsilon node' do
28
+ node = Hoozuki::Node::Epsilon.new
29
+ nfa = described_class.from_node(node, state)
30
+
31
+ expect(nfa).to be_a(described_class)
32
+ expect(nfa.accept.length).to eq(1)
33
+ expect(nfa.transitions.size).to eq(1)
34
+ expect(nfa.transitions.first[1]).to be_nil
35
+ end
36
+ end
37
+
38
+ context 'with Concatenation node' do
39
+ it 'creates NFA from concatenation node' do
40
+ node = Hoozuki::Node::Concatenation.new([
41
+ Hoozuki::Node::Literal.new('a'),
42
+ Hoozuki::Node::Literal.new('b')
43
+ ])
44
+ nfa = described_class.from_node(node, state)
45
+
46
+ expect(nfa).to be_a(described_class)
47
+ expect(nfa.accept.length).to eq(1)
48
+ end
49
+ end
50
+
51
+ context 'with Choice node' do
52
+ it 'creates NFA from choice node' do
53
+ node = Hoozuki::Node::Choice.new([
54
+ Hoozuki::Node::Literal.new('a'),
55
+ Hoozuki::Node::Literal.new('b')
56
+ ])
57
+ nfa = described_class.from_node(node, state)
58
+
59
+ expect(nfa).to be_a(described_class)
60
+ expect(nfa.accept.length).to eq(2)
61
+ end
62
+ end
63
+
64
+ context 'with Repetition node' do
65
+ it 'creates NFA from zero-or-more repetition' do
66
+ node = Hoozuki::Node::Repetition.new(
67
+ Hoozuki::Node::Literal.new('a'),
68
+ :zero_or_more
69
+ )
70
+ nfa = described_class.from_node(node, state)
71
+
72
+ expect(nfa).to be_a(described_class)
73
+ expect(nfa.accept.length).to eq(2)
74
+ end
75
+
76
+ it 'creates NFA from one-or-more repetition' do
77
+ node = Hoozuki::Node::Repetition.new(
78
+ Hoozuki::Node::Literal.new('a'),
79
+ :one_or_more
80
+ )
81
+ nfa = described_class.from_node(node, state)
82
+
83
+ expect(nfa).to be_a(described_class)
84
+ expect(nfa.accept.length).to eq(1)
85
+ end
86
+
87
+ it 'creates NFA from optional repetition' do
88
+ node = Hoozuki::Node::Repetition.new(
89
+ Hoozuki::Node::Literal.new('a'),
90
+ :optional
91
+ )
92
+ nfa = described_class.from_node(node, state)
93
+
94
+ expect(nfa).to be_a(described_class)
95
+ expect(nfa.accept.length).to eq(2)
96
+ end
97
+ end
98
+
99
+ context 'with complex node' do
100
+ it 'delegates to node.to_nfa' do
101
+ node = Hoozuki::Node::Literal.new('x')
102
+ expect(node).to receive(:to_nfa).with(state).and_call_original
103
+
104
+ described_class.from_node(node, state)
105
+ end
106
+ end
107
+ end
108
+
109
+ describe '#epsilon_closure' do # examples for NFA#epsilon_closure, each seeded with a Set of plain integer states
110
+ it 'returns set containing start state with no epsilon transitions' do
111
+ nfa = described_class.new(0, [1]) # presumably (start, accept states) - confirm against NFA#initialize
112
+ nfa.add_transition(0, 'a', 1) # only a labeled 'a' edge is added; no epsilon edges exist in this fixture
113
+
114
+ closure = nfa.epsilon_closure(Set.new([0]))
115
+ expect(closure).to include(0) # NOTE(review): checks membership of 0 only; does not assert that state 1 is absent
116
+ end
117
+
118
+ it 'follows epsilon transitions' do
119
+ nfa = described_class.new(0, [2])
120
+ nfa.add_epsilon_transition(0, 1) # epsilon chain 0 -> 1 -> 2, so the closure must be followed transitively
121
+ nfa.add_epsilon_transition(1, 2)
122
+
123
+ closure = nfa.epsilon_closure(Set.new([0]))
124
+ expect(closure).to include(0, 1, 2)
125
+ end
126
+
127
+ it 'handles multiple starting states' do
128
+ nfa = described_class.new(0, [2])
129
+ nfa.add_epsilon_transition(0, 1) # two disjoint epsilon edges: 0 -> 1 and 2 -> 3
130
+ nfa.add_epsilon_transition(2, 3)
131
+
132
+ closure = nfa.epsilon_closure(Set.new([0, 2])) # seeded with both 0 and 2, so both edges should contribute
133
+ expect(closure).to include(0, 1, 2, 3)
134
+ end
135
+ end
136
+
137
+ describe '#add_transition' do
138
+ it 'adds a labeled transition' do
139
+ nfa = described_class.new(0, [1])
140
+ nfa.add_transition(0, 'a', 1)
141
+
142
+ expect(nfa.transitions).to include([0, 'a', 1])
143
+ end
144
+ end
145
+
146
+ describe '#add_epsilon_transition' do
147
+ it 'adds an epsilon transition' do
148
+ nfa = described_class.new(0, [1])
149
+ nfa.add_epsilon_transition(0, 1)
150
+
151
+ expect(nfa.transitions).to include([0, nil, 1])
152
+ end
153
+ end
154
+
155
+ describe '#merge_transitions' do
156
+ it 'merges transitions from another NFA' do
157
+ nfa1 = described_class.new(0, [1])
158
+ nfa1.add_transition(0, 'a', 1)
159
+
160
+ nfa2 = described_class.new(2, [3])
161
+ nfa2.add_transition(2, 'b', 3)
162
+
163
+ nfa1.merge_transitions(nfa2)
164
+
165
+ expect(nfa1.transitions).to include([0, 'a', 1], [2, 'b', 3])
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Hoozuki::Instruction do
4
+ describe Hoozuki::Instruction::Char do
5
+ describe '#initialize' do
6
+ it 'sets the character' do
7
+ instruction = described_class.new('a')
8
+ expect(instruction.char).to eq('a')
9
+ end
10
+
11
+ it 'handles multibyte characters' do
12
+ instruction = described_class.new('あ')
13
+ expect(instruction.char).to eq('あ')
14
+ end
15
+ end
16
+
17
+ describe '#char=' do
18
+ it 'allows updating the character' do
19
+ instruction = described_class.new('a')
20
+ instruction.char = 'b'
21
+ expect(instruction.char).to eq('b')
22
+ end
23
+ end
24
+ end
25
+
26
+ describe Hoozuki::Instruction::Jmp do
27
+ describe '#initialize' do
28
+ it 'sets the target' do
29
+ instruction = described_class.new(5)
30
+ expect(instruction.target).to eq(5)
31
+ end
32
+
33
+ it 'handles zero target' do
34
+ instruction = described_class.new(0)
35
+ expect(instruction.target).to eq(0)
36
+ end
37
+ end
38
+
39
+ describe '#target=' do
40
+ it 'allows updating the target' do
41
+ instruction = described_class.new(5)
42
+ instruction.target = 10
43
+ expect(instruction.target).to eq(10)
44
+ end
45
+ end
46
+ end
47
+
48
+ describe Hoozuki::Instruction::Match do
49
+ describe '#initialize' do
50
+ it 'creates a Match instruction' do
51
+ instruction = described_class.new
52
+ expect(instruction).to be_a(described_class)
53
+ end
54
+ end
55
+ end
56
+
57
+ describe Hoozuki::Instruction::Split do
58
+ describe '#initialize' do
59
+ it 'sets left and right targets' do
60
+ instruction = described_class.new(3, 7)
61
+ expect(instruction.left).to eq(3)
62
+ expect(instruction.right).to eq(7)
63
+ end
64
+
65
+ it 'handles zero targets' do
66
+ instruction = described_class.new(0, 0)
67
+ expect(instruction.left).to eq(0)
68
+ expect(instruction.right).to eq(0)
69
+ end
70
+ end
71
+
72
+ describe '#left=' do
73
+ it 'allows updating the left target' do
74
+ instruction = described_class.new(3, 7)
75
+ instruction.left = 5
76
+ expect(instruction.left).to eq(5)
77
+ end
78
+ end
79
+
80
+ describe '#right=' do
81
+ it 'allows updating the right target' do
82
+ instruction = described_class.new(3, 7)
83
+ instruction.right = 10
84
+ expect(instruction.right).to eq(10)
85
+ end
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe 'Hoozuki::Node' do
4
+ let(:state) { Hoozuki::Automaton::StateID.new(0) }
5
+
6
+ describe Hoozuki::Node::Literal do
7
+ describe '#to_nfa' do
8
+ it 'creates an NFA with a single transition' do
9
+ node = described_class.new('a')
10
+ nfa = node.to_nfa(state)
11
+
12
+ expect(nfa).to be_a(Hoozuki::Automaton::NFA)
13
+ expect(nfa.start).to be_a(Hoozuki::Automaton::StateID)
14
+ expect(nfa.accept).to be_an(Array)
15
+ expect(nfa.accept.length).to eq(1)
16
+ expect(nfa.transitions.size).to eq(1)
17
+ end
18
+
19
+ it 'creates correct transition' do
20
+ node = described_class.new('b')
21
+ nfa = node.to_nfa(state)
22
+
23
+ transition = nfa.transitions.first
24
+ expect(transition[1]).to eq('b')
25
+ end
26
+ end
27
+ end
28
+
29
+ describe Hoozuki::Node::Epsilon do
30
+ describe '#to_nfa' do
31
+ it 'creates an NFA with epsilon transition' do
32
+ node = described_class.new
33
+ nfa = node.to_nfa(state)
34
+
35
+ expect(nfa).to be_a(Hoozuki::Automaton::NFA)
36
+ expect(nfa.transitions.size).to eq(1)
37
+ transition = nfa.transitions.first
38
+ expect(transition[1]).to be_nil
39
+ end
40
+ end
41
+ end
42
+
43
+ describe Hoozuki::Node::Repetition do
44
+ describe '#to_nfa' do
45
+ context 'with zero_or_more' do
46
+ it 'creates correct NFA structure' do
47
+ child = Hoozuki::Node::Literal.new('a')
48
+ node = described_class.new(child, :zero_or_more)
49
+ nfa = node.to_nfa(state)
50
+
51
+ expect(nfa).to be_a(Hoozuki::Automaton::NFA)
52
+ expect(nfa.accept.length).to eq(2)
53
+ end
54
+ end
55
+
56
+ context 'with one_or_more' do
57
+ it 'creates correct NFA structure' do
58
+ child = Hoozuki::Node::Literal.new('a')
59
+ node = described_class.new(child, :one_or_more)
60
+ nfa = node.to_nfa(state)
61
+
62
+ expect(nfa).to be_a(Hoozuki::Automaton::NFA)
63
+ expect(nfa.accept.length).to eq(1)
64
+ end
65
+ end
66
+
67
+ context 'with optional' do
68
+ it 'creates correct NFA structure' do
69
+ child = Hoozuki::Node::Literal.new('a')
70
+ node = described_class.new(child, :optional)
71
+ nfa = node.to_nfa(state)
72
+
73
+ expect(nfa).to be_a(Hoozuki::Automaton::NFA)
74
+ expect(nfa.accept.length).to eq(2)
75
+ end
76
+ end
77
+ end
78
+ end
79
+
80
+ describe Hoozuki::Node::Choice do
81
+ describe '#to_nfa' do
82
+ it 'creates NFA with branches' do
83
+ left = Hoozuki::Node::Literal.new('a')
84
+ right = Hoozuki::Node::Literal.new('b')
85
+ node = described_class.new([left, right])
86
+ nfa = node.to_nfa(state)
87
+
88
+ expect(nfa).to be_a(Hoozuki::Automaton::NFA)
89
+ expect(nfa.accept.length).to eq(2)
90
+ end
91
+ end
92
+ end
93
+
94
+ describe Hoozuki::Node::Concatenation do
95
+ describe '#to_nfa' do
96
+ it 'creates NFA with sequential states' do
97
+ children = [
98
+ Hoozuki::Node::Literal.new('a'),
99
+ Hoozuki::Node::Literal.new('b'),
100
+ Hoozuki::Node::Literal.new('c')
101
+ ]
102
+ node = described_class.new(children)
103
+ nfa = node.to_nfa(state)
104
+
105
+ expect(nfa).to be_a(Hoozuki::Automaton::NFA)
106
+ expect(nfa.accept.length).to eq(1)
107
+ end
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Hoozuki::Parser do
4
+ describe '#parse' do
5
+ context 'with a single literal' do
6
+ it 'returns a Literal node' do
7
+ result = described_class.new.parse('a')
8
+ expect(result).to be_a(Hoozuki::Node::Literal)
9
+ expect(result.value).to eq('a')
10
+ end
11
+ end
12
+
13
+ context 'with concatenation' do
14
+ it 'returns a Concatenation node' do
15
+ result = described_class.new.parse('abc')
16
+ expect(result).to be_a(Hoozuki::Node::Concatenation)
17
+ expect(result.children.length).to eq(3)
18
+ expect(result.children.map(&:value)).to eq(%w[a b c])
19
+ end
20
+
21
+ it 'handles multibyte characters' do
22
+ result = described_class.new.parse('あいう')
23
+ expect(result).to be_a(Hoozuki::Node::Concatenation)
24
+ expect(result.children.map(&:value)).to eq(%w[あ い う])
25
+ end
26
+ end
27
+
28
+ context 'with alternation' do
29
+ it 'returns a Choice node' do
30
+ result = described_class.new.parse('a|b')
31
+ expect(result).to be_a(Hoozuki::Node::Choice)
32
+ expect(result.children.length).to eq(2)
33
+ end
34
+
35
+ it 'parses choice with concatenation' do
36
+ result = described_class.new.parse('cat|dog')
37
+ expect(result).to be_a(Hoozuki::Node::Choice)
38
+ expect(result.children[0]).to be_a(Hoozuki::Node::Concatenation)
39
+ expect(result.children[1]).to be_a(Hoozuki::Node::Concatenation)
40
+ end
41
+
42
+ it 'parses multiple choices' do
43
+ result = described_class.new.parse('a|b|c')
44
+ expect(result).to be_a(Hoozuki::Node::Choice)
45
+ expect(result.children.length).to eq(3)
46
+ end
47
+ end
48
+
49
+ context 'with zero or more quantifier' do
50
+ it 'returns a Repetition node with zero_or_more' do
51
+ result = described_class.new.parse('a*')
52
+ expect(result).to be_a(Hoozuki::Node::Repetition)
53
+ expect(result.zero_or_more?).to be true
54
+ end
55
+ end
56
+
57
+ context 'with one or more quantifier' do
58
+ it 'returns a Repetition node with one_or_more' do
59
+ result = described_class.new.parse('a+')
60
+ expect(result).to be_a(Hoozuki::Node::Repetition)
61
+ expect(result.one_or_more?).to be true
62
+ end
63
+ end
64
+
65
+ context 'with optional quantifier' do
66
+ it 'returns a Repetition node with optional' do
67
+ result = described_class.new.parse('a?')
68
+ expect(result).to be_a(Hoozuki::Node::Repetition)
69
+ expect(result.optional?).to be true
70
+ end
71
+ end
72
+
73
+ context 'with grouping' do
74
+ it 'returns correct structure' do
75
+ result = described_class.new.parse('(ab)')
76
+ expect(result).to be_a(Hoozuki::Node::Concatenation)
77
+ expect(result.children.length).to eq(2)
78
+ end
79
+
80
+ it 'parses nested groups' do
81
+ result = described_class.new.parse('((a))')
82
+ expect(result).to be_a(Hoozuki::Node::Literal)
83
+ expect(result.value).to eq('a')
84
+ end
85
+
86
+ it 'parses group with empty alternative' do
87
+ result = described_class.new.parse('(a|)')
88
+ expect(result).to be_a(Hoozuki::Node::Choice)
89
+ expect(result.children.last).to be_a(Hoozuki::Node::Epsilon)
90
+ end
91
+
92
+ it 'parses group repetition' do
93
+ result = described_class.new.parse('(ab)*')
94
+ expect(result).to be_a(Hoozuki::Node::Repetition)
95
+ expect(result.child).to be_a(Hoozuki::Node::Concatenation)
96
+ expect(result.zero_or_more?).to be true
97
+ end
98
+ end
99
+
100
+ context 'with escaped characters' do
101
+ it 'treats escaped special characters as literals' do
102
+ result = described_class.new.parse('\\*')
103
+ expect(result).to be_a(Hoozuki::Node::Literal)
104
+ expect(result.value).to eq('*')
105
+ end
106
+
107
+ it 'escapes opening parenthesis' do
108
+ result = described_class.new.parse('\\(')
109
+ expect(result).to be_a(Hoozuki::Node::Literal)
110
+ expect(result.value).to eq('(')
111
+ end
112
+
113
+ it 'escapes closing parenthesis' do
114
+ result = described_class.new.parse('\\)')
115
+ expect(result).to be_a(Hoozuki::Node::Literal)
116
+ expect(result.value).to eq(')')
117
+ end
118
+
119
+ it 'escapes pipe character' do
120
+ result = described_class.new.parse('\\|')
121
+ expect(result).to be_a(Hoozuki::Node::Literal)
122
+ expect(result.value).to eq('|')
123
+ end
124
+
125
+ it 'escapes backslash' do
126
+ result = described_class.new.parse('\\\\')
127
+ expect(result).to be_a(Hoozuki::Node::Literal)
128
+ expect(result.value).to eq('\\')
129
+ end
130
+
131
+ it 'raises error for incomplete escape' do
132
+ expect { described_class.new.parse('\\') }.to raise_error
133
+ end
134
+ end
135
+
136
+ context 'with error cases' do
137
+ it 'raises error for unmatched opening parenthesis' do
138
+ expect { described_class.new.parse('(abc') }.to raise_error
139
+ end
140
+
141
+ it 'raises error for unmatched closing parenthesis' do
142
+ expect { described_class.new.parse('abc)') }.to raise_error
143
+ end
144
+
145
+ it 'raises error for nested unmatched parentheses' do
146
+ expect { described_class.new.parse('((a)') }.to raise_error
147
+ end
148
+ end
149
+
150
+ context 'with empty alternation' do
151
+ it 'returns a Choice with Epsilon' do
152
+ result = described_class.new.parse('a|')
153
+ expect(result).to be_a(Hoozuki::Node::Choice)
154
+ expect(result.children.length).to eq(2)
155
+ expect(result.children.last).to be_a(Hoozuki::Node::Epsilon)
156
+ end
157
+ end
158
+
159
+ context 'with complex pattern' do
160
+ it 'parses correctly' do
161
+ result = described_class.new.parse('a(b|c)*d')
162
+ expect(result).to be_a(Hoozuki::Node::Concatenation)
163
+ expect(result.children.length).to eq(3)
164
+ expect(result.children[1]).to be_a(Hoozuki::Node::Repetition)
165
+ end
166
+ end
167
+ end
168
+ end