hoozuki 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.md +18 -5
- data/Rakefile +12 -2
- data/lib/hoozuki/automaton/dfa/builder.rb +79 -0
- data/lib/hoozuki/automaton/dfa.rb +4 -41
- data/lib/hoozuki/automaton/nfa.rb +29 -108
- data/lib/hoozuki/automaton/state_id.rb +2 -1
- data/lib/hoozuki/instruction/char.rb +13 -0
- data/lib/hoozuki/instruction/jmp.rb +13 -0
- data/lib/hoozuki/instruction/match.rb +8 -0
- data/lib/hoozuki/instruction/split.rb +14 -0
- data/lib/hoozuki/instruction.rb +6 -0
- data/lib/hoozuki/node/choice.rb +13 -1
- data/lib/hoozuki/node/concatenation.rb +16 -1
- data/lib/hoozuki/node/epsilon.rb +8 -1
- data/lib/hoozuki/node/literal.rb +9 -1
- data/lib/hoozuki/node/repetition.rb +55 -1
- data/lib/hoozuki/parser.rb +888 -76
- data/lib/hoozuki/parser.y +128 -0
- data/lib/hoozuki/version.rb +2 -2
- data/lib/hoozuki/vm/compiler.rb +129 -0
- data/lib/hoozuki/vm/evaluator.rb +39 -0
- data/lib/hoozuki/vm.rb +4 -0
- data/lib/hoozuki.rb +22 -15
- data/spec/hoozuki/automaton/dfa/builder_spec.rb +79 -0
- data/spec/hoozuki/automaton/dfa_spec.rb +149 -0
- data/spec/hoozuki/automaton/nfa_spec.rb +168 -0
- data/spec/hoozuki/instruction_spec.rb +88 -0
- data/spec/hoozuki/node_spec.rb +110 -0
- data/spec/hoozuki/parser_spec.rb +168 -0
- data/spec/hoozuki/vm/compiler_spec.rb +219 -0
- data/spec/hoozuki/vm/evaluator_spec.rb +260 -0
- data/spec/hoozuki_spec.rb +186 -2
- metadata +20 -2
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class Hoozuki::Parser
rule
  # A whole pattern reduces to a single `choice`.
  target: choice

  # Alternation. A left operand that is already a Choice node contributes its
  # children, so `a|b|c` yields one Choice with three children.
  choice:
      concatenation
    | choice PIPE concatenation {
        branches =
          if val[0].is_a?(Hoozuki::Node::Choice)
            val[0].children.dup
          else
            [val[0]]
          end
        branches.push(val[2])
        result = Hoozuki::Node::Choice.new(branches)
      }

  # Sequencing. A leading Epsilon is dropped when something follows it, and an
  # existing Concatenation on the left is extended rather than nested.
  concatenation:
      repetition
    | EPSILON { result = Hoozuki::Node::Epsilon.new }
    | concatenation repetition {
        left, right = val
        result =
          if left.is_a?(Hoozuki::Node::Epsilon)
            right
          else
            parts = left.is_a?(Hoozuki::Node::Concatenation) ? left.children.dup : [left]
            Hoozuki::Node::Concatenation.new(parts << right)
          end
      }

  # A group optionally followed by exactly one quantifier.
  repetition:
      group
    | group STAR     { result = Hoozuki::Node::Repetition.new(val[0], :zero_or_more) }
    | group PLUS     { result = Hoozuki::Node::Repetition.new(val[0], :one_or_more) }
    | group QUESTION { result = Hoozuki::Node::Repetition.new(val[0], :optional) }

  # Parentheses only group: the inner node is passed through unchanged.
  group:
      LPAREN choice RPAREN { result = val[1] }
    | literal

  literal:
      CHAR { result = Hoozuki::Node::Literal.new(val[0]) }

end
|
|
42
|
+
|
|
43
|
+
---- header
|
|
44
|
+
require_relative 'node'
|
|
45
|
+
|
|
46
|
+
---- inner
|
|
47
|
+
# Sets @yydebug when the DEBUG environment variable is present.
# NOTE(review): @yydebug is presumably racc's trace switch - confirm against racc docs.
def initialize
  return unless ENV['DEBUG']

  @yydebug = true
end
|
|
50
|
+
|
|
51
|
+
# Resets the tokenizer state for +pattern+, tokenizes it and runs the parser.
#
# @param pattern [String] pattern source text
# @return whatever do_parse returns
def parse(pattern)
  @pattern, @offset, @tokens = pattern, 0, []
  tokenize
  do_parse
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
# Maps each metacharacter to the token type emitted for it.
SPECIAL_TOKENS = {
  '(' => :LPAREN, ')' => :RPAREN,
  '|' => :PIPE,
  '*' => :STAR, '+' => :PLUS, '?' => :QUESTION
}.freeze

# Characters that may follow a backslash: every metacharacter plus the
# backslash itself.
ESCAPABLE_CHARS = [*SPECIAL_TOKENS.keys, '\\'].freeze
|
|
70
|
+
|
|
71
|
+
# Walks @pattern from @offset to the end, delegating each character to the
# matching handler (the handlers advance @offset), then appends the
# [false, false] end marker to @tokens.
def tokenize
  until @offset >= @pattern.length
    current = @pattern[@offset]

    if current == '\\'
      handle_escape_sequence
    else
      special = SPECIAL_TOKENS.key?(current)
      special ? handle_special_char(current) : add_token(:CHAR, current)
    end
  end

  @tokens.push([false, false])
end
|
|
86
|
+
|
|
87
|
+
# Consumes a backslash escape: skips the backslash, then emits the following
# character as a plain CHAR token.
#
# @raise [RuntimeError] when the pattern ends right after the backslash or the
#   escaped character is not listed in ESCAPABLE_CHARS
def handle_escape_sequence
  @offset += 1
  if @offset >= @pattern.length
    raise 'Unexpected end of pattern'
  end

  target = @pattern[@offset]
  if ESCAPABLE_CHARS.include?(target)
    add_token(:CHAR, target)
  else
    raise "Invalid escape sequence: \\#{target}"
  end
end
|
|
96
|
+
|
|
97
|
+
# Emits the token for metacharacter +char+, then adds an EPSILON token where
# an alternative would otherwise be empty: after "(" when "|" follows, and
# after "|" when should_insert_epsilon_after_pipe? says so.
def handle_special_char(char)
  add_token(SPECIAL_TOKENS[char], char)

  insert_epsilon_after_lparen if char == '(' && next_char == '|'
  return unless char == '|' && should_insert_epsilon_after_pipe?

  insert_epsilon_after_pipe
end
|
|
104
|
+
|
|
105
|
+
# True when the character after the current offset cannot start an
# alternative: end of pattern, ")" or another "|".
def should_insert_epsilon_after_pipe?
  upcoming = next_char
  upcoming.nil? || upcoming == ')' || upcoming == '|'
end
|
|
108
|
+
|
|
109
|
+
# Appends an EPSILON token (no value) to @tokens; @offset is left untouched.
def insert_epsilon_after_lparen
  @tokens.push([:EPSILON, nil])
end
|
|
112
|
+
|
|
113
|
+
# Appends an EPSILON token (no value) to @tokens; @offset is left untouched.
def insert_epsilon_after_pipe
  @tokens.push([:EPSILON, nil])
end
|
|
116
|
+
|
|
117
|
+
# Records a [type, value] token and moves @offset past one character.
#
# @return [Integer] the new offset
def add_token(type, value)
  @tokens.push([type, value])
  @offset = @offset.succ
end
|
|
121
|
+
|
|
122
|
+
# Returns the character at @offset, or nil once @offset has reached the end
# of @pattern.
def next_char
  return if @offset >= @pattern.length

  @pattern[@offset]
end
|
|
125
|
+
|
|
126
|
+
# Removes and returns the oldest queued token; nil when @tokens is empty.
# NOTE(review): presumably the hook racc's do_parse pulls tokens from - confirm.
def next_token
  @tokens.delete_at(0)
end
|
data/lib/hoozuki/version.rb
CHANGED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hoozuki
  module VM
    # Translates a Hoozuki::Node tree into a flat list of Hoozuki::Instruction
    # objects. Split and Jmp operands are absolute indexes into that list.
    class Compiler
      attr_reader :instructions

      def initialize
        @pc = 0 # index the next emitted instruction will occupy
        @instructions = []
      end

      # Compiles +ast+ and terminates the program with a Match instruction.
      #
      # @param ast [Object] root node (Literal, Epsilon, Repetition, Choice or
      #   Concatenation); any other object contributes no instructions
      # @return [Array] the instruction list (the same object as #instructions)
      # @raise [ArgumentError] if a Choice node has no alternatives
      def compile(ast)
        compile_node(ast)
        emit_match
      end

      private

      # Dispatches on the node class.
      def compile_node(ast)
        case ast
        when Hoozuki::Node::Literal then compile_literal(ast)
        when Hoozuki::Node::Epsilon then nil # empty match: nothing to emit
        when Hoozuki::Node::Repetition then compile_repetition(ast)
        when Hoozuki::Node::Choice then compile_choice(ast)
        when Hoozuki::Node::Concatenation then compile_concatenation(ast)
        end
      end

      def compile_literal(node)
        emit(Hoozuki::Instruction::Char.new(node.value))
      end

      def compile_repetition(node)
        if node.zero_or_more?
          compile_zero_or_more(node.child)
        elsif node.one_or_more?
          compile_one_or_more(node.child)
        elsif node.optional?
          compile_optional(node.child)
        end
      end

      # child*  =>  L1: split L2, L3 / L2: <child>; jmp L1 / L3:
      def compile_zero_or_more(child)
        split = @pc
        # The exit target is unknown until the body is compiled; 0 is a placeholder.
        emit(Hoozuki::Instruction::Split.new(split + 1, 0))
        compile_node(child)
        emit(Hoozuki::Instruction::Jmp.new(split))
        patch(split, Hoozuki::Instruction::Split.new(split + 1, @pc))
      end

      # child+  =>  L1: <child>; split L1, L2 / L2:
      def compile_one_or_more(child)
        start = @pc
        compile_node(child)
        # @pc is still the Split's own index here, so @pc + 1 is the fall-through.
        emit(Hoozuki::Instruction::Split.new(start, @pc + 1))
      end

      # child?  =>  split L1, L2 / L1: <child> / L2:
      def compile_optional(child)
        split = @pc
        emit(Hoozuki::Instruction::Split.new(0, 0)) # both operands patched below
        start = @pc
        compile_node(child)
        patch(split, Hoozuki::Instruction::Split.new(start, @pc))
      end

      # Compiles an alternation as a right-nested chain of binary choices.
      # A single alternative is compiled directly instead of recursing into an
      # empty Choice, which used to end in NoMethodError.
      def compile_choice(node)
        first, *rest = node.children
        if rest.empty?
          raise ArgumentError, 'Choice node has no alternatives' if first.nil?

          compile_node(first)
        elsif rest.length == 1
          compile_binary_choice(first, rest.first)
        else
          compile_binary_choice(first, Hoozuki::Node::Choice.new(rest))
        end
      end

      # left|right  =>  split L1, L2 / L1: <left>; jmp L3 / L2: <right> / L3:
      def compile_binary_choice(left, right)
        split = @pc
        emit(Hoozuki::Instruction::Split.new(split + 1, 0))
        compile_node(left)
        jump = @pc
        emit(Hoozuki::Instruction::Jmp.new(0))
        validate_split_instruction(split)
        @instructions[split].right = @pc
        compile_node(right)
        validate_jmp_instruction(jump)
        @instructions[jump].target = @pc
      end

      def compile_concatenation(node)
        node.children.each { |child| compile_node(child) }
      end

      # Appends the terminating Match and returns the instruction list, which
      # is what #compile hands back to its caller.
      def emit_match
        emit(Hoozuki::Instruction::Match.new)
        @instructions
      end

      def validate_split_instruction(pc)
        return if @instructions[pc].is_a?(Hoozuki::Instruction::Split)

        raise "Instruction at pc #{pc} is not a Split"
      end

      def validate_jmp_instruction(pc)
        return if @instructions[pc].is_a?(Hoozuki::Instruction::Jmp)

        raise "Instruction at pc #{pc} is not a Jmp"
      end

      # Appends +instruction+ and advances the program counter.
      def emit(instruction)
        @instructions << instruction
        @pc += 1
      end

      # Replaces the instruction at index +pc+.
      def patch(pc, instruction)
        @instructions[pc] = instruction
      end
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hoozuki
  module VM
    # Backtracking interpreter for a Hoozuki::Instruction list. A program
    # matches only when a Match instruction is reached with the whole input
    # consumed.
    class Evaluator
      class << self
        # Convenience wrapper around #_evaluate on a fresh evaluator.
        #
        # @param instructions [Array] program to run
        # @param input [String] text to match
        # @param input_pos [Integer] index in +input+ to start from
        # @param pc [Integer] index of the first instruction to execute
        # @return [Boolean]
        def evaluate(instructions, input, input_pos = 0, pc = 0)
          new._evaluate(instructions, input, input_pos, pc)
        end
      end

      # Runs the program depth-first, trying a Split's left target before its
      # right one.
      #
      # Pending alternatives live on an explicit stack, so nesting depth does
      # not grow with the input. Every (pc, position) pair is entered through a
      # Jmp or Split at most once. This keeps zero-width loops such as the
      # program for `(a*)*` from spinning forever: a state seen before has
      # either failed already or is still being explored further up.
      #
      # @return [Boolean] true if some execution path accepts the input
      # @raise [RuntimeError] on an object that is not a known instruction
      def _evaluate(instructions, input, input_pos, pc)
        visited = {}
        pending = [[pc, input_pos]]

        until pending.empty?
          thread_pc, thread_pos = pending.pop
          return true if run_thread(instructions, input, thread_pos, thread_pc, pending, visited)
        end

        false
      end

      private

      # Follows one execution path until it accepts (true) or dies (false).
      # The right-hand target of every Split met on the way is pushed onto
      # +pending+ for later.
      def run_thread(instructions, input, pos, pc, pending, visited)
        while pc < instructions.size
          inst = instructions[pc]
          case inst
          when Hoozuki::Instruction::Char
            return false if pos >= input.size || input[pos] != inst.char

            pos += 1
            pc += 1
          when Hoozuki::Instruction::Jmp
            return false unless first_visit?(visited, pc, pos)

            pc = inst.target
          when Hoozuki::Instruction::Split
            return false unless first_visit?(visited, pc, pos)

            pending.push([inst.right, pos])
            pc = inst.left
          when Hoozuki::Instruction::Match
            return pos == input.length
          else
            raise "Unknown instruction: #{inst.class}"
          end
        end

        false # ran off the end of the program without reaching Match
      end

      # Marks (pc, pos) as seen; true only the first time the pair is offered.
      def first_visit?(visited, pc, pos)
        key = [pc, pos]
        return false if visited.key?(key)

        visited[key] = true
      end
    end
  end
end
|
data/lib/hoozuki/vm.rb
ADDED
data/lib/hoozuki.rb
CHANGED
|
@@ -1,35 +1,42 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative 'hoozuki/automaton'
|
|
4
|
+
require_relative 'hoozuki/instruction'
|
|
4
5
|
require_relative 'hoozuki/node'
|
|
5
6
|
require_relative 'hoozuki/parser'
|
|
6
7
|
require_relative 'hoozuki/version'
|
|
8
|
+
require_relative 'hoozuki/vm'
|
|
7
9
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
@input = input
|
|
11
|
-
@method = method
|
|
10
|
+
module Hoozuki
|
|
11
|
+
module_function
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
def compile(input, engine: :dfa)
|
|
14
|
+
ast = Parser.new.parse(input)
|
|
15
|
+
case engine
|
|
15
16
|
when :dfa
|
|
16
|
-
nfa = Automaton::NFA.
|
|
17
|
-
|
|
17
|
+
nfa = Automaton::NFA.from_node(ast, Automaton::StateID.new(0))
|
|
18
|
+
Automaton::DFA.from_nfa(nfa, use_cache?(input))
|
|
19
|
+
when :vm
|
|
20
|
+
compiler = VM::Compiler.new
|
|
21
|
+
compiler.compile(ast)
|
|
22
|
+
compiler.instructions
|
|
23
|
+
else
|
|
24
|
+
raise ArgumentError, "Unknown engine: #{engine}"
|
|
18
25
|
end
|
|
19
26
|
end
|
|
20
27
|
|
|
21
|
-
def match?(input)
|
|
22
|
-
|
|
28
|
+
def match?(pattern, input, engine: :dfa)
|
|
29
|
+
compiled = compile(pattern, engine: engine)
|
|
30
|
+
case engine
|
|
23
31
|
when :dfa
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
32
|
+
compiled.match?(input, use_cache?(input))
|
|
33
|
+
when :vm
|
|
34
|
+
VM::Evaluator.evaluate(compiled, input, 0, 0)
|
|
27
35
|
end
|
|
28
36
|
end
|
|
29
37
|
|
|
30
|
-
private
|
|
31
|
-
|
|
32
38
|
def use_cache?(input)
|
|
33
39
|
input.length > 1000
|
|
34
40
|
end
|
|
41
|
+
private_class_method :use_cache?
|
|
35
42
|
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Hoozuki::Automaton::DFA::Builder do
  describe '#call' do
    let(:state) { Hoozuki::Automaton::StateID.new(0) }

    # Builds the NFA for +node+ and runs the builder over it with the second
    # constructor argument set to false, as every example here does.
    def build_from_node(node)
      nfa = Hoozuki::Automaton::NFA.from_node(node, state)
      described_class.new(nfa, false).call
    end

    # Same as build_from_node, starting from pattern source text.
    def build_from_pattern(pattern)
      build_from_node(Hoozuki::Parser.new.parse(pattern))
    end

    context 'with simple NFA' do
      let(:dfa) { build_from_node(Hoozuki::Node::Literal.new('a')) }

      it 'builds a DFA' do
        expect(dfa).to be_a(Hoozuki::Automaton::DFA)
        expect(dfa.start).to be_a(Integer)
        expect(dfa.accept).to be_a(Set)
        expect(dfa.transitions).to be_a(Set)
      end
    end

    context 'with NFA containing epsilon transitions' do
      let(:dfa) { build_from_pattern('a?') }

      it 'eliminates epsilon transitions' do
        unlabeled = dfa.transitions.select { |_from, label, _to| label.nil? }

        expect(unlabeled).to be_empty
      end
    end

    context 'with alternation' do
      let(:dfa) { build_from_pattern('a|b') }

      it 'creates correct number of states' do
        expect(dfa.transitions.size).to be >= 2
      end
    end

    context 'with repetition' do
      let(:dfa) { build_from_pattern('a*') }

      it 'handles loops correctly' do
        expect(dfa.accept).to include(dfa.start)
      end
    end

    context 'with concatenation' do
      let(:dfa) { build_from_pattern('abc') }

      it 'builds sequential transitions' do
        expect(dfa.transitions.size).to be >= 3
      end
    end

    context 'with complex pattern' do
      let(:dfa) { build_from_pattern('(a|b)*c') }

      it 'builds correct DFA structure' do
        expect(dfa).to be_a(Hoozuki::Automaton::DFA)
        expect(dfa.accept).not_to be_empty
      end
    end
  end
end
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Hoozuki::Automaton::DFA do
  # A fresh state counter per example, shared by every group below.
  let(:state) { Hoozuki::Automaton::StateID.new(0) }

  # Converts +node+ to an NFA and then to a DFA, passing false as the second
  # argument to .from_nfa as every example here does.
  def dfa_for_node(node)
    nfa = Hoozuki::Automaton::NFA.from_node(node, state)
    described_class.from_nfa(nfa, false)
  end

  # Same as dfa_for_node, starting from pattern source text.
  def dfa_for_pattern(pattern)
    dfa_for_node(Hoozuki::Parser.new.parse(pattern))
  end

  # Literal node for a single character.
  def literal(char)
    Hoozuki::Node::Literal.new(char)
  end

  describe '.from_nfa' do
    it 'converts simple NFA to DFA' do
      dfa = dfa_for_node(literal('a'))

      expect(dfa).to be_a(described_class)
      expect(dfa.start).to be_a(Integer)
      expect(dfa.accept).to be_a(Set)
      expect(dfa.transitions).not_to be_empty
    end

    it 'converts choice NFA to DFA' do
      dfa = dfa_for_node(Hoozuki::Node::Choice.new([literal('a'), literal('b')]))
      labels = dfa.transitions.map { |_from, label, _to| label }

      expect(dfa.start).to be_a(Integer)
      expect(labels).to include('a', 'b')
    end

    it 'converts concatenation NFA to DFA' do
      dfa = dfa_for_node(Hoozuki::Node::Concatenation.new([literal('a'), literal('b')]))

      expect(dfa.start).to be_a(Integer)
      expect(dfa.accept).not_to be_empty
    end

    it 'handles alternation patterns' do
      dfa = dfa_for_pattern('a|b')

      expect(dfa.transitions.size).to be >= 2
    end

    it 'handles repetition patterns' do
      dfa = dfa_for_pattern('a*')

      expect(dfa.accept).to include(dfa.start)
    end
  end

  describe '#match?' do
    it 'matches using DFA for single literal' do
      dfa = dfa_for_node(literal('a'))

      expect(dfa.match?('a', false)).to be true
      expect(dfa.match?('b', false)).to be false
    end

    it 'matches choice pattern using DFA' do
      dfa = dfa_for_node(Hoozuki::Node::Choice.new([literal('a'), literal('b')]))

      expect(dfa.match?('a', false)).to be true
      expect(dfa.match?('b', false)).to be true
      expect(dfa.match?('c', false)).to be false
    end

    it 'matches concatenation pattern using DFA' do
      dfa = dfa_for_node(Hoozuki::Node::Concatenation.new([literal('a'), literal('b')]))

      expect(dfa.match?('ab', false)).to be true
      expect(dfa.match?('a', false)).to be false
      expect(dfa.match?('abc', false)).to be false
    end

    context 'with simple literal' do
      let(:dfa) { dfa_for_pattern('abc') }

      it 'matches exact string' do
        expect(dfa.match?('abc', false)).to be true
      end

      it 'does not match different string' do
        expect(dfa.match?('abd', false)).to be false
      end
    end

    context 'with alternation' do
      let(:dfa) { dfa_for_pattern('a|b') }

      it 'matches either branch' do
        expect(dfa.match?('a', false)).to be true
        expect(dfa.match?('b', false)).to be true
        expect(dfa.match?('c', false)).to be false
      end
    end
  end

  describe '#next_transition' do
    let(:dfa) { dfa_for_pattern('a') }

    it 'finds correct next state' do
      next_state = dfa.next_transition(dfa.start, 'a', false)

      expect(next_state).not_to be_nil
    end

    it 'returns nil for invalid transition' do
      next_state = dfa.next_transition(dfa.start, 'b', false)

      expect(next_state).to be_nil
    end
  end
end
|