hoozuki 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.codespellignore +0 -0
- data/.rspec +4 -0
- data/CHANGELOG.md +11 -0
- data/LICENSE +21 -0
- data/README.md +23 -0
- data/Rakefile +8 -0
- data/lib/hoozuki/automaton/dfa.rb +82 -0
- data/lib/hoozuki/automaton/nfa.rb +147 -0
- data/lib/hoozuki/automaton/state_id.rb +30 -0
- data/lib/hoozuki/automaton.rb +5 -0
- data/lib/hoozuki/node/choice.rb +13 -0
- data/lib/hoozuki/node/concatenation.rb +13 -0
- data/lib/hoozuki/node/epsilon.rb +8 -0
- data/lib/hoozuki/node/literal.rb +13 -0
- data/lib/hoozuki/node/repetition.rb +26 -0
- data/lib/hoozuki/node.rb +7 -0
- data/lib/hoozuki/parser.rb +127 -0
- data/lib/hoozuki/version.rb +5 -0
- data/lib/hoozuki.rb +35 -0
- data/spec/hoozuki_spec.rb +211 -0
- data/spec/spec_helper.rb +15 -0
- metadata +66 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: c496655adac0eea264d8a058af5518711803be3441781a1e59c132b6ce67d4e7
|
|
4
|
+
data.tar.gz: '0481e2619ba549f46849b5471f60ee77388fdf79c7dd4c9decbb3f3f3a21093f'
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 1e9aae7d5afa4cc9cab9faf374528cbd2e334ec85305b75ac324b4cc6e4ecfe871d10c6a5478fc83e56dbe89605de0e377d4ad67ae9489464f128d661fad5709
|
|
7
|
+
data.tar.gz: e62195f96cdff93a141e1796a4e6b7322e57f50ab885b9e61c2815832e45b66137536b1d94782974e57d7ba4b174a5099f1c17b794b28041068292905a794171
|
data/.codespellignore
ADDED
|
File without changes
|
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Yudai Takada
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Hoozuki (鬼灯)
|
|
2
|
+
|
|
3
|
+
A hobby regex engine written in Ruby. Designed to be simple and efficient for educational purposes.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
gem install hoozuki
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```ruby
|
|
14
|
+
require 'hoozuki'
|
|
15
|
+
regex = Hoozuki.new('a(bc|de)f')
|
|
16
|
+
puts regex.match?('abcf') # => true
|
|
17
|
+
puts regex.match?('adef') # => true
|
|
18
|
+
puts regex.match?('xyz') # => false
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## License
|
|
22
|
+
|
|
23
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class Hoozuki
  module Automaton
    # Deterministic finite automaton built from an NFA by subset construction.
    #
    # DFA states are integer ids (the start state is always 0). +transitions+
    # is a Set of [from, char, to] triples, +accept+ is the Set of accepting
    # ids, and +cache+ is a Hash mapping [state, char] to the next state id.
    class DFA
      # +cache+ needs a reader: from_nfa fills it from outside the instance.
      attr_reader :start, :accept, :transitions, :cache

      # start  - id of the initial state.
      # accept - collection of accepting state ids (must respond to include?).
      def initialize(start, accept)
        @start = start
        @accept = accept
        @transitions = Set.new
        @cache = {}
      end

      class << self
        # Converts +nfa+ into an equivalent DFA.
        #
        # nfa       - object responding to start, accept, transitions
        #             ([from, label, to] triples, nil label = epsilon) and
        #             epsilon_closure(set).
        # use_cache - when truthy, also records every transition in the
        #             [state, char] => next_state lookup table.
        #
        # Returns the new DFA.
        def from_nfa(nfa, use_cache)
          dfa_states = {} # set of NFA states => DFA state id
          queue = []
          nfa_accept_set = nfa.accept.to_set

          start_states = nfa.epsilon_closure(Set.new([nfa.start]))
          start_id = 0
          dfa_states[start_states] = start_id
          queue << start_states

          dfa = new(start_id, Set.new)

          while (current_nfa_states = queue.shift)
            current_dfa_id = dfa_states[current_nfa_states]
            # A DFA state accepts when any NFA state it contains accepts.
            dfa.accept << current_dfa_id if current_nfa_states.any? { |state| nfa_accept_set.include?(state) }

            # label => union of the epsilon closures reachable via that label.
            transitions_map = Hash.new { |hash, key| hash[key] = Set.new }

            current_nfa_states.each do |state|
              nfa.transitions.each do |from, label, to|
                next if label.nil? || from != state

                transitions_map[label].merge(nfa.epsilon_closure(Set[to]))
              end
            end

            transitions_map.each do |char, next_nfa_states|
              unless dfa_states.key?(next_nfa_states)
                dfa_states[next_nfa_states] = dfa_states.length
                queue.push(next_nfa_states)
              end

              next_dfa_id = dfa_states[next_nfa_states]
              dfa.transitions.add([current_dfa_id, char, next_dfa_id])
              dfa.cache[[current_dfa_id, char]] = next_dfa_id if use_cache
            end
          end

          dfa
        end
      end

      # Returns the state reached from +current+ on +input+, or nil when no
      # such transition exists. With +use_cache+ the lookup table is consulted
      # first; a miss falls back to scanning the transition set.
      def next_transition(current, input, use_cache)
        cached = use_cache ? @cache[[current, input]] : nil
        return cached if cached

        @transitions.find { |from, label, _| from == current && label == input }&.last
      end

      # Returns true when the whole of +input+ drives the automaton from the
      # start state to an accepting state, false otherwise.
      def match?(input, use_cache)
        state = @start

        input.each_char do |char|
          next_state = next_transition(state, char, use_cache)
          return false unless next_state

          state = next_state
        end

        @accept.include?(state)
      end
    end
  end
end
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'sorted_set'
|
|
4
|
+
|
|
5
|
+
class Hoozuki
  module Automaton
    # Nondeterministic finite automaton with epsilon transitions.
    #
    # +transitions+ is a Set of [from, label, to] triples where a nil label
    # denotes an epsilon move. +accept+ is an Array of accepting states.
    class NFA
      attr_accessor :start, :accept, :transitions

      # start  - the initial state.
      # accept - Array of accepting states.
      def initialize(start, accept)
        @start = start
        @accept = accept
        @transitions = Set.new
      end

      class << self
        # Recursively builds an NFA for the AST +node+.
        #
        # node  - a Node::Literal, Epsilon, Repetition, Choice or Concatenation.
        # state - state allocator; every call to state.new_state must return
        #         a fresh state.
        #
        # Returns the NFA. Raises ArgumentError for nil, for an unknown node
        # type, or for a repetition with an unknown quantifier.
        def new_from_node(node, state)
          raise ArgumentError, 'Node cannot be nil' if node.nil?

          case node
          when Node::Literal
            start_state = state.new_state
            accept_state = state.new_state
            nfa = new(start_state, [accept_state])
            nfa.add_transition(start_state, node.value, accept_state)
            nfa
          when Node::Epsilon
            start_state = state.new_state
            accept_state = state.new_state
            nfa = new(start_state, [accept_state])
            nfa.add_epsilon_transition(start_state, accept_state)
            nfa
          when Node::Repetition
            from_repetition(node, state)
          when Node::Choice
            # Build every alternative, not only the first two, so that
            # patterns such as "a|b|c" keep all of their branches.
            branches = node.children.map { |child| new_from_node(child, state) }
            nfa = new(state.new_state, [])
            # merge_nfa links the new start to each branch and collects its
            # accepting states.
            branches.each { |branch| nfa.merge_nfa(branch) }
            nfa
          when Node::Concatenation
            nfas = node.children.map { |child| new_from_node(child, state) }
            nfa = nfas.first
            nfas.drop(1).each do |next_nfa|
              nfa.transitions.merge(next_nfa.transitions)
              # Chain: every accepting state so far continues into the next part.
              nfa.accept.each do |accept_state|
                nfa.add_epsilon_transition(accept_state, next_nfa.start)
              end
              nfa.accept = next_nfa.accept
            end
            nfa
          else
            raise ArgumentError, "Unsupported node type: #{node.class}"
          end
        end

        private

        # Builds the NFA for a quantified node ("*", "+" or "?").
        def from_repetition(node, state)
          child = new_from_node(node.child, state)
          start_state = state.new_state

          if node.zero_or_more?
            # The start state accepts (zero repetitions); each child accept
            # loops back to the child's start for further repetitions.
            nfa = new(start_state, [start_state])
            nfa.merge_nfa(child)
            child.accept.each { |accept_state| nfa.add_epsilon_transition(accept_state, child.start) }
            nfa
          elsif node.one_or_more?
            accept_state = state.new_state
            nfa = new(start_state, [accept_state])
            nfa.transitions.merge(child.transitions)
            nfa.add_epsilon_transition(start_state, child.start)
            child.accept.each do |child_accept|
              nfa.add_epsilon_transition(child_accept, child.start)
              nfa.add_epsilon_transition(child_accept, accept_state)
            end
            nfa
          elsif node.optional?
            nfa = new(start_state, child.accept.dup << start_state)
            nfa.transitions.merge(child.transitions)
            nfa.add_epsilon_transition(start_state, child.start)
            nfa
          else
            raise ArgumentError, "Unsupported quantifier for node: #{node.inspect}"
          end
        end
      end

      # Returns the Set of states reachable from any state in +start+ using
      # only epsilon transitions (the states in +start+ included). Despite the
      # method name, the result is a plain Set.
      def epsilon_closure_with_bitset(start)
        visited = Set.new
        to_visit = start.to_a

        until to_visit.empty?
          state = to_visit.shift
          next unless visited.add?(state)

          transitions.each do |from, label, to|
            to_visit << to if from == state && label.nil? && !visited.include?(to)
          end
        end

        visited
      end

      # Same closure as epsilon_closure_with_bitset, returned as a SortedSet
      # (requires the sorted_set library to be loaded).
      def epsilon_closure(start)
        ::SortedSet.new(epsilon_closure_with_bitset(start.to_set))
      end

      # Adds an epsilon (nil-labelled) transition from +from+ to +to+.
      def add_epsilon_transition(from, to)
        @transitions << [from, nil, to]
      end

      # Adds a transition from +from+ to +to+ consuming +char+.
      def add_transition(from, char, to)
        @transitions << [from, char, to]
      end

      # Absorbs +other+ as an alternative: copies its transitions, links this
      # start state to its start via epsilon, and appends its accepting states.
      def merge_nfa(other)
        @transitions.merge(other.transitions)
        add_epsilon_transition(@start, other.start)
        other.accept.each { |accept_state| @accept << accept_state }
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class Hoozuki
  module Automaton
    # Identifier of an automaton state.
    #
    # An instance also works as an allocator: new_state bumps the receiver's
    # own id and returns a new StateID carrying the bumped value.
    class StateID
      attr_reader :id

      # id - the identifier value (an Integer in the visible callers).
      def initialize(id)
        @id = id
      end

      class << self
        # Allocates a StateID from a class-wide counter.
        #
        # The counter is a class-level instance variable that starts unset,
        # so it is coerced with to_i; the first call therefore returns id 1
        # instead of raising NoMethodError on nil.
        def new_state
          @id = @id.to_i + 1
          StateID.new(@id)
        end
      end

      # Increments this object's id and returns a new StateID with that id.
      def new_state
        @id += 1
        StateID.new(@id)
      end

      # Orders state ids by their id value; returns nil when +other+ is not
      # a StateID.
      def <=>(other)
        return nil unless other.is_a?(StateID)

        @id <=> other.id
      end
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class Hoozuki
  module Node
    # AST node for a quantified sub-expression: +child+ is the repeated node
    # and the quantifier is one of :zero_or_more, :one_or_more or :optional.
    class Repetition
      attr_reader :child

      def initialize(child, quantifier)
        @child = child
        @quantifier = quantifier
      end

      # True for the "*" quantifier.
      def zero_or_more?
        quantified_as?(:zero_or_more)
      end

      # True for the "+" quantifier.
      def one_or_more?
        quantified_as?(:one_or_more)
      end

      # True for the "?" quantifier.
      def optional?
        quantified_as?(:optional)
      end

      private

      # Compares the stored quantifier against +kind+.
      def quantified_as?(kind)
        @quantifier == kind
      end
    end
  end
end
|
data/lib/hoozuki/node.rb
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class Hoozuki
  # Recursive-descent parser that turns a pattern string into a Node AST.
  #
  # Grammar, lowest to highest precedence:
  #   choice        := concatenation ('|' concatenation)*
  #   concatenation := repetition*
  #   repetition    := group ('*' | '+' | '?')?
  #   group         := '(' choice ')' | literal
  #   literal       := '\' metacharacter | any non-metacharacter
  class Parser
    # Characters with syntactic meaning; they must be escaped to be literals.
    METACHARACTERS = ['(', ')', '|', '*', '+', '?', '\\'].freeze

    # Quantifier character => symbol stored on Node::Repetition.
    QUANTIFIERS = { '*' => :zero_or_more, '+' => :one_or_more, '?' => :optional }.freeze

    def initialize(pattern)
      @pattern = pattern
      @offset = 0
    end

    class << self
      # Convenience wrapper: parses +pattern+ and returns its AST.
      def parse(pattern)
        new(pattern).parse
      end
    end

    # Parses the whole pattern and returns the root AST node.
    #
    # Raises RuntimeError on malformed input. Input left over after the
    # top-level choice can only be an unmatched ')', which is reported as an
    # unexpected character rather than as an unexpected end of pattern.
    def parse
      ast = parse_choice

      raise "Unexpected character: #{current}" unless eol?

      ast
    end

    private

    # Character at the current offset, or nil past the end.
    def current
      @pattern[@offset]
    end

    # True once every character has been consumed.
    def eol?
      @pattern.size <= @offset
    end

    # Advances to the next character.
    def next_char
      @offset += 1
    end

    # Parses alternatives separated by '|'. A single alternative is returned
    # as-is, without a Choice wrapper.
    def parse_choice
      children = [parse_concatenation]

      while current == '|'
        next_char
        children << parse_concatenation
      end

      return children.first if children.size == 1

      Hoozuki::Node::Choice.new(children)
    end

    # Parses a run of repetitions. An empty run yields Epsilon and a single
    # item is returned unwrapped.
    def parse_concatenation
      children = []

      children << parse_repetition until stop_parsing_concatenation?

      return children.first if children.size == 1
      return Hoozuki::Node::Epsilon.new if children.empty?

      Hoozuki::Node::Concatenation.new(children)
    end

    # A concatenation ends at end of input, at '|' or at ')'.
    def stop_parsing_concatenation?
      eol? || current == '|' || current == ')'
    end

    # Parses a group optionally followed by one quantifier.
    def parse_repetition
      child = parse_group

      quantifier = QUANTIFIERS[current]
      return child if quantifier.nil?

      next_char

      Hoozuki::Node::Repetition.new(child, quantifier)
    end

    # Parses a parenthesised sub-pattern, or falls through to a literal.
    def parse_group
      return parse_literal if current != '('

      next_char
      child = parse_choice
      raise 'Expected closing parenthesis' unless current == ')'

      next_char
      child
    end

    # Parses one literal character, honouring backslash escapes. Only
    # metacharacters may be escaped, and an unescaped metacharacter is an error.
    def parse_literal
      raise 'Unexpected end of pattern' if eol?

      escaped = current == '\\'
      if escaped
        next_char
        raise 'Unexpected end of pattern' if eol?
      end

      value = current
      if escaped
        raise "Invalid escape sequence: \\#{value}" unless METACHARACTERS.include?(value)
      elsif METACHARACTERS.include?(value)
        raise "Unexpected character: #{value}"
      end

      next_char

      Hoozuki::Node::Literal.new(value)
    end
  end
end
|
data/lib/hoozuki.rb
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'hoozuki/automaton'
|
|
4
|
+
require_relative 'hoozuki/node'
|
|
5
|
+
require_relative 'hoozuki/parser'
|
|
6
|
+
require_relative 'hoozuki/version'
|
|
7
|
+
|
|
8
|
+
# Entry point of the regex engine: compiles a pattern once and answers
# whole-string match queries against it.
class Hoozuki
  # input  - the pattern string.
  # method - matching strategy; only :dfa builds an automaton here. Any other
  #          value is stored and rejected later by match?.
  def initialize(input, method: :dfa)
    @input = input
    @method = method

    # The pattern is parsed for every strategy, so syntax errors surface here.
    ast = Hoozuki::Parser.new(input).parse
    return unless method == :dfa

    nfa = Automaton::NFA.new_from_node(ast, Automaton::StateID.new(0))
    @dfa = Automaton::DFA.from_nfa(nfa, use_cache?(input))
  end

  # Returns whether +input+ is matched by the compiled pattern.
  # Raises ArgumentError when the configured strategy is not :dfa.
  def match?(input)
    raise ArgumentError, "Unknown method: #{@method}" unless @method == :dfa

    @dfa.match?(input, use_cache?(input))
  end

  private

  # The transition cache is only used for strings longer than 1000 characters.
  def use_cache?(input)
    input.length > 1000
  end
end
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Hoozuki do
  describe '#match?' do
    subject { described_class.new(pattern).match?(value) }

    # Declares one "when text is ..." context per [text, outcome] pair, each
    # asserting that matching +text+ against the surrounding pattern yields
    # exactly +outcome+.
    def self.verify_texts(expectations)
      expectations.each do |text, outcome|
        shown = text.empty? ? '"" (empty)' : %("#{text}")

        context "when text is #{shown}" do
          let(:value) { text }

          it { is_expected.to be outcome }
        end
      end
    end

    context 'with basic concatenation' do
      let(:pattern) { 'abc' }

      verify_texts [['abc', true], ['ab', false], ['abcd', false]]
    end

    context 'with alternation "|"' do
      let(:pattern) { 'a|b' }

      verify_texts [['a', true], ['b', true], ['ab', false]]
    end

    context 'with quantifiers "*", "+", "?"' do
      context 'with pattern "b*"' do
        let(:pattern) { 'b*' }

        verify_texts [['', true], ['b', true], ['bbb', true], ['c', false]]
      end

      context 'with pattern "a+"' do
        let(:pattern) { 'a+' }

        verify_texts [['a', true], ['aaa', true], ['', false]]
      end

      context 'with pattern "c?"' do
        let(:pattern) { 'c?' }

        verify_texts [['', true], ['c', true], ['cc', false]]
      end
    end

    context 'with grouping "()"' do
      let(:pattern) { 'ab(cd|)' }

      verify_texts [['abcd', true], ['ab', true], ['abc', false]]
    end

    context 'with escape sequences "\\"' do
      context 'with pattern "a\\|b\\*"' do
        let(:pattern) { 'a\\|b\\*' }

        verify_texts [['a|b*', true], ['ab', false]]
      end

      context 'with pattern "\\(a\\+\\)"' do
        let(:pattern) { '\\(a\\+\\)' }

        verify_texts [['(a+)', true], ['a', false]]
      end
    end

    context 'with a combination of features' do
      let(:pattern) { 'a|b*c(de)?' }

      verify_texts [['a', true], ['bc', true], ['cde', true], ['bbbcde', true], ['bd', false]]
    end

    context 'with multi-byte characters' do
      let(:pattern) { '(こん|おつ)*やっぴー' }

      verify_texts [
        ['こんやっぴー', true],
        ['おつやっぴー', true],
        ['こんおつやっぴー', true],
        ['こんこんきーつね', false]
      ]
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../lib/hoozuki'
|
|
4
|
+
|
|
5
|
+
# Shared RSpec settings for the suite.
RSpec.configure do |rspec|
  # Include chained clauses in the descriptions of custom matchers.
  rspec.expect_with(:rspec) do |expect_config|
    expect_config.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # Enable verification of partial doubles.
  rspec.mock_with(:rspec) do |mock_config|
    mock_config.verify_partial_doubles = true
  end

  rspec.shared_context_metadata_behavior = :apply_to_host_groups
end
|
metadata
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: hoozuki
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Yudai Takada
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
description: Hoozuki is a hobby regex engine written in Ruby, designed to be simple
|
|
13
|
+
and efficient for educational purposes.
|
|
14
|
+
email:
|
|
15
|
+
- t.yudai92@gmail.com
|
|
16
|
+
executables: []
|
|
17
|
+
extensions: []
|
|
18
|
+
extra_rdoc_files: []
|
|
19
|
+
files:
|
|
20
|
+
- ".codespellignore"
|
|
21
|
+
- ".rspec"
|
|
22
|
+
- CHANGELOG.md
|
|
23
|
+
- LICENSE
|
|
24
|
+
- README.md
|
|
25
|
+
- Rakefile
|
|
26
|
+
- lib/hoozuki.rb
|
|
27
|
+
- lib/hoozuki/automaton.rb
|
|
28
|
+
- lib/hoozuki/automaton/dfa.rb
|
|
29
|
+
- lib/hoozuki/automaton/nfa.rb
|
|
30
|
+
- lib/hoozuki/automaton/state_id.rb
|
|
31
|
+
- lib/hoozuki/node.rb
|
|
32
|
+
- lib/hoozuki/node/choice.rb
|
|
33
|
+
- lib/hoozuki/node/concatenation.rb
|
|
34
|
+
- lib/hoozuki/node/epsilon.rb
|
|
35
|
+
- lib/hoozuki/node/literal.rb
|
|
36
|
+
- lib/hoozuki/node/repetition.rb
|
|
37
|
+
- lib/hoozuki/parser.rb
|
|
38
|
+
- lib/hoozuki/version.rb
|
|
39
|
+
- spec/hoozuki_spec.rb
|
|
40
|
+
- spec/spec_helper.rb
|
|
41
|
+
homepage: https://github.com/ydah/hoozuki
|
|
42
|
+
licenses:
|
|
43
|
+
- MIT
|
|
44
|
+
metadata:
|
|
45
|
+
homepage_uri: https://github.com/ydah/hoozuki
|
|
46
|
+
source_code_uri: https://github.com/ydah/hoozuki
|
|
47
|
+
changelog_uri: https://github.com/ydah/hoozuki/blob/master/CHANGELOG.md
|
|
48
|
+
rubygems_mfa_required: 'true'
|
|
49
|
+
rdoc_options: []
|
|
50
|
+
require_paths:
|
|
51
|
+
- lib
|
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
53
|
+
requirements:
|
|
54
|
+
- - ">="
|
|
55
|
+
- !ruby/object:Gem::Version
|
|
56
|
+
version: '3.2'
|
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
requirements: []
|
|
63
|
+
rubygems_version: 3.8.0.dev
|
|
64
|
+
specification_version: 4
|
|
65
|
+
summary: A hobby regex engine written in Ruby.
|
|
66
|
+
test_files: []
|