hoozuki 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ff80e01946d63faf2012a01c6f7d2c04f0330b47aa63ba2a0f540c9e6cf290fc
4
- data.tar.gz: 515b27f972f50c5f4c35cf6d33422637f93cdedc7db54ffdbd7a3f363a10c18f
3
+ metadata.gz: f44a0df16a02460880d94f715f9b4c84234beedafcb2a9bf177669241960317a
4
+ data.tar.gz: 4bbe7704cdfe994f9d58e83c88416b179d531c3763374e37f1711367ee0127f2
5
5
  SHA512:
6
- metadata.gz: d26025dabd381db4ec2dc42ee4baa626e5389fd4ce7ade66f6e5a6d42879c929e2529e65413c919b8119e0127027986243a3be8a5bc0c702cbc52c57c276fee3
7
- data.tar.gz: edff1a212c89c9f322517649ef6b8daf27bbaa41682e312313b064e97002d3a0c395e2bc3c9a82c657ea8e38670e91f31616923f5d026776a5d81bac0892a3ec
6
+ metadata.gz: af0244c531d265c5a71b3655263d49023bb8bfaf9ca8bf267db3a2f0d143948f833cb5a540589d5eb33dd7b477a3601a38dbe7b917abba0bc9a874a38df820df
7
+ data.tar.gz: 66191863f3faec1f662364bba0cf4e0b839e0feadc3a45fbca3ad45c5ed30ae882e63760ef7df401e0597f5610e35babb3c901c532e1cfa3566e06857d5e6a4a
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  A hobby regex engine written in Ruby. Designed to be simple and efficient for educational purposes.
4
4
  Currently supports 2 engines:
5
- - NFA Based Engine
5
+ - DFA Based Engine
6
6
  - VM Based Engine
7
7
 
8
8
  ## Installation
data/Rakefile CHANGED
@@ -1,8 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'bundler/gem_tasks'
4
- require 'rspec/core/rake_task'
5
4
 
6
- RSpec::Core::RakeTask.new(:spec)
5
+ namespace 'build' do
6
+ desc 'build parser from parser.y'
7
+ task :parser do
8
+ sh 'bundle exec racc lib/hoozuki/parser.y --embedded --frozen -o lib/hoozuki/parser.rb -t --log-file=parser.output'
9
+ end
10
+ end
11
+
12
+ require 'rspec/core/rake_task'
13
+ RSpec::Core::RakeTask.new(:spec) do |spec|
14
+ spec.pattern = FileList['spec/**/*_spec.rb']
15
+ end
16
+ task :spec => "build:parser"
7
17
 
8
18
  task default: :spec
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Hoozuki
4
+ module Automaton
5
+ class DFA
6
+ class Builder
7
+ def initialize(nfa, use_cache)
8
+ @nfa = nfa
9
+ @use_cache = use_cache
10
+ @dfa_states = {}
11
+ @queue = []
12
+ @nfa_accept_set = nfa.accept.to_set
13
+ end
14
+
15
+ def call
16
+ initialize_dfa
17
+ process_states
18
+ @dfa
19
+ end
20
+
21
+ private
22
+
23
+ def initialize_dfa
24
+ start_states = @nfa.epsilon_closure(Set.new([@nfa.start]))
25
+ start_id = 0
26
+ @dfa_states[start_states] = start_id
27
+ @queue << start_states
28
+ @dfa = DFA.new(start_id, Set.new)
29
+ end
30
+
31
+ def process_states
32
+ while (current_nfa_states = @queue.shift)
33
+ current_dfa_id = @dfa_states[current_nfa_states]
34
+ mark_accept(current_nfa_states, current_dfa_id)
35
+ transitions_map = build_transitions(current_nfa_states)
36
+ process_transitions(transitions_map, current_dfa_id)
37
+ end
38
+ end
39
+
40
+ def mark_accept(nfa_states, dfa_id)
41
+ return unless nfa_states.any? { |state| @nfa_accept_set.include?(state) }
42
+
43
+ @dfa.accept.merge([dfa_id])
44
+ end
45
+
46
+ def build_transitions(nfa_states)
47
+ transitions_map = Hash.new { |h, k| h[k] = Set.new }
48
+
49
+ nfa_states.each do |state|
50
+ @nfa.transitions.each do |from, label, to|
51
+ next unless from == state && !label.nil?
52
+
53
+ transitions_map[label].merge(@nfa.epsilon_closure(Set[to]))
54
+ end
55
+ end
56
+
57
+ transitions_map
58
+ end
59
+
60
+ def process_transitions(transitions_map, current_dfa_id)
61
+ transitions_map.each do |char, next_nfa_states|
62
+ next_dfa_id = ensure_state(next_nfa_states)
63
+ @dfa.transitions.add([current_dfa_id, char, next_dfa_id])
64
+ @dfa.cache[[current_dfa_id, char]] = next_dfa_id if @use_cache
65
+ end
66
+ end
67
+
68
+ def ensure_state(nfa_states)
69
+ return @dfa_states[nfa_states] if @dfa_states.key?(nfa_states)
70
+
71
+ new_id = @dfa_states.length
72
+ @dfa_states[nfa_states] = new_id
73
+ @queue.push(nfa_states)
74
+ new_id
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ require_relative 'dfa/builder'
4
+
5
+ module Hoozuki
4
6
  module Automaton
5
7
  class DFA
6
8
  attr_reader :start, :accept, :transitions
@@ -14,46 +16,7 @@ class Hoozuki
14
16
 
15
17
  class << self
16
18
  def from_nfa(nfa, use_cache)
17
- dfa_states = {}
18
- queue = []
19
- nfa_accept_set = nfa.accept.to_set
20
-
21
- start_set = Set.new([nfa.start])
22
- start_states = nfa.epsilon_closure(start_set)
23
-
24
- start_id = 0
25
- dfa_states[start_states] = start_id
26
- queue << start_states
27
-
28
- dfa = new(start_id, Set.new)
29
-
30
- while (current_nfa_states = queue.shift)
31
- current_dfa_id = dfa_states[current_nfa_states]
32
- dfa.accept.merge([current_dfa_id]) if current_nfa_states.any? { |state| nfa_accept_set.include?(state) }
33
-
34
- transitions_map = Hash.new { |h, k| h[k] = Set.new }
35
-
36
- current_nfa_states.each do |state|
37
- nfa.transitions.each do |from, label, to|
38
- transitions_map[label].merge(nfa.epsilon_closure(Set[to])) if from == state && !label.nil?
39
- end
40
- end
41
-
42
- transitions_map.each do |char, next_nfa_states|
43
- unless dfa_states.key?(next_nfa_states)
44
- next_dfa_id = dfa_states.length
45
- dfa_states[next_nfa_states] = next_dfa_id
46
- queue.push(next_nfa_states)
47
- end
48
-
49
- next_dfa_id = dfa_states[next_nfa_states]
50
- dfa.transitions.add([current_dfa_id, char, next_dfa_id])
51
-
52
- dfa.cache[[current_dfa_id, char]] = next_dfa_id if use_cache
53
- end
54
- end
55
-
56
- dfa
19
+ Builder.new(nfa, use_cache).call
57
20
  end
58
21
  end
59
22
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'sorted_set'
4
4
 
5
- class Hoozuki
5
+ module Hoozuki
6
6
  module Automaton
7
7
  class NFA
8
8
  attr_accessor :start, :accept, :transitions
@@ -14,132 +14,53 @@ class Hoozuki
14
14
  end
15
15
 
16
16
  class << self
17
- def new_from_node(node, state)
17
+ def from_node(node, state)
18
18
  raise ArgumentError, 'Node cannot be nil' if node.nil?
19
19
 
20
- case node
21
- when Node::Literal
22
- start_state = state.new_state
23
- accept_state = state.new_state
24
- nfa = new(start_state, [accept_state])
25
- nfa.add_transition(start_state, node.value, accept_state)
26
- nfa
27
- when Node::Epsilon
28
- start_state = state.new_state
29
- accept_state = state.new_state
30
- nfa = new(start_state, [accept_state])
31
- nfa.add_epsilon_transition(start_state, accept_state)
32
- nfa
33
- when Node::Repetition
34
- if node.zero_or_more?
35
- remain = new_from_node(node.child, state)
36
- start_state = state.new_state
37
- accepts = remain.accept.dup
38
- accepts << start_state
39
-
40
- nfa = new(start_state, accepts)
41
- nfa.merge_nfa(remain)
42
- nfa.add_epsilon_transition(start_state, remain.start)
43
-
44
- remain.accept.each do |accept_state|
45
- nfa.add_epsilon_transition(accept_state, remain.start)
46
- end
47
-
48
- nfa
49
- elsif node.one_or_more?
50
- remain = new_from_node(node.child, state)
51
- start_state = state.new_state
52
- accept_state = state.new_state
53
- nfa = new(start_state, [accept_state])
54
-
55
- nfa.transitions.merge(remain.transitions)
56
- nfa.add_epsilon_transition(start_state, remain.start)
57
- remain.accept.each do |remain_accept|
58
- nfa.add_epsilon_transition(remain_accept, remain.start)
59
- nfa.add_epsilon_transition(remain_accept, accept_state)
60
- end
61
- nfa
62
- elsif node.optional?
63
- child = new_from_node(node.child, state)
64
- start_state = state.new_state
65
- accepts = child.accept.dup
66
- accepts << start_state
67
-
68
- nfa = new(start_state, accepts)
69
- nfa.transitions.merge(child.transitions)
70
- nfa.add_epsilon_transition(start_state, child.start)
71
- nfa
72
- end
73
- when Node::Choice
74
- remain1 = new_from_node(node.children[0], state)
75
- remain2 = new_from_node(node.children[1], state)
76
- start_state = state.new_state
77
- accepts = remain1.accept if remain1.respond_to?(:accept)
78
- accepts |= remain2.accept if remain2.respond_to?(:accept)
79
- nfa = new(start_state, accepts)
80
- nfa.merge_nfa(remain1)
81
- nfa.merge_nfa(remain2)
82
- nfa.add_epsilon_transition(start_state, remain1.start)
83
- nfa.add_epsilon_transition(start_state, remain2.start)
84
- nfa
85
- when Node::Concatenation
86
- nfas = node.children.map { |child| new_from_node(child, state) }
87
- nfa = nfas.first
88
- nfas.drop(1).each do |next_nfa|
89
- nfa.transitions.merge(next_nfa.transitions)
90
- nfa.accept.each do |accept_state|
91
- nfa.add_epsilon_transition(accept_state, next_nfa.start)
92
- end
93
- nfa.accept = next_nfa.accept
94
- end
95
- nfa
96
- else
97
- raise ArgumentError, "Unsupported node type: #{node.class}"
98
- end
20
+ node.to_nfa(state)
99
21
  end
100
22
  end
101
23
 
102
- def epsilon_closure_with_bitset(start)
103
- visited = Set.new
104
- to_visit = []
24
+ def epsilon_closure(start)
25
+ closure = compute_closure(start.to_set)
26
+ ::SortedSet.new(closure)
27
+ end
105
28
 
106
- start.each do |state|
107
- to_visit << state unless visited.include?(state)
108
- end
29
+ def merge_transitions(other)
30
+ @transitions.merge(other.transitions)
31
+ end
32
+
33
+ def add_epsilon_transition(from, to)
34
+ @transitions << [from, nil, to]
35
+ end
36
+
37
+ def add_transition(from, char, to)
38
+ @transitions << [from, char, to]
39
+ end
40
+
41
+ private
42
+
43
+ def compute_closure(start_states)
44
+ visited = Set.new
45
+ to_visit = start_states.to_a
109
46
 
110
47
  until to_visit.empty?
111
48
  state = to_visit.shift
112
-
113
49
  next if visited.include?(state)
114
50
 
115
51
  visited << state
116
52
 
117
- transitions.each do |from, label, to|
118
- to_visit << to if from == state && label.nil? && !visited.include?(to)
53
+ epsilon_from(state).each do |target_state|
54
+ to_visit << target_state unless visited.include?(target_state)
119
55
  end
120
56
  end
121
57
 
122
58
  visited
123
59
  end
124
60
 
125
- def epsilon_closure(start)
126
- bit_result = epsilon_closure_with_bitset(start.to_set)
127
- ::SortedSet.new(bit_result)
128
- end
129
-
130
- def add_epsilon_transition(from, to)
131
- @transitions << [from, nil, to]
132
- end
133
-
134
- def add_transition(from, char, to)
135
- @transitions << [from, char, to]
136
- end
137
-
138
- def merge_nfa(other)
139
- @transitions.merge(other.transitions)
140
- add_epsilon_transition(@start, other.start)
141
- other.accept.each do |accept_state|
142
- @accept << accept_state
61
+ def epsilon_from(state)
62
+ transitions.each_with_object([]) do |(from, label, to), result|
63
+ result << to if from == state && label.nil?
143
64
  end
144
65
  end
145
66
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Automaton
5
5
  class StateID
6
6
  attr_reader :id
@@ -23,6 +23,7 @@ class Hoozuki
23
23
 
24
24
  def <=>(other)
25
25
  return nil unless other.is_a?(StateID)
26
+
26
27
  @id <=> other.id
27
28
  end
28
29
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Instruction
5
5
  class Char
6
6
  attr_accessor :char
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Instruction
5
5
  class Jmp
6
6
  attr_accessor :target
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Instruction
5
5
  class Match
6
6
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Instruction
5
5
  class Split
6
6
  attr_accessor :left, :right
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Node
5
5
  class Choice
6
6
  attr_reader :children
@@ -8,6 +8,18 @@ class Hoozuki
8
8
  def initialize(children)
9
9
  @children = children
10
10
  end
11
+
12
+ def to_nfa(state)
13
+ child_nfas = @children.map { |child| child.to_nfa(state) }
14
+ start_state = state.new_state
15
+ accepts = child_nfas.flat_map(&:accept).to_set
16
+ nfa = Automaton::NFA.new(start_state, accepts)
17
+ child_nfas.each do |child_nfa|
18
+ nfa.merge_transitions(child_nfa)
19
+ nfa.add_epsilon_transition(start_state, child_nfa.start)
20
+ end
21
+ nfa
22
+ end
11
23
  end
12
24
  end
13
25
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Node
5
5
  class Concatenation
6
6
  attr_reader :children
@@ -8,6 +8,21 @@ class Hoozuki
8
8
  def initialize(children)
9
9
  @children = children
10
10
  end
11
+
12
+ def to_nfa(state)
13
+ nfas = @children.map { |child| child.to_nfa(state) }
14
+ nfa = nfas.first
15
+
16
+ nfas.drop(1).each do |next_nfa|
17
+ nfa.merge_transitions(next_nfa)
18
+ nfa.accept.each do |accept_state|
19
+ nfa.add_epsilon_transition(accept_state, next_nfa.start)
20
+ end
21
+ nfa.accept = next_nfa.accept
22
+ end
23
+
24
+ nfa
25
+ end
11
26
  end
12
27
  end
13
28
  end
@@ -1,8 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Node
5
5
  class Epsilon
6
+ def to_nfa(state)
7
+ start_state = state.new_state
8
+ accept_state = state.new_state
9
+ nfa = Automaton::NFA.new(start_state, [accept_state])
10
+ nfa.add_epsilon_transition(start_state, accept_state)
11
+ nfa
12
+ end
6
13
  end
7
14
  end
8
15
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Node
5
5
  class Literal
6
6
  attr_reader :value
@@ -8,6 +8,14 @@ class Hoozuki
8
8
  def initialize(value)
9
9
  @value = value
10
10
  end
11
+
12
+ def to_nfa(state)
13
+ start_state = state.new_state
14
+ accept_state = state.new_state
15
+ nfa = Automaton::NFA.new(start_state, [accept_state])
16
+ nfa.add_transition(start_state, @value, accept_state)
17
+ nfa
18
+ end
11
19
  end
12
20
  end
13
21
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class Hoozuki
3
+ module Hoozuki
4
4
  module Node
5
5
  class Repetition
6
6
  attr_reader :child
@@ -21,6 +21,60 @@ class Hoozuki
21
21
  def optional?
22
22
  @quantifier == :optional
23
23
  end
24
+
25
+ def to_nfa(state)
26
+ if zero_or_more?
27
+ to_nfa_zero_or_more(state)
28
+ elsif one_or_more?
29
+ to_nfa_one_or_more(state)
30
+ elsif optional?
31
+ to_nfa_optional(state)
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def to_nfa_zero_or_more(state)
38
+ remain = @child.to_nfa(state)
39
+ start_state = state.new_state
40
+ accepts = remain.accept.dup << start_state
41
+
42
+ nfa = Automaton::NFA.new(start_state, accepts)
43
+ nfa.merge_transitions(remain)
44
+ nfa.add_epsilon_transition(start_state, remain.start)
45
+
46
+ remain.accept.each do |accept_state|
47
+ nfa.add_epsilon_transition(accept_state, remain.start)
48
+ end
49
+
50
+ nfa
51
+ end
52
+
53
+ def to_nfa_one_or_more(state)
54
+ remain = @child.to_nfa(state)
55
+ start_state = state.new_state
56
+ accept_state = state.new_state
57
+ nfa = Automaton::NFA.new(start_state, [accept_state])
58
+
59
+ nfa.transitions.merge(remain.transitions)
60
+ nfa.add_epsilon_transition(start_state, remain.start)
61
+ remain.accept.each do |remain_accept|
62
+ nfa.add_epsilon_transition(remain_accept, remain.start)
63
+ nfa.add_epsilon_transition(remain_accept, accept_state)
64
+ end
65
+ nfa
66
+ end
67
+
68
+ def to_nfa_optional(state)
69
+ child_nfa = @child.to_nfa(state)
70
+ start_state = state.new_state
71
+ accepts = child_nfa.accept.dup << start_state
72
+
73
+ nfa = Automaton::NFA.new(start_state, accepts)
74
+ nfa.merge_transitions(child_nfa)
75
+ nfa.add_epsilon_transition(start_state, child_nfa.start)
76
+ nfa
77
+ end
24
78
  end
25
79
  end
26
80
  end