RubyGems - hoozuki - Versions diffs - 0.2.0 → 1.0.0 - Mend

hoozuki 0.2.0 → 1.0.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (32)

checksums.yaml +4 -4
data/README.md +1 -1
data/Rakefile +12 -2
data/lib/hoozuki/automaton/dfa/builder.rb +79 -0
data/lib/hoozuki/automaton/dfa.rb +4 -41
data/lib/hoozuki/automaton/nfa.rb +29 -108
data/lib/hoozuki/automaton/state_id.rb +2 -1
data/lib/hoozuki/instruction/char.rb +1 -1
data/lib/hoozuki/instruction/jmp.rb +1 -1
data/lib/hoozuki/instruction/match.rb +1 -1
data/lib/hoozuki/instruction/split.rb +1 -1
data/lib/hoozuki/node/choice.rb +13 -1
data/lib/hoozuki/node/concatenation.rb +16 -1
data/lib/hoozuki/node/epsilon.rb +8 -1
data/lib/hoozuki/node/literal.rb +9 -1
data/lib/hoozuki/node/repetition.rb +55 -1
data/lib/hoozuki/parser.rb +888 -76
data/lib/hoozuki/parser.y +128 -0
data/lib/hoozuki/version.rb +2 -2
data/lib/hoozuki/vm/compiler.rb +92 -47
data/lib/hoozuki/vm/evaluator.rb +6 -6
data/lib/hoozuki.rb +15 -16
data/spec/hoozuki/automaton/dfa/builder_spec.rb +79 -0
data/spec/hoozuki/automaton/dfa_spec.rb +149 -0
data/spec/hoozuki/automaton/nfa_spec.rb +168 -0
data/spec/hoozuki/instruction_spec.rb +88 -0
data/spec/hoozuki/node_spec.rb +110 -0
data/spec/hoozuki/parser_spec.rb +168 -0
data/spec/hoozuki/vm/compiler_spec.rb +219 -0
data/spec/hoozuki/vm/evaluator_spec.rb +260 -0
data/spec/hoozuki_spec.rb +177 -3
metadata +12 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ff80e01946d63faf2012a01c6f7d2c04f0330b47aa63ba2a0f540c9e6cf290fc
-  data.tar.gz: 515b27f972f50c5f4c35cf6d33422637f93cdedc7db54ffdbd7a3f363a10c18f
+  metadata.gz: f44a0df16a02460880d94f715f9b4c84234beedafcb2a9bf177669241960317a
+  data.tar.gz: 4bbe7704cdfe994f9d58e83c88416b179d531c3763374e37f1711367ee0127f2
 SHA512:
-  metadata.gz: d26025dabd381db4ec2dc42ee4baa626e5389fd4ce7ade66f6e5a6d42879c929e2529e65413c919b8119e0127027986243a3be8a5bc0c702cbc52c57c276fee3
-  data.tar.gz: edff1a212c89c9f322517649ef6b8daf27bbaa41682e312313b064e97002d3a0c395e2bc3c9a82c657ea8e38670e91f31616923f5d026776a5d81bac0892a3ec
+  metadata.gz: af0244c531d265c5a71b3655263d49023bb8bfaf9ca8bf267db3a2f0d143948f833cb5a540589d5eb33dd7b477a3601a38dbe7b917abba0bc9a874a38df820df
+  data.tar.gz: 66191863f3faec1f662364bba0cf4e0b839e0feadc3a45fbca3ad45c5ed30ae882e63760ef7df401e0597f5610e35babb3c901c532e1cfa3566e06857d5e6a4a

data/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 A hobby regex engine written in Ruby. Designed to be simple and efficient for educational purposes.
 Currently supports 2 engines:
-- NFA Based Engine
+- DFA Based Engine
 - VM Based Engine
 ## Installation

data/Rakefile CHANGED Viewed

@@ -1,8 +1,18 @@
 # frozen_string_literal: true
 require 'bundler/gem_tasks'
-require 'rspec/core/rake_task'
-RSpec::Core::RakeTask.new(:spec)
+namespace 'build' do
+  desc 'build parser from parser.y'
+  task :parser do
+    sh 'bundle exec racc lib/hoozuki/parser.y --embedded --frozen -o lib/hoozuki/parser.rb -t --log-file=parser.output'
+  end
+end
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new(:spec) do |spec|
+  spec.pattern = FileList['spec/**/*_spec.rb']
+end
+task :spec => "build:parser"
 task default: :spec

data/lib/hoozuki/automaton/dfa/builder.rb ADDED Viewed

@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+module Hoozuki
+  module Automaton
+    class DFA
+      class Builder
+        def initialize(nfa, use_cache)
+          @nfa = nfa
+          @use_cache = use_cache
+          @dfa_states = {}
+          @queue = []
+          @nfa_accept_set = nfa.accept.to_set
+        end
+        def call
+          initialize_dfa
+          process_states
+          @dfa
+        end
+        private
+        def initialize_dfa
+          start_states = @nfa.epsilon_closure(Set.new([@nfa.start]))
+          start_id = 0
+          @dfa_states[start_states] = start_id
+          @queue << start_states
+          @dfa = DFA.new(start_id, Set.new)
+        end
+        def process_states
+          while (current_nfa_states = @queue.shift)
+            current_dfa_id = @dfa_states[current_nfa_states]
+            mark_accept(current_nfa_states, current_dfa_id)
+            transitions_map = build_transitions(current_nfa_states)
+            process_transitions(transitions_map, current_dfa_id)
+          end
+        end
+        def mark_accept(nfa_states, dfa_id)
+          return unless nfa_states.any? { |state| @nfa_accept_set.include?(state) }
+          @dfa.accept.merge([dfa_id])
+        end
+        def build_transitions(nfa_states)
+          transitions_map = Hash.new { |h, k| h[k] = Set.new }
+          nfa_states.each do |state|
+            @nfa.transitions.each do |from, label, to|
+              next unless from == state && !label.nil?
+              transitions_map[label].merge(@nfa.epsilon_closure(Set[to]))
+            end
+          end
+          transitions_map
+        end
+        def process_transitions(transitions_map, current_dfa_id)
+          transitions_map.each do |char, next_nfa_states|
+            next_dfa_id = ensure_state(next_nfa_states)
+            @dfa.transitions.add([current_dfa_id, char, next_dfa_id])
+            @dfa.cache[[current_dfa_id, char]] = next_dfa_id if @use_cache
+          end
+        end
+        def ensure_state(nfa_states)
+          return @dfa_states[nfa_states] if @dfa_states.key?(nfa_states)
+          new_id = @dfa_states.length
+          @dfa_states[nfa_states] = new_id
+          @queue.push(nfa_states)
+          new_id
+        end
+      end
+    end
+  end
+end

data/lib/hoozuki/automaton/dfa.rb CHANGED Viewed

@@ -1,6 +1,8 @@
 # frozen_string_literal: true
-class Hoozuki
+require_relative 'dfa/builder'
+module Hoozuki
   module Automaton
     class DFA
       attr_reader :start, :accept, :transitions
@@ -14,46 +16,7 @@ class Hoozuki
       class << self
         def from_nfa(nfa, use_cache)
-          dfa_states = {}
-          queue = []
-          nfa_accept_set = nfa.accept.to_set
-          start_set = Set.new([nfa.start])
-          start_states = nfa.epsilon_closure(start_set)
-          start_id = 0
-          dfa_states[start_states] = start_id
-          queue << start_states
-          dfa = new(start_id, Set.new)
-          while (current_nfa_states = queue.shift)
-            current_dfa_id = dfa_states[current_nfa_states]
-            dfa.accept.merge([current_dfa_id]) if current_nfa_states.any? { |state| nfa_accept_set.include?(state) }
-            transitions_map = Hash.new { |h, k| h[k] = Set.new }
-            current_nfa_states.each do |state|
-              nfa.transitions.each do |from, label, to|
-                transitions_map[label].merge(nfa.epsilon_closure(Set[to])) if from == state && !label.nil?
-              end
-            end
-            transitions_map.each do |char, next_nfa_states|
-              unless dfa_states.key?(next_nfa_states)
-                next_dfa_id = dfa_states.length
-                dfa_states[next_nfa_states] = next_dfa_id
-                queue.push(next_nfa_states)
-              end
-              next_dfa_id = dfa_states[next_nfa_states]
-              dfa.transitions.add([current_dfa_id, char, next_dfa_id])
-              dfa.cache[[current_dfa_id, char]] = next_dfa_id if use_cache
-            end
-          end
-          dfa
+          Builder.new(nfa, use_cache).call
         end
       end

data/lib/hoozuki/automaton/nfa.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 require 'sorted_set'
-class Hoozuki
+module Hoozuki
   module Automaton
     class NFA
       attr_accessor :start, :accept, :transitions
@@ -14,132 +14,53 @@ class Hoozuki
       end
       class << self
-        def new_from_node(node, state)
+        def from_node(node, state)
           raise ArgumentError, 'Node cannot be nil' if node.nil?
-          case node
-          when Node::Literal
-            start_state = state.new_state
-            accept_state = state.new_state
-            nfa = new(start_state, [accept_state])
-            nfa.add_transition(start_state, node.value, accept_state)
-            nfa
-          when Node::Epsilon
-            start_state = state.new_state
-            accept_state = state.new_state
-            nfa = new(start_state, [accept_state])
-            nfa.add_epsilon_transition(start_state, accept_state)
-            nfa
-          when Node::Repetition
-            if node.zero_or_more?
-              remain = new_from_node(node.child, state)
-              start_state = state.new_state
-              accepts = remain.accept.dup
-              accepts << start_state
-              nfa = new(start_state, accepts)
-              nfa.merge_nfa(remain)
-              nfa.add_epsilon_transition(start_state, remain.start)
-              remain.accept.each do |accept_state|
-                nfa.add_epsilon_transition(accept_state, remain.start)
-              end
-              nfa
-            elsif node.one_or_more?
-              remain = new_from_node(node.child, state)
-              start_state = state.new_state
-              accept_state = state.new_state
-              nfa = new(start_state, [accept_state])
-              nfa.transitions.merge(remain.transitions)
-              nfa.add_epsilon_transition(start_state, remain.start)
-              remain.accept.each do |remain_accept|
-                nfa.add_epsilon_transition(remain_accept, remain.start)
-                nfa.add_epsilon_transition(remain_accept, accept_state)
-              end
-              nfa
-            elsif node.optional?
-              child = new_from_node(node.child, state)
-              start_state = state.new_state
-              accepts = child.accept.dup
-              accepts << start_state
-              nfa = new(start_state, accepts)
-              nfa.transitions.merge(child.transitions)
-              nfa.add_epsilon_transition(start_state, child.start)
-              nfa
-            end
-          when Node::Choice
-            remain1 = new_from_node(node.children[0], state)
-            remain2 = new_from_node(node.children[1], state)
-            start_state = state.new_state
-            accepts = remain1.accept if remain1.respond_to?(:accept)
-            accepts |= remain2.accept if remain2.respond_to?(:accept)
-            nfa = new(start_state, accepts)
-            nfa.merge_nfa(remain1)
-            nfa.merge_nfa(remain2)
-            nfa.add_epsilon_transition(start_state, remain1.start)
-            nfa.add_epsilon_transition(start_state, remain2.start)
-            nfa
-          when Node::Concatenation
-            nfas = node.children.map { |child| new_from_node(child, state) }
-            nfa = nfas.first
-            nfas.drop(1).each do |next_nfa|
-              nfa.transitions.merge(next_nfa.transitions)
-              nfa.accept.each do |accept_state|
-                nfa.add_epsilon_transition(accept_state, next_nfa.start)
-              end
-              nfa.accept = next_nfa.accept
-            end
-            nfa
-          else
-            raise ArgumentError, "Unsupported node type: #{node.class}"
-          end
+          node.to_nfa(state)
         end
       end
-      def epsilon_closure_with_bitset(start)
-        visited = Set.new
-        to_visit = []
+      def epsilon_closure(start)
+        closure = compute_closure(start.to_set)
+        ::SortedSet.new(closure)
+      end
-        start.each do |state|
-          to_visit << state unless visited.include?(state)
-        end
+      def merge_transitions(other)
+        @transitions.merge(other.transitions)
+      end
+      def add_epsilon_transition(from, to)
+        @transitions << [from, nil, to]
+      end
+      def add_transition(from, char, to)
+        @transitions << [from, char, to]
+      end
+      private
+      def compute_closure(start_states)
+        visited = Set.new
+        to_visit = start_states.to_a
         until to_visit.empty?
           state = to_visit.shift
           next if visited.include?(state)
           visited << state
-          transitions.each do |from, label, to|
-            to_visit << to if from == state && label.nil? && !visited.include?(to)
+          epsilon_from(state).each do |target_state|
+            to_visit << target_state unless visited.include?(target_state)
           end
         end
         visited
       end
-      def epsilon_closure(start)
-        bit_result = epsilon_closure_with_bitset(start.to_set)
-        ::SortedSet.new(bit_result)
-      end
-      def add_epsilon_transition(from, to)
-        @transitions << [from, nil, to]
-      end
-      def add_transition(from, char, to)
-        @transitions << [from, char, to]
-      end
-      def merge_nfa(other)
-        @transitions.merge(other.transitions)
-        add_epsilon_transition(@start, other.start)
-        other.accept.each do |accept_state|
-          @accept << accept_state
+      def epsilon_from(state)
+        transitions.each_with_object([]) do |(from, label, to), result|
+          result << to if from == state && label.nil?
         end
       end
     end

data/lib/hoozuki/automaton/state_id.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Automaton
     class StateID
       attr_reader :id
@@ -23,6 +23,7 @@ class Hoozuki
       def <=>(other)
         return nil unless other.is_a?(StateID)
         @id <=> other.id
       end
     end

data/lib/hoozuki/instruction/char.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Instruction
     class Char
       attr_accessor :char

data/lib/hoozuki/instruction/jmp.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Instruction
     class Jmp
       attr_accessor :target

data/lib/hoozuki/instruction/match.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Instruction
     class Match
     end

data/lib/hoozuki/instruction/split.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Instruction
     class Split
       attr_accessor :left, :right

data/lib/hoozuki/node/choice.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Node
     class Choice
       attr_reader :children
@@ -8,6 +8,18 @@ class Hoozuki
       def initialize(children)
         @children = children
       end
+      def to_nfa(state)
+        child_nfas = @children.map { |child| child.to_nfa(state) }
+        start_state = state.new_state
+        accepts = child_nfas.flat_map(&:accept).to_set
+        nfa = Automaton::NFA.new(start_state, accepts)
+        child_nfas.each do |child_nfa|
+          nfa.merge_transitions(child_nfa)
+          nfa.add_epsilon_transition(start_state, child_nfa.start)
+        end
+        nfa
+      end
     end
   end
 end

data/lib/hoozuki/node/concatenation.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Node
     class Concatenation
       attr_reader :children
@@ -8,6 +8,21 @@ class Hoozuki
       def initialize(children)
         @children = children
       end
+      def to_nfa(state)
+        nfas = @children.map { |child| child.to_nfa(state) }
+        nfa = nfas.first
+        nfas.drop(1).each do |next_nfa|
+          nfa.merge_transitions(next_nfa)
+          nfa.accept.each do |accept_state|
+            nfa.add_epsilon_transition(accept_state, next_nfa.start)
+          end
+          nfa.accept = next_nfa.accept
+        end
+        nfa
+      end
     end
   end
 end

data/lib/hoozuki/node/epsilon.rb CHANGED Viewed

@@ -1,8 +1,15 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Node
     class Epsilon
+      def to_nfa(state)
+        start_state = state.new_state
+        accept_state = state.new_state
+        nfa = Automaton::NFA.new(start_state, [accept_state])
+        nfa.add_epsilon_transition(start_state, accept_state)
+        nfa
+      end
     end
   end
 end

data/lib/hoozuki/node/literal.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Node
     class Literal
       attr_reader :value
@@ -8,6 +8,14 @@ class Hoozuki
       def initialize(value)
         @value = value
       end
+      def to_nfa(state)
+        start_state = state.new_state
+        accept_state = state.new_state
+        nfa = Automaton::NFA.new(start_state, [accept_state])
+        nfa.add_transition(start_state, @value, accept_state)
+        nfa
+      end
     end
   end
 end

data/lib/hoozuki/node/repetition.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-class Hoozuki
+module Hoozuki
   module Node
     class Repetition
       attr_reader :child
@@ -21,6 +21,60 @@ class Hoozuki
       def optional?
         @quantifier == :optional
       end
+      def to_nfa(state)
+        if zero_or_more?
+          to_nfa_zero_or_more(state)
+        elsif one_or_more?
+          to_nfa_one_or_more(state)
+        elsif optional?
+          to_nfa_optional(state)
+        end
+      end
+      private
+      def to_nfa_zero_or_more(state)
+        remain = @child.to_nfa(state)
+        start_state = state.new_state
+        accepts = remain.accept.dup << start_state
+        nfa = Automaton::NFA.new(start_state, accepts)
+        nfa.merge_transitions(remain)
+        nfa.add_epsilon_transition(start_state, remain.start)
+        remain.accept.each do |accept_state|
+          nfa.add_epsilon_transition(accept_state, remain.start)
+        end
+        nfa
+      end
+      def to_nfa_one_or_more(state)
+        remain = @child.to_nfa(state)
+        start_state = state.new_state
+        accept_state = state.new_state
+        nfa = Automaton::NFA.new(start_state, [accept_state])
+        nfa.transitions.merge(remain.transitions)
+        nfa.add_epsilon_transition(start_state, remain.start)
+        remain.accept.each do |remain_accept|
+          nfa.add_epsilon_transition(remain_accept, remain.start)
+          nfa.add_epsilon_transition(remain_accept, accept_state)
+        end
+        nfa
+      end
+      def to_nfa_optional(state)
+        child_nfa = @child.to_nfa(state)
+        start_state = state.new_state
+        accepts = child_nfa.accept.dup << start_state
+        nfa = Automaton::NFA.new(start_state, accepts)
+        nfa.merge_transitions(child_nfa)
+        nfa.add_epsilon_transition(start_state, child_nfa.start)
+        nfa
+      end
     end
   end
 end