hoozuki 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c496655adac0eea264d8a058af5518711803be3441781a1e59c132b6ce67d4e7
4
- data.tar.gz: '0481e2619ba549f46849b5471f60ee77388fdf79c7dd4c9decbb3f3f3a21093f'
3
+ metadata.gz: ff80e01946d63faf2012a01c6f7d2c04f0330b47aa63ba2a0f540c9e6cf290fc
4
+ data.tar.gz: 515b27f972f50c5f4c35cf6d33422637f93cdedc7db54ffdbd7a3f363a10c18f
5
5
  SHA512:
6
- metadata.gz: 1e9aae7d5afa4cc9cab9faf374528cbd2e334ec85305b75ac324b4cc6e4ecfe871d10c6a5478fc83e56dbe89605de0e377d4ad67ae9489464f128d661fad5709
7
- data.tar.gz: e62195f96cdff93a141e1796a4e6b7322e57f50ab885b9e61c2815832e45b66137536b1d94782974e57d7ba4b174a5099f1c17b794b28041068292905a794171
6
+ metadata.gz: d26025dabd381db4ec2dc42ee4baa626e5389fd4ce7ade66f6e5a6d42879c929e2529e65413c919b8119e0127027986243a3be8a5bc0c702cbc52c57c276fee3
7
+ data.tar.gz: edff1a212c89c9f322517649ef6b8daf27bbaa41682e312313b064e97002d3a0c395e2bc3c9a82c657ea8e38670e91f31616923f5d026776a5d81bac0892a3ec
data/CHANGELOG.md CHANGED
@@ -2,7 +2,11 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
- ## 0.1.0 (2022-07-04)
5
+ ## 0.2.0 (2025-08-25)
6
+
7
+ - Support VM engine for regex matching. ([@ydah])
8
+
9
+ ## 0.1.0 (2025-08-23)
6
10
 
7
11
  - Initial release
8
12
 
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
- # Hoozuki (鬼灯)
1
+ # Hoozuki (鬼灯) [![Gem Version](https://badge.fury.io/rb/hoozuki.svg)](https://badge.fury.io/rb/hoozuki) [![CI](https://github.com/ydah/hoozuki/actions/workflows/ci.yml/badge.svg)](https://github.com/ydah/hoozuki/actions/workflows/ci.yml)
2
2
 
3
3
  A hobby regex engine written in Ruby. Designed to be simple and efficient for educational purposes.
4
+ Currently supports 2 engines:
5
+ - NFA Based Engine
6
+ - VM Based Engine
4
7
 
5
8
  ## Installation
6
9
 
@@ -12,10 +15,20 @@ gem install hoozuki
12
15
 
13
16
  ```ruby
14
17
  require 'hoozuki'
15
- regex = Hoozuki::Regex.new('a(bc|de)f')
16
- puts regex.match?('abcdef') # => true
17
- puts regex.match?('adef') # => true
18
- puts regex.match?('xyz') # => false
18
+ regex = Hoozuki.new('a(bc|de)*f') # Or Hoozuki.new('a(bc|de)*f', engine: :dfa) for the default automaton (NFA/DFA) based engine
19
+ regex.match?('abcdef') # => true
20
+ regex.match?('adef') # => true
21
+ regex.match?('xyz') # => false
22
+ ```
23
+
24
+ If you want to use the VM based engine:
25
+
26
+ ```ruby
27
+ require 'hoozuki'
28
+ regex = Hoozuki.new('a(bc|de)*f', engine: :vm)
29
+ regex.match?('abcdef') # => true
30
+ regex.match?('adef') # => true
31
+ regex.match?('xyz') # => false
19
32
  ```
20
33
 
21
34
  ## License
@@ -0,0 +1,13 @@
# frozen_string_literal: true

class Hoozuki
  module Instruction
    # VM instruction that carries one operand: the character to compare
    # against the input. The operand stays writable after construction.
    class Char
      attr_accessor :char

      # @param char [Object] operand stored as-is (no validation is done here)
      def initialize(char)
        self.char = char
      end
    end
  end
end
@@ -0,0 +1,13 @@
# frozen_string_literal: true

class Hoozuki
  module Instruction
    # VM instruction that carries one operand: the jump target. The target
    # is writable so it can be filled in after the instruction is created.
    class Jmp
      attr_accessor :target

      # @param target [Object] operand stored as-is (no validation is done here)
      def initialize(target)
        self.target = target
      end
    end
  end
end
@@ -0,0 +1,8 @@
# frozen_string_literal: true

class Hoozuki
  module Instruction
    # Operand-less marker instruction; it holds no state of its own.
    class Match; end
  end
end
@@ -0,0 +1,14 @@
# frozen_string_literal: true

class Hoozuki
  module Instruction
    # VM instruction that carries two operands, +left+ and +right+. Both
    # stay writable so either one can be filled in after construction.
    class Split
      attr_accessor :left, :right

      # @param left [Object] first operand, stored as-is
      # @param right [Object] second operand, stored as-is
      def initialize(left, right)
        self.left = left
        self.right = right
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'instruction/char'
4
+ require_relative 'instruction/jmp'
5
+ require_relative 'instruction/match'
6
+ require_relative 'instruction/split'
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Hoozuki
4
- VERSION = '0.1.0'
4
+ VERSION = '0.2.0'
5
5
  end
@@ -0,0 +1,84 @@
# frozen_string_literal: true

class Hoozuki
  module VM
    # Translates a parsed regex AST into a flat list of VM instructions
    # (Char / Split / Jmp / Match). Instruction operands are indexes into
    # that list; +@pc+ always equals the index the next emitted instruction
    # will receive.
    class Compiler
      # @return [Array] the instructions emitted so far
      attr_reader :instructions

      def initialize
        @pc = 0
        @instructions = []
      end

      # Compiles +ast+ and terminates the program with a Match instruction.
      #
      # @param ast [Object] root node (Literal, Epsilon, Repetition, Choice
      #   or Concatenation); any other object emits nothing
      # @return [Array] the instruction list (same object as #instructions)
      def compile(ast)
        _compile(ast)
        emit(Hoozuki::Instruction::Match.new)
        @instructions
      end

      private

      # Dispatches on the node type and appends the matching instructions.
      def _compile(ast)
        case ast
        when Hoozuki::Node::Literal
          emit(Hoozuki::Instruction::Char.new(ast.value))
        when Hoozuki::Node::Epsilon
          # Epsilon matches the empty string: nothing to emit.
        when Hoozuki::Node::Repetition
          compile_repetition(ast)
        when Hoozuki::Node::Choice
          compile_choice(ast)
        when Hoozuki::Node::Concatenation
          ast.children.each { |child| _compile(child) }
        end
      end

      # Emits the loop / optional scaffolding around a repeated child.
      # A repetition that answers false to all three predicates emits nothing.
      def compile_repetition(ast)
        if ast.zero_or_more?
          # split: Split(body, exit); body...; Jmp(split); exit:
          split = @pc
          emit(Hoozuki::Instruction::Split.new(split + 1, 0))
          _compile(ast.child)
          emit(Hoozuki::Instruction::Jmp.new(split))
          @instructions[split].right = @pc # exit is only known after the body
        elsif ast.one_or_more?
          # start: body...; Split(start, exit); exit:
          start = @pc
          _compile(ast.child)
          emit(Hoozuki::Instruction::Split.new(start, @pc + 1))
        elsif ast.optional?
          # Split(body, exit); body...; exit:
          split = @pc
          emit(Hoozuki::Instruction::Split.new(split + 1, 0))
          _compile(ast.child)
          @instructions[split].right = @pc
        end
      end

      # Emits an alternation over every child of the Choice node.
      #
      # Each alternative except the last is laid out as
      #   Split(alt, next); alt...; Jmp(end)
      # and the last alternative is emitted bare. With exactly two children
      # this yields Split / first / Jmp / last; with more, every
      # alternative is compiled instead of only the first and last.
      def compile_choice(ast)
        *alternatives, last = ast.children

        exits = alternatives.map do |alternative|
          split = Hoozuki::Instruction::Split.new(@pc + 1, 0)
          emit(split)
          _compile(alternative)
          jump = Hoozuki::Instruction::Jmp.new(0)
          emit(jump)
          split.right = @pc # the next alternative starts right after the Jmp
          jump
        end

        _compile(last)
        # Every non-final alternative leaves through the common end address.
        exits.each { |jump| jump.target = @pc }
      end

      # Appends +instruction+ and advances the program counter.
      def emit(instruction)
        @instructions << instruction
        @pc += 1
      end

      # Replaces the instruction stored at index +pc+.
      def patch(pc, instruction)
        @instructions[pc] = instruction
      end
    end
  end
end
@@ -0,0 +1,39 @@
# frozen_string_literal: true

class Hoozuki
  module VM
    # Backtracking interpreter for a list of Hoozuki::Instruction objects.
    # A run succeeds only when a Match instruction is reached with the whole
    # input consumed.
    class Evaluator
      class << self
        # Runs +instructions+ against +input+ on a fresh evaluator.
        #
        # @param instructions [Array] program to execute
        # @param input [String] text to match
        # @param input_pos [Integer] index in +input+ to start from
        # @param pc [Integer] index of the first instruction to execute
        # @return [Boolean] true when the program accepts the input
        def evaluate(instructions, input, input_pos = 0, pc = 0)
          new._evaluate(instructions, input, input_pos, pc)
        end
      end

      # Executes the program from +pc+ at +input_pos+.
      #
      # Split explores its +left+ branch recursively and falls through to
      # +right+ when that fails. Each (pc, input_pos) state is explored at
      # most once per run: execution from a state depends only on that pair,
      # and a successful branch returns true all the way up, so reaching a
      # recorded state again can only mean it already failed or is still
      # being explored further up the stack (a loop that consumed no input).
      # Without this bookkeeping a loop whose body can match the empty
      # string would recurse without bound.
      #
      # @param instructions [Array] program to execute
      # @param input [String] text to match
      # @param input_pos [Integer] current index in +input+
      # @param pc [Integer] current instruction index
      # @param visited [Hash] states already explored in this run; callers
      #   normally leave the default
      # @return [Boolean] true when the program accepts the input
      # @raise [RuntimeError] when an unknown instruction object is met
      def _evaluate(instructions, input, input_pos, pc, visited = {})
        loop do
          # Running off the end of the program is a failed match.
          return false if pc >= instructions.size

          state = [pc, input_pos]
          return false if visited[state]

          visited[state] = true

          inst = instructions[pc]
          case inst
          when Hoozuki::Instruction::Char
            return false if input_pos >= input.size || input[input_pos] != inst.char

            input_pos += 1
            pc += 1
          when Hoozuki::Instruction::Jmp
            pc = inst.target
          when Hoozuki::Instruction::Split
            return true if _evaluate(instructions, input, input_pos, inst.left, visited)

            pc = inst.right
          when Hoozuki::Instruction::Match
            return input_pos == input.size
          else
            raise "Unknown instruction: #{inst.class}"
          end
        end
      end
    end
  end
end
data/lib/hoozuki/vm.rb ADDED
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'vm/compiler'
4
+ require_relative 'vm/evaluator'
data/lib/hoozuki.rb CHANGED
@@ -1,29 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'hoozuki/automaton'
4
+ require_relative 'hoozuki/instruction'
4
5
  require_relative 'hoozuki/node'
5
6
  require_relative 'hoozuki/parser'
6
7
  require_relative 'hoozuki/version'
8
+ require_relative 'hoozuki/vm'
7
9
 
8
10
  class Hoozuki
9
- def initialize(input, method: :dfa)
11
+ def initialize(input, engine: :dfa)
10
12
  @input = input
11
- @method = method
13
+ @engine = engine
12
14
 
13
15
  ast = Hoozuki::Parser.new(input).parse
14
- case method
16
+ case engine
15
17
  when :dfa
16
18
  nfa = Automaton::NFA.new_from_node(ast, Automaton::StateID.new(0))
17
19
  @dfa = Automaton::DFA.from_nfa(nfa, use_cache?(input))
20
+ when :vm
21
+ compiler = VM::Compiler.new
22
+ compiler.compile(ast)
23
+ @bytecode = compiler.instructions
18
24
  end
19
25
  end
20
26
 
21
27
  def match?(input)
22
- case @method
28
+ case @engine
23
29
  when :dfa
24
30
  @dfa.match?(input, use_cache?(input))
31
+ when :vm
32
+ VM::Evaluator.evaluate(@bytecode, input, 0, 0)
25
33
  else
26
- raise ArgumentError, "Unknown method: #{@method}"
34
+ raise ArgumentError, "Unknown engine: #{@engine}"
27
35
  end
28
36
  end
29
37
 
data/spec/hoozuki_spec.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  RSpec.describe Hoozuki do
4
- describe '#match?' do
5
- subject { described_class.new(pattern).match?(value) }
4
+ shared_examples 'regex matching behavior' do |mode|
5
+ subject { described_class.new(pattern, engine: mode).match?(value) }
6
6
 
7
7
  context 'with basic concatenation' do
8
8
  let(:pattern) { 'abc' }
@@ -208,4 +208,14 @@ RSpec.describe Hoozuki do
208
208
  end
209
209
  end
210
210
  end
211
+
212
+ describe '#match?' do
213
+ context 'with :vm mode' do
214
+ include_examples 'regex matching behavior', :vm
215
+ end
216
+
217
+ context 'with :dfa mode' do
218
+ include_examples 'regex matching behavior', :dfa
219
+ end
220
+ end
211
221
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hoozuki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yudai Takada
@@ -28,6 +28,11 @@ files:
28
28
  - lib/hoozuki/automaton/dfa.rb
29
29
  - lib/hoozuki/automaton/nfa.rb
30
30
  - lib/hoozuki/automaton/state_id.rb
31
+ - lib/hoozuki/instruction.rb
32
+ - lib/hoozuki/instruction/char.rb
33
+ - lib/hoozuki/instruction/jmp.rb
34
+ - lib/hoozuki/instruction/match.rb
35
+ - lib/hoozuki/instruction/split.rb
31
36
  - lib/hoozuki/node.rb
32
37
  - lib/hoozuki/node/choice.rb
33
38
  - lib/hoozuki/node/concatenation.rb
@@ -36,6 +41,9 @@ files:
36
41
  - lib/hoozuki/node/repetition.rb
37
42
  - lib/hoozuki/parser.rb
38
43
  - lib/hoozuki/version.rb
44
+ - lib/hoozuki/vm.rb
45
+ - lib/hoozuki/vm/compiler.rb
46
+ - lib/hoozuki/vm/evaluator.rb
39
47
  - spec/hoozuki_spec.rb
40
48
  - spec/spec_helper.rb
41
49
  homepage: https://github.com/ydah/hoozuki