regular_expression 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/main.yml +36 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +42 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +76 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +69 -0
- data/LICENSE +21 -0
- data/README.md +51 -0
- data/Rakefile +18 -0
- data/bin/console +8 -0
- data/bin/parse +50 -0
- data/build/.gitignore +1 -0
- data/lib/regular_expression.rb +19 -0
- data/lib/regular_expression/ast.rb +364 -0
- data/lib/regular_expression/bytecode.rb +189 -0
- data/lib/regular_expression/cfg.rb +154 -0
- data/lib/regular_expression/compiler/ruby.rb +104 -0
- data/lib/regular_expression/compiler/x86.rb +281 -0
- data/lib/regular_expression/interpreter.rb +92 -0
- data/lib/regular_expression/lexer.rb +53 -0
- data/lib/regular_expression/nfa.rb +118 -0
- data/lib/regular_expression/parser.rb +399 -0
- data/lib/regular_expression/parser.y +96 -0
- data/lib/regular_expression/pattern.rb +23 -0
- data/lib/regular_expression/version.rb +5 -0
- data/regular_expression.gemspec +25 -0
- metadata +99 -0
@@ -0,0 +1,154 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RegularExpression
|
4
|
+
# The CFG is a directed graph of extended basic blocks of bytecode
|
5
|
+
# instructions. This module has objects to represent the EBB, a graph object
|
6
|
+
# which contains a set of EBBs, and a builder that creates a CFG from a
|
7
|
+
# compiled bytecode object.
|
8
|
+
module CFG
|
9
|
+
# Build a control-flow graph of extended basic blocks from compiled bytecode.
#
# compiled must respond to #insns (instructions indexable by address) and
# #labels (a hash of label name => address).
#
# Returns a Graph holding one ExtendedBasicBlock per distinct start address,
# together with a map from exit label to the block that label resolves to.
#
# Raises RuntimeError if a block runs past the end of the instruction list
# or contains an instruction this builder does not recognise.
def self.build(compiled)
  # Each label in the compiled bytecode starts a block, as does the first
  # instruction.
  all_blocks = { start: 0 }.merge(compiled.labels)

  # Start addresses are consulted once per instruction walked, so keep them
  # in a Set rather than scanning an Array each time.
  block_starts = Set.new(all_blocks.values)

  # We may add labels of our own while splitting blocks, so work on a copy of
  # the label table and maintain the reverse (address => label) map as well.
  all_labels = compiled.labels.dup
  all_labels_reverse = all_labels.invert

  # These are the blocks we're finding - indexed by their start address.
  blocks = {}

  all_blocks.each do |name, start_n|
    # Collect the instructions in the block and the labels it exits to.
    block_insns = []
    block_exits = Set.new

    insn_n = start_n

    loop do
      # Reaching the start of another block ends this one, because an EBB
      # has only one entry point. Hand over with an explicit jump.
      if insn_n != start_n && block_starts.include?(insn_n)
        target = all_labels_reverse[insn_n]
        unless target
          target = :"extra#{insn_n}"
          all_labels[target] = insn_n
          all_labels_reverse[insn_n] = target
        end
        block_insns.push(Bytecode::Insns::Jump.new(target))
        block_exits.add(target)
        break
      end

      insn = compiled.insns[insn_n]
      block_insns.push(insn)

      # Record where this instruction can transfer control to, and decide
      # whether the walk continues with the following instruction.
      case insn
      when Bytecode::Insns::PushIndex, Bytecode::Insns::PopIndex
        insn_n += 1
      when Bytecode::Insns::GuardBegin, Bytecode::Insns::GuardEnd
        block_exits.add(insn.guarded)
        insn_n += 1
      when Bytecode::Insns::JumpAny, Bytecode::Insns::JumpValuesInvert,
           Bytecode::Insns::JumpRange, Bytecode::Insns::JumpRangeInvert,
           Bytecode::Insns::JumpValue
        block_exits.add(insn.target)
        insn_n += 1
      when Bytecode::Insns::Jump
        block_exits.add(insn.target)
        break
      when Bytecode::Insns::Match, Bytecode::Insns::Fail
        break
      when nil
        raise "No instruction at address #{insn_n}: block #{name.inspect} ran past the end of the bytecode"
      else
        raise "Unsupported instruction at address #{insn_n}: #{insn.inspect}"
      end
    end

    blocks[start_n] = ExtendedBasicBlock.new(name, block_insns, block_exits.to_a)
  end

  # Create a map of jump target labels to the blocks that contain them.
  exit_map = {}
  blocks.each_value do |block|
    block.exits.each do |exit|
      exit_map[exit] ||= blocks[all_labels[exit]]
    end
  end

  Graph.new(blocks.values, exit_map)
end
|
87
|
+
|
88
|
+
# Render a CFG through Graphviz and return its DOT source.
#
# cfg must respond to #to_dot(graph); it is asked to populate a fresh
# Graphviz::Graph. The graph is then written out via Graphviz.output.
#
# path:   where the rendered image is written (defaults to "build/cfg.svg",
#         the location this method previously always used).
# format: output format handed to Graphviz.output (defaults to "svg").
#
# Returns whatever graph.to_dot returns for the populated graph.
def self.to_dot(cfg, path: "build/cfg.svg", format: "svg")
  graph = Graphviz::Graph.new
  cfg.to_dot(graph)

  Graphviz.output(graph, path: path, format: format)
  graph.to_dot
end
|
95
|
+
|
96
|
+
# An Extended Basic Block (EBB): a straight-line run of instructions that is
# entered only at its first instruction and may leave through any number of
# exits (possibly none).
class ExtendedBasicBlock
  # name  - the label identifying this block
  # insns - the instructions in the block, in order
  # exits - the labels this block can transfer control to
  attr_reader :name, :insns, :exits

  def initialize(name, insns, exits)
    @exits = exits
    @insns = insns
    @name = name
  end

  # Write a textual listing of this block to io: a header line with the
  # block name, one line per instruction, then one line per exit showing the
  # name of the block that exit resolves to in exit_map.
  def dump(exit_map, io: $stdout)
    io.puts("#{name}:")

    insns.each do |insn|
      io.puts(" #{insn}")
    end

    exits.each do |exit|
      destination = exit_map[exit]
      io.puts(" #{exit} -> #{destination.name}")
    end
  end
end
|
113
|
+
|
114
|
+
# A graph is a set of EBBs, plus the map that resolves each exit label to
# the block it leads to.
class Graph
  # blocks   - the extended basic blocks, in the order they were given
  # exit_map - hash of exit label => block
  attr_reader :blocks, :exit_map

  def initialize(blocks, exit_map)
    @blocks = blocks
    @exit_map = exit_map
  end

  # The entry block: the first block in the list.
  def start
    blocks.first
  end

  # Return a textual listing of every block, produced by asking each block
  # to dump itself into a shared buffer.
  def dump
    output = StringIO.new
    blocks.each { |block| block.dump(exit_map, io: output) }
    output.string
  end

  # Populate graph with one box-shaped node per block and one edge per
  # distinct successor block.
  #
  # graph must respond to #add_node(id, **attributes), and the nodes it
  # returns must respond to #connect(node).
  def to_dot(graph)
    nodes = {}

    blocks.each do |block|
      # Join with a literal newline rather than $/: the label must stay
      # multi-line even if the global record separator has been changed.
      label = ["#{block.name}:", *block.insns.map { |insn| " #{insn}" }].join("\n")

      nodes[block] = graph.add_node(block.object_id, label: label, labeljust: "l", shape: "box")
    end

    blocks.each do |block|
      # Several exit labels may resolve to the same block; draw one edge.
      successors = block.exits.map { |exit| nodes[exit_map[exit]] }.uniq
      successors.each do |successor|
        nodes[block].connect(successor)
      end
    end
  end
end
|
153
|
+
end
|
154
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RegularExpression
|
4
|
+
module Compiler
|
5
|
+
module Ruby
|
6
|
+
# Holds the Ruby source text produced by the compiler and turns it into a
# callable on demand.
class Compiled
  # The Ruby source text this object was created with.
  attr_reader :source

  def initialize(source)
    @source = source
  end

  # Evaluate the stored source and return the result. The source is
  # re-evaluated on every call; nothing is cached.
  def to_proc
    eval(@source) # rubocop:disable Security/Eval
  end
end
|
17
|
+
|
18
|
+
# Generate Ruby code for a CFG. This looks just like the interpreter, but
# abstracted in time one level!
#
# cfg must respond to #start, #blocks and #exit_map. Each block contributes
# one `when` arm to a `case block` dispatch inside the generated lambda, and
# each instruction contributes the Ruby statements for that arm.
#
# Returns a Compiled wrapping the generated source. The generated lambda
# takes a string and returns true or false.
#
# Raises RuntimeError if a block contains an instruction this generator does
# not recognise.
# rubocop:disable Layout/LineLength
def self.compile(cfg)
  # Statements that transfer control to the block behind the given label.
  goto = ->(label) { ["block = #{cfg.exit_map[label].name.inspect}", "next"] }

  # Statements for "when condition holds, consume one character and go to
  # the block behind label"; otherwise execution continues with whatever
  # statement follows.
  consume_if = lambda do |condition, label|
    ["if #{condition}", "  string_n += 1", *goto.call(label).map { |line| "  #{line}" }, "end"]
  end

  ruby_src = [
    "-> (string) {",
    "  start_n = 0",
    "  stack = []",
    "  while start_n <= string.size",
    "    string_n = start_n",
    "    block = #{cfg.start.name.inspect}",
    "    loop do",
    "      case block"
  ]

  cfg.blocks.each do |block|
    ruby_src.push "      when #{block.name.inspect}"

    block.insns.each do |insn|
      statements =
        case insn
        when Bytecode::Insns::PushIndex
          ["stack << string_n"]
        when Bytecode::Insns::PopIndex
          ["string_n = stack.pop"]
        when Bytecode::Insns::GuardBegin
          # Once the start-of-string check has passed, control must move on
          # to the guarded block (as the x86 backend does); without the jump
          # execution would fall through to the rest of this block.
          ["return false if start_n != 0", *goto.call(insn.guarded)]
        when Bytecode::Insns::GuardEnd
          ["if string_n == string.size", *goto.call(insn.guarded).map { |line| "  #{line}" }, "end"]
        when Bytecode::Insns::JumpAny
          consume_if.call("string_n < string.size", insn.target)
        when Bytecode::Insns::JumpValue
          consume_if.call("string_n < string.size && string[string_n] == #{insn.char.inspect}", insn.target)
        when Bytecode::Insns::JumpValuesInvert
          consume_if.call("string_n < string.size && !#{insn.chars.inspect}.include?(string[string_n])", insn.target)
        when Bytecode::Insns::JumpRange
          consume_if.call("string_n < string.size && string[string_n] >= #{insn.left.inspect} && string[string_n] <= #{insn.right.inspect}", insn.target)
        when Bytecode::Insns::JumpRangeInvert
          consume_if.call("string_n < string.size && (string[string_n] < #{insn.left.inspect} || string[string_n] > #{insn.right.inspect})", insn.target)
        when Bytecode::Insns::Jump
          goto.call(insn.target)
        when Bytecode::Insns::Match
          ["return true"]
        when Bytecode::Insns::Fail
          # Give up at this start position and retry one character later.
          ["start_n += 1", "break"]
        else
          raise "Unsupported instruction: #{insn.inspect}"
        end

      statements.each { |statement| ruby_src.push "        #{statement}" }
    end
  end

  ruby_src.push "      end"
  ruby_src.push "    end"
  ruby_src.push "  end"
  ruby_src.push "  false"
  ruby_src.push "}"
  ruby_src.push ""

  # Join with a literal newline rather than $/ so that the generated source
  # stays valid even if the global record separator has been changed.
  Compiled.new(ruby_src.join("\n"))
end
# rubocop:enable Layout/LineLength
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,281 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RegularExpression
|
4
|
+
module Compiler
|
5
|
+
module X86
|
6
|
+
# Wraps the JIT buffer that holds the generated machine code.
class Compiled
  # The buffer object the machine code was assembled into.
  attr_reader :buffer

  def initialize(buffer)
    @buffer = buffer
  end

  # Return a textual disassembly of the bytes written to the buffer so far,
  # one instruction per line as "address<TAB>mnemonic<TAB>operands".
  def disasm
    output = StringIO.new

    crabstone = Crabstone::Disassembler.new(Crabstone::ARCH_X86, Crabstone::MODE_64)
    crabstone.disasm(buffer.memory.to_s(buffer.pos), buffer.memory.to_i).each do |insn|
      output.printf(
        "0x%<address>x:\t%<instruction>s\t%<details>s\n",
        address: insn.address,
        instruction: insn.mnemonic,
        details: insn.op_str
      )
    end

    output.string
  end

  # Return a lambda that runs the compiled code against a string.
  #
  # The lambda returns the integer produced by the native function, or nil
  # when that integer is the "no match" sentinel (length + 1).
  def to_proc
    function = buffer.to_function([Fiddle::TYPE_VOIDP, Fiddle::TYPE_SIZE_T], Fiddle::TYPE_SIZE_T)

    lambda do |string|
      # The native code is handed a raw pointer and a length to bound its
      # scan, so the length has to be counted in bytes. A character count is
      # too small for multibyte strings and would cut the scan short.
      length = string.bytesize
      value = function.call(string, length)
      value if value != length + 1
    end
  end
end
|
38
|
+
|
39
|
+
# Generate native code for a CFG. This looks just like the Ruby generator
# but abstracted one level, or just like the interpreter but abstracted
# two levels!
#
# cfg must respond to #blocks and #exit_map. Every block becomes a label in
# the emitted code and every instruction becomes a short run of machine
# instructions assembled through the Fisk DSL.
#
# Returns a Compiled wrapping the JIT buffer the code was written into.
#
# Raises RuntimeError if a block contains an instruction this generator does
# not recognise.
def self.compile(cfg)
  fisk = Fisk.new
  # NOTE(review): the buffer size is fixed at 1024 - presumably a large
  # pattern can need more; confirm how Fisk behaves when it fills up.
  buffer = Fisk::Helpers.jitbuffer(1024)

  fisk.asm(buffer) do
    # Here we're setting up a couple of local variables that point to
    # registers so that it's easier to see what's actually going on

    # rax is a scratch register that is used for the return value of the
    # function
    return_value = rax

    # rcx is a scratch register that is used to track the index of the
    # string where we're currently looking
    string_index = rcx

    # rdx is a scratch register that is used to track the index of the
    # string where we've started the match
    match_index = rdx

    # rsp is a reserved register that stores a pointer to the stack
    stack_pointer = rsp

    # rbp is a reserved register that stores a pointer to the base of the
    # stack. It is also known as the frame pointer
    frame_pointer = rbp

    # rsi is a scratch register that stores the second argument to the
    # function, and in our case stores the length of the string
    string_length = rsi

    # rdi is a scratch register that stores the first argument to the
    # function, and in our case stores a pointer to the base of the string
    string_pointer = rdi

    # r8 is a scratch register that we're using to store the last read
    # character value from the string
    character_buffer = r8

    # The instruction sequences below are shared by several bytecode
    # instructions. They are lambdas (rather than methods) so that they run
    # with the same receiver as this block and can emit code directly.

    # Skip to the given label when there is no character left to read.
    bail_if_exhausted = lambda do |no_match_label|
      cmp string_index, string_length
      je label(no_match_label)
    end

    # Read from the string at the current index into the character buffer.
    # NOTE(review): m64 makes this a 64-bit load whose result is then
    # compared as a whole against 8-bit immediates - confirm that bytes
    # after the current character cannot affect those comparisons, and that
    # reading 8 bytes near the end of the string is safe.
    load_character = lambda do
      mov character_buffer, string_pointer
      add character_buffer, string_index
      mov character_buffer, m64(character_buffer)
    end

    # Move the string index forward and jump to the instruction's target.
    consume_and_jump = lambda do |insn|
      inc string_index
      jmp label(cfg.exit_map[insn.target].name)
    end

    # Return the frame pointer to its former position and return to the
    # caller.
    leave_frame = lambda do
      mov stack_pointer, frame_pointer
      pop frame_pointer
      ret
    end

    # First we're going to do some initialization of the frame pointer and
    # stack pointer so we can clear the stack when we're done with this
    # function
    push frame_pointer
    mov frame_pointer, stack_pointer

    # Now we're going to initialize the counter to 0 so that we attempt to
    # match at each index of the input string
    xor match_index, match_index

    # This is the start of our loop, where at the beginning of the loop
    # we check if we have already finished looking at each index (in which
    # case we'll jump to a failure condition)
    make_label :start_loop_head
    cmp match_index, string_length
    jg label(:exit)

    # Set the string_index value to the match_index value so that we begin
    # each loop at the current match index
    mov string_index, match_index

    cfg.blocks.each do |block|
      # Label the start of each block so that we can jump between them
      make_label block.name

      block.insns.each do |insn|
        case insn
        when Bytecode::Insns::PushIndex
          push string_index
        when Bytecode::Insns::PopIndex
          pop string_index
        when Bytecode::Insns::GuardBegin
          cmp string_index, imm8(0)
          jne label(:exit)
          jmp label(cfg.exit_map[insn.guarded].name)
        when Bytecode::Insns::GuardEnd
          cmp string_index, string_length
          je label(cfg.exit_map[insn.guarded].name)
        when Bytecode::Insns::JumpAny
          no_match_label = :"no_match_#{insn.object_id}"

          # Any character will do, so only its presence is checked
          bail_if_exhausted.call(no_match_label)
          consume_and_jump.call(insn)

          make_label no_match_label
        when Bytecode::Insns::JumpValue
          no_match_label = :"no_match_#{insn.object_id}"

          bail_if_exhausted.call(no_match_label)
          load_character.call

          # Compare the character buffer to the instruction's character,
          # continue on to the next instruction if it's not equal
          cmp character_buffer, imm8(insn.char.ord)
          jne label(no_match_label)

          consume_and_jump.call(insn)

          make_label no_match_label
        when Bytecode::Insns::JumpValuesInvert
          no_match_label = :"no_match_#{insn.object_id}"

          bail_if_exhausted.call(no_match_label)
          load_character.call

          # Compare the character buffer to each of the instruction's
          # characters, continue on to the next instruction if any of them
          # are equal
          insn.chars.each do |value|
            cmp character_buffer, imm8(value.ord)
            je label(no_match_label)
          end

          consume_and_jump.call(insn)

          make_label no_match_label
        when Bytecode::Insns::JumpRange
          no_match_label = :"no_match_#{insn.object_id}"

          bail_if_exhausted.call(no_match_label)
          load_character.call

          # Compare the character buffer to the left hand side of the
          # instruction's range, continue on to the next instruction if
          # it's outside the range
          cmp character_buffer, imm8(insn.left.ord)
          jl label(no_match_label)

          # Compare the character buffer to the right hand side of the
          # instruction's range, continue on to the next instruction if
          # it's outside the range
          cmp character_buffer, imm8(insn.right.ord)
          jg label(no_match_label)

          consume_and_jump.call(insn)

          make_label no_match_label
        when Bytecode::Insns::JumpRangeInvert
          no_match_label = :"no_match_#{insn.object_id}"
          match_label = :"match_#{insn.object_id}"

          bail_if_exhausted.call(no_match_label)
          load_character.call

          # Compare the character buffer to the left hand side of the
          # instruction's range, jump down to the success case if it's
          # outside the range
          cmp character_buffer, imm8(insn.left.ord)
          jl label(match_label)

          # Compare the character buffer to the right hand side of the
          # instruction's range, continue on to the next instruction if
          # it's inside the range
          cmp character_buffer, imm8(insn.right.ord)
          jle label(no_match_label)

          make_label match_label
          consume_and_jump.call(insn)

          make_label no_match_label
        when Bytecode::Insns::Jump
          jmp label(cfg.exit_map[insn.target].name)
        when Bytecode::Insns::Match
          # If we reach this instruction, then we've successfully matched
          # against the input string, so we're going to return the integer
          # that represents the index at which this match began
          mov return_value, match_index
          leave_frame.call
        when Bytecode::Insns::Fail
          # Retry the whole match one position further into the string
          inc match_index
          jmp label(:start_loop_head)
        else
          raise "Unsupported instruction: #{insn.inspect}"
        end
      end
    end

    # If we reach this instruction, then we've failed to match at every
    # possible index in the string, so we're going to return the length
    # of the string + 1 so that the caller knows that this match failed
    make_label :exit
    mov return_value, string_length
    inc return_value

    # Here we make sure to clean up after ourselves by returning the frame
    # pointer to its former position
    leave_frame.call
  end

  Compiled.new(buffer)
end
|
279
|
+
end
|
280
|
+
end
|
281
|
+
end
|