regular_expression 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/main.yml +36 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +42 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +76 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +69 -0
- data/LICENSE +21 -0
- data/README.md +51 -0
- data/Rakefile +18 -0
- data/bin/console +8 -0
- data/bin/parse +50 -0
- data/build/.gitignore +1 -0
- data/lib/regular_expression.rb +19 -0
- data/lib/regular_expression/ast.rb +364 -0
- data/lib/regular_expression/bytecode.rb +189 -0
- data/lib/regular_expression/cfg.rb +154 -0
- data/lib/regular_expression/compiler/ruby.rb +104 -0
- data/lib/regular_expression/compiler/x86.rb +281 -0
- data/lib/regular_expression/interpreter.rb +92 -0
- data/lib/regular_expression/lexer.rb +53 -0
- data/lib/regular_expression/nfa.rb +118 -0
- data/lib/regular_expression/parser.rb +399 -0
- data/lib/regular_expression/parser.y +96 -0
- data/lib/regular_expression/pattern.rb +23 -0
- data/lib/regular_expression/version.rb +5 -0
- data/regular_expression.gemspec +25 -0
- metadata +99 -0
data/bin/parse
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Developer tool: runs a pattern through every stage of the pipeline (parser,
# NFA, bytecode, CFG, Ruby compiler, x86 compiler), prints the intermediate
# dumps, and optionally checks each backend against the strings given on the
# command line.
#
# Usage: parse PATTERN [STRING ...]

$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
require "regular_expression"
require "crabstone"
require "graphviz"

# The first argument is the pattern; without it there is nothing to parse, so
# stop with a usage message instead of handing nil to the parser.
source = ARGV.shift
abort("Usage: #{File.basename($PROGRAM_NAME)} PATTERN [STRING ...]") if source.nil?

# Pass the source through the various parsing phases
ast = RegularExpression::Parser.new.parse(source)
nfa = ast.to_nfa
bytecode = RegularExpression::Bytecode.compile(nfa)

# Compile the graph into various outputs
cfg = RegularExpression::CFG.build(bytecode)
ruby = RegularExpression::Compiler::Ruby.compile(cfg)
x86 = RegularExpression::Compiler::X86.compile(cfg)

# Make sure we get some nice dot output
RegularExpression::AST.to_dot(ast)
RegularExpression::NFA.to_dot(nfa)
RegularExpression::CFG.to_dot(cfg)

# Dump out the bytecode and cfg to strings
puts "#{bytecode.dump}\n"
puts "#{cfg.dump}\n"

# Any remaining arguments are test strings. When there are some, `check` runs
# each of them through the given backend and prints the result; otherwise it
# is a no-op so the calls below stay unconditional.
check =
  if ARGV.any?
    lambda do |compiled|
      checker = compiled.to_proc
      ARGV.each { |string| puts "#{string}: #{checker.call(string)}" }
      puts
    end
  else
    ->(_compiled) {}
  end

# Test the interpreter against any passed strings
interpreter = RegularExpression::Interpreter.new(bytecode)
check.call(interpreter)

# Dump out the compiled ruby source and match against any passed values
puts "#{ruby.source}\n"
check.call(ruby)

# Dump out the disassembled x86 source and match against any passed values
puts "#{x86.disasm}\n"
check.call(x86)
|
data/build/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
*.svg
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "fisk"
|
4
|
+
require "fisk/helpers"
|
5
|
+
require "set"
|
6
|
+
require "stringio"
|
7
|
+
|
8
|
+
require_relative "./regular_expression/ast"
|
9
|
+
require_relative "./regular_expression/bytecode"
|
10
|
+
require_relative "./regular_expression/cfg"
|
11
|
+
require_relative "./regular_expression/interpreter"
|
12
|
+
require_relative "./regular_expression/lexer"
|
13
|
+
require_relative "./regular_expression/nfa"
|
14
|
+
require_relative "./regular_expression/parser"
|
15
|
+
require_relative "./regular_expression/pattern"
|
16
|
+
require_relative "./regular_expression/version"
|
17
|
+
|
18
|
+
require_relative "./regular_expression/compiler/ruby"
|
19
|
+
require_relative "./regular_expression/compiler/x86"
|
@@ -0,0 +1,364 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RegularExpression
|
4
|
+
module AST
|
5
|
+
# Draws the tree rooted at +root+ into a fresh Graphviz graph, asks Graphviz
# to write it to build/ast.svg, and returns the graph's dot representation.
def self.to_dot(root)
  Graphviz::Graph.new.tap do |diagram|
    root.to_dot(diagram)
    Graphviz.output(diagram, path: "build/ast.svg", format: "svg")
  end.to_dot
end
|
12
|
+
|
13
|
+
# The top of the syntax tree: a list of alternative expressions, plus a flag
# saying whether the pattern is anchored at the start.
class Root
  attr_reader :expressions, :at_start # Array[Expression], bool

  def initialize(expressions, at_start: false)
    @expressions = expressions
    @at_start = at_start
  end

  # Adds a node for the root to +graph+ and lets every expression draw itself
  # beneath that node.
  def to_dot(graph)
    caption = at_start ? "Root (at start)" : "Root"
    vertex = graph.add_node(object_id, label: caption)

    expressions.each { |branch| branch.to_dot(vertex) }
  end

  # Builds the NFA for the whole pattern and returns its start state. Every
  # expression is wired between the same origin and the same finish state.
  def to_nfa
    NFA::StartState.new.tap do |entry|
      origin = anchored_origin(entry)
      accept = NFA::FinishState.new

      expressions.each { |branch| branch.to_nfa(origin, accept) }
    end
  end

  private

  # Returns the state the expressions should start from. For an anchored
  # pattern that is a new state reached from +entry+ through a begin-anchor
  # transition; otherwise it is +entry+ itself.
  def anchored_origin(entry)
    return entry unless at_start

    NFA::State.new.tap do |anchored|
      entry.add_transition(NFA::Transition::BeginAnchor.new(anchored))
    end
  end
end
|
47
|
+
|
48
|
+
# A sequence of items that must match one after another.
class Expression
  attr_reader :items # Group | Match | Anchor

  def initialize(items)
    @items = items
  end

  # Adds an "Expression" node under +parent+ and lets each item draw itself
  # beneath it.
  def to_dot(parent)
    node = parent.add_node(object_id, label: "Expression")

    items.each { |item| item.to_dot(node) }
  end

  # Wires the items in order between +start+ and +finish+, creating one new
  # intermediate state between each pair of neighbouring items.
  def to_nfa(start, finish)
    # An empty sequence consumes nothing, so it is a direct epsilon edge.
    # (Without this guard Array.new below would be asked for -1 elements and
    # raise ArgumentError.)
    if items.empty?
      start.add_transition(NFA::Transition::Epsilon.new(finish))
      return items
    end

    inner = Array.new(items.length - 1) { NFA::State.new }
    states = [start, *inner, finish]

    items.each_with_index do |item, index|
      item.to_nfa(states[index], states[index + 1])
    end
  end
end
|
70
|
+
|
71
|
+
# A parenthesised set of alternative expressions with an optional quantifier.
class Group
  attr_reader :expressions, :quantifier # Array[Expression], Quantifier

  def initialize(expressions, quantifier: Quantifier::Once.new)
    @expressions = expressions
    @quantifier = quantifier
  end

  # Adds a "Group" node under +parent+, then draws every alternative and
  # finally the quantifier beneath it.
  def to_dot(parent)
    vertex = parent.add_node(object_id, label: "Group")

    expressions.each do |alternative|
      alternative.to_dot(vertex)
    end

    quantifier.to_dot(vertex)
  end

  # Lets the quantifier decide which state pairs the group spans; for each
  # pair every alternative is wired between the same two states.
  def to_nfa(start, finish)
    quantifier.quantify(start, finish) do |from, to|
      expressions.each do |alternative|
        alternative.to_nfa(from, to)
      end
    end
  end
end
|
93
|
+
|
94
|
+
# A single matchable item together with its quantifier.
class Match
  attr_reader :item, :quantifier # CharacterGroup | CharacterClass | Character | Period, Quantifier

  def initialize(item, quantifier: Quantifier::Once.new)
    @item = item
    @quantifier = quantifier
  end

  # Adds a "Match" node under +parent+ and draws the item followed by the
  # quantifier beneath it.
  def to_dot(parent)
    vertex = parent.add_node(object_id, label: "Match")

    item.to_dot(vertex)
    quantifier.to_dot(vertex)
  end

  # Lets the quantifier decide which state pairs the item spans and wires the
  # item between each pair it yields.
  def to_nfa(start, finish)
    quantifier.quantify(start, finish) { |from, to| item.to_nfa(from, to) }
  end
end
|
116
|
+
|
117
|
+
# A bracketed set of characters and ranges, optionally negated.
class CharacterGroup
  attr_reader :items, :invert # Array[CharacterRange | Character], bool

  def initialize(items, invert: false)
    @items = items
    @invert = invert
  end

  # Adds a node for the group under +parent+ (marked when negated) and draws
  # each member beneath it.
  def to_dot(parent)
    caption = invert ? "CharacterGroup (invert)" : "CharacterGroup"
    vertex = parent.add_node(object_id, label: caption)

    items.each { |member| member.to_dot(vertex) }
  end

  # A plain group lets every member add its own transition from +start+ to
  # +finish+. A negated group adds a single inverted transition carrying the
  # sorted list of all values the members expand to.
  def to_nfa(start, finish)
    return items.each { |member| member.to_nfa(start, finish) } unless invert

    excluded = items.flat_map(&:to_nfa_values).sort
    start.add_transition(NFA::Transition::Invert.new(finish, excluded))
  end
end
|
145
|
+
|
146
|
+
# One of the shorthand character classes.
class CharacterClass
  attr_reader :value # "\w" | "\W" | "\d" | "\D"

  def initialize(value)
    @value = value
  end

  # Adds a box node labelled with the class (e.g. \w) under +parent+.
  def to_dot(parent)
    parent.add_node(object_id, label: value, shape: "box")
  end

  # Adds the transitions from +start+ to +finish+ that accept the characters
  # of this class.
  #
  # Raises RuntimeError if +value+ is not one of the four supported classes.
  def to_nfa(start, finish)
    case value
    when "\\w"
      start.add_transition(NFA::Transition::Range.new(finish, "a", "z"))
      start.add_transition(NFA::Transition::Range.new(finish, "A", "Z"))
      start.add_transition(NFA::Transition::Range.new(finish, "0", "9"))
      start.add_transition(NFA::Transition::Value.new(finish, "_"))
    when "\\W"
      # The complement of \w is a single inverted transition over every
      # character that \w accepts.
      start.add_transition(NFA::Transition::Invert.new(finish, [*("a".."z"), *("A".."Z"), *("0".."9"), "_"]))
    when "\\d"
      start.add_transition(NFA::Transition::Range.new(finish, "0", "9"))
    when "\\D"
      start.add_transition(NFA::Transition::Range.new(finish, "0", "9", invert: true))
    else
      # Still a RuntimeError as before, but now it says which value was
      # rejected instead of the generic "unhandled exception".
      raise "Unsupported character class: #{value.inspect}"
    end
  end
end
|
175
|
+
|
176
|
+
# A single literal character.
class Character
  attr_reader :value # String

  def initialize(value)
    @value = value
  end

  # Adds a box node labelled with the character under +parent+.
  def to_dot(parent)
    parent.add_node(object_id, label: value, shape: "box")
  end

  # The values this item contributes to an inverted character group: just
  # the character itself, wrapped in an array.
  def to_nfa_values
    [value]
  end

  # Adds a transition from +start+ to +finish+ that accepts exactly this
  # character.
  def to_nfa(start, finish)
    literal = NFA::Transition::Value.new(finish, value)
    start.add_transition(literal)
  end
end
|
195
|
+
|
196
|
+
# The "." item.
class Period
  # Adds a box node labelled "." under +parent+.
  def to_dot(parent)
    parent.add_node(object_id, label: ".", shape: "box")
  end

  # Adds an Any transition from +start+ to +finish+.
  def to_nfa(start, finish)
    start.add_transition(NFA::Transition::Any.new(finish))
  end
end
|
206
|
+
|
207
|
+
# An inclusive range of characters inside a character group, e.g. a-z.
class CharacterRange
  attr_reader :left, :right # String

  def initialize(left, right)
    @left = left
    @right = right
  end

  # Adds a box node labelled "left-right" under +parent+.
  def to_dot(parent)
    parent.add_node(object_id, label: "#{left}-#{right}", shape: "box")
  end

  # Every value from +left+ through +right+, as an array. Used when the
  # enclosing group is inverted.
  def to_nfa_values
    [*(left..right)]
  end

  # Adds a Range transition from +start+ to +finish+ covering left..right.
  def to_nfa(start, finish)
    start.add_transition(NFA::Transition::Range.new(finish, left, right))
  end
end
|
228
|
+
|
229
|
+
# A zero-width position assertion.
class Anchor
  attr_reader :value # "\A" | "\z" | "$"

  def initialize(value)
    @value = value
  end

  # Adds a box node labelled with the anchor under +parent+.
  def to_dot(parent)
    parent.add_node(object_id, label: value, shape: "box")
  end

  # Adds the anchor transition from +start+ to +finish+: a begin anchor for
  # \A, an end anchor for \z and $.
  #
  # Raises RuntimeError for any other value. Previously such a value fell
  # through the case and a nil transition was added to +start+, which only
  # surfaced later when the transitions were compiled.
  def to_nfa(start, finish)
    transition =
      case value
      when "\\A"
        NFA::Transition::BeginAnchor.new(finish)
      when "\\z", "$"
        NFA::Transition::EndAnchor.new(finish)
      else
        raise "Unsupported anchor: #{value.inspect}"
      end

    start.add_transition(transition)
  end
end
|
252
|
+
|
253
|
+
# Quantifiers decide how many times an item is repeated. Each one implements
# +quantify(start, finish)+, which yields a (from, to) pair of states for
# every copy of the item that has to be wired into the NFA and adds whatever
# epsilon transitions are needed to skip or repeat those copies.
module Quantifier
  # No explicit quantifier: the item appears exactly once.
  class Once
    # Nothing is drawn for the implicit quantifier.
    def to_dot(parent); end

    def quantify(start, finish)
      yield start, finish
    end
  end

  # The * quantifier.
  class ZeroOrMore
    def to_dot(parent)
      parent.add_node(object_id, label: "*", shape: "box")
    end

    # The item loops on +start+; an epsilon edge then leaves for +finish+.
    def quantify(start, finish)
      yield start, start
      start.add_transition(NFA::Transition::Epsilon.new(finish))
    end
  end

  # The + quantifier.
  class OneOrMore
    def to_dot(parent)
      parent.add_node(object_id, label: "+", shape: "box")
    end

    # One mandatory copy, plus an epsilon edge from +finish+ back to +start+
    # so the copy can be taken again.
    def quantify(start, finish)
      yield start, finish
      finish.add_transition(NFA::Transition::Epsilon.new(start))
    end
  end

  # The ? quantifier.
  class Optional
    def to_dot(parent)
      parent.add_node(object_id, label: "?", shape: "box")
    end

    # One copy, plus an epsilon edge that skips it.
    def quantify(start, finish)
      yield start, finish
      start.add_transition(NFA::Transition::Epsilon.new(finish))
    end
  end

  # The {n} quantifier.
  class Exact
    attr_reader :value # Integer

    def initialize(value)
      @value = value
    end

    def to_dot(parent)
      parent.add_node(object_id, label: "{#{value}}", shape: "box")
    end

    # Chains +value+ copies of the item through value - 1 new states.
    def quantify(start, finish)
      # Zero copies means nothing is consumed. Without this edge +start+
      # and +finish+ would be left unconnected and {0} could never match.
      if value.zero?
        start.add_transition(NFA::Transition::Epsilon.new(finish))
        return value
      end

      states = [start, *(value - 1).times.map { NFA::State.new }, finish]

      value.times do |index|
        yield states[index], states[index + 1]
      end
    end
  end

  # The {n,} quantifier.
  class AtLeast
    attr_reader :value # Integer

    def initialize(value)
      @value = value
    end

    def to_dot(parent)
      parent.add_node(object_id, label: "{#{value},}", shape: "box")
    end

    # Chains +value+ mandatory copies, then lets the last copy repeat through
    # an epsilon edge from the final state back to the one before it.
    def quantify(start, finish)
      if value.zero?
        # {0,} is the same as *: loop on +start+ and allow leaving at any
        # time. The general construction below would only add an edge from
        # +finish+ back to +start+, leaving no way to reach +finish+.
        yield start, start
        start.add_transition(NFA::Transition::Epsilon.new(finish))
      else
        states = [start, *(value - 1).times.map { NFA::State.new }, finish]

        value.times do |index|
          yield states[index], states[index + 1]
        end

        states[-1].add_transition(NFA::Transition::Epsilon.new(states[-2]))
      end
    end
  end

  # The {n,m} quantifier.
  class Range
    attr_reader :lower, :upper # Integer

    def initialize(lower, upper)
      @lower = lower
      @upper = upper
    end

    def to_dot(parent)
      parent.add_node(object_id, label: "{#{lower},#{upper}}", shape: "box")
    end

    # Chains +upper+ copies of the item; every state from the +lower+-th
    # onwards gets an epsilon edge to +finish+ so the remaining copies are
    # optional.
    def quantify(start, finish)
      # {0,0} has no copies to chain and no optional ones to skip, so it
      # needs its own edge to connect +start+ to +finish+.
      if lower.zero? && upper.zero?
        start.add_transition(NFA::Transition::Epsilon.new(finish))
        return 0
      end

      states = [start, *(upper - 1).times.map { NFA::State.new }, finish]

      upper.times do |index|
        yield states[index], states[index + 1]
      end

      (upper - lower).times do |index|
        transition = NFA::Transition::Epsilon.new(states[-1])
        states[lower + index].add_transition(transition)
      end
    end
  end
end
|
363
|
+
end
|
364
|
+
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RegularExpression
|
4
|
+
# The bytecode module defines instructions, and has a compiled object for
|
5
|
+
# storing a stream of them, and a builder object for creating the compiled
|
6
|
+
# object.
|
7
|
+
module Bytecode
|
8
|
+
# Never recurse a graph in a compiler! We don't know how deep it is and
# don't want to limit how large a program we can accept due to arbitrary
# stack space. Always use a worklist.
#
# Turns the NFA reachable from the start state +nfa+ into a flat list of
# instructions plus a table of labels, and returns whatever the builder's
# +build+ produces.
#
# Each state is emitted once, under the label state_<object_id>_0, with one
# further label per outgoing transition (state_<object_id>_<index>; index 0
# is the same label as the state itself). Every worklist entry carries the
# "fallback" instructions to run when nothing in that state matches; the
# entry point's fallback is a jump to the shared :fail label.
#
# Raises RuntimeError when a state holds a transition of a class that is not
# handled below.
def self.compile(nfa)
  builder = Builder.new
  label = ->(state, index = 0) { :"state_#{state.object_id}_#{index}" }

  visited = Set.new
  worklist = [[nfa, [Insns::Jump.new(:fail)]]]

  # For each state in the NFA.
  until worklist.empty?
    state, fallback = worklist.pop
    next if visited.include?(state)

    # Label the start of the state.
    builder.mark_label(label[state])
    visited.add(state)

    if state.is_a?(NFA::FinishState)
      builder.push(Insns::Match.new)
      next
    end

    last_index = state.transitions.length - 1

    # Other states have transitions out of them. Go through each
    # transition.
    state.transitions.each_with_index do |transition, index|
      builder.mark_label(label[state, index])

      # Every transition except the last has a later alternative to fall
      # back on, so the current string index is saved before trying it.
      has_alternative = index < last_index
      builder.push(Insns::PushIndex.new) if has_alternative

      case transition
      when NFA::Transition::BeginAnchor
        builder.push(Insns::GuardBegin.new(label[transition.state]))
      when NFA::Transition::EndAnchor
        builder.push(Insns::GuardEnd.new(label[transition.state]))
      when NFA::Transition::Any
        builder.push(Insns::JumpAny.new(label[transition.state]))
      when NFA::Transition::Value
        builder.push(Insns::JumpValue.new(transition.value, label[transition.state]))
      when NFA::Transition::Invert
        builder.push(Insns::JumpValuesInvert.new(transition.values, label[transition.state]))
      when NFA::Transition::Range
        if transition.invert
          builder.push(Insns::JumpRangeInvert.new(transition.left, transition.right, label[transition.state]))
        else
          builder.push(Insns::JumpRange.new(transition.left, transition.right, label[transition.state]))
        end
      when NFA::Transition::Epsilon
        builder.push(Insns::Jump.new(label[transition.state]))
      else
        # Same exception class as the bare raise this replaces, but it now
        # says what was rejected. Only the class is reported because
        # inspecting a transition would print the graph it points into.
        raise "Unsupported NFA transition: #{transition.class}"
      end

      # The target state falls back to this state's next alternative when
      # there is one (restoring the saved index first), and otherwise
      # inherits this state's own fallback.
      next_fallback =
        if has_alternative
          [Insns::PopIndex.new, Insns::Jump.new(label[state, index + 1])]
        else
          fallback
        end

      worklist.push([transition.state, next_fallback])
    end

    # If we don't have one of the transitions that always executes, then we
    # need to add the fallback to the output for this state.
    if state.transitions.none? { |t| t.is_a?(NFA::Transition::BeginAnchor) || t.is_a?(NFA::Transition::Epsilon) }
      builder.push(*fallback)
    end
  end

  # We always have a failure case - it's just the failure instruction.
  builder.mark_label(:fail)
  builder.push(Insns::Fail.new)
  builder.build
end
|
86
|
+
|
87
|
+
# The instruction set. Instructions without operands are plain marker
# classes; instructions with operands are structs whose members are the
# operands.
module Insns
  # Saves the current string index on the stack so that a later PopIndex can
  # restore it when backtracking.
  class PushIndex; end

  # Restores the string index most recently saved by PushIndex. Used when
  # backtracking.
  class PopIndex; end

  # Succeeds when positioned at the beginning of the string: execution
  # continues at the +guarded+ instruction. Otherwise the entire match
  # fails.
  GuardBegin = Struct.new(:guarded)

  # Succeeds when positioned at the end of the string: execution continues
  # at the +guarded+ instruction. Otherwise the match fails at the current
  # index.
  GuardEnd = Struct.new(:guarded)

  # If a character can be read off the input, reads it and jumps to
  # +target+.
  JumpAny = Struct.new(:target)

  # If a character can be read off the input and it equals +char+, reads it
  # and jumps to +target+.
  JumpValue = Struct.new(:char, :target)

  # If a character can be read off the input and it is not one of +chars+,
  # reads it and jumps to +target+.
  JumpValuesInvert = Struct.new(:chars, :target)

  # If a character can be read off the input and it lies within
  # +left+..+right+, reads it and jumps to +target+.
  JumpRange = Struct.new(:left, :right, :target)

  # If a character can be read off the input and it lies outside
  # +left+..+right+, reads it and jumps to +target+.
  JumpRangeInvert = Struct.new(:left, :right, :target)

  # Unconditional jump to +target+.
  Jump = Struct.new(:target)

  # The string matched; stop executing instructions.
  class Match; end

  # The match failed at the current index. Increment the starting index and
  # try again if possible.
  class Fail; end
end
|
138
|
+
|
139
|
+
# Accumulates instructions and label positions while the NFA is compiled.
class Builder
  attr_reader :insns # Array[Insns]
  attr_reader :labels # Hash[Symbol, Integer]

  def initialize
    @labels = {}
    @insns = []
  end

  # Points +label+ at the position the next pushed instruction will occupy
  # and returns that position. Marking a label again moves it.
  def mark_label(label)
    labels.store(label, insns.length)
  end

  # Appends the given instructions in order and returns the instruction list.
  def push(*new_insns)
    insns.concat(new_insns)
  end

  # Wraps the accumulated instructions and labels in a Compiled object.
  def build
    Compiled.new(insns, labels)
  end
end
|
160
|
+
|
161
|
+
# The result of compilation: an instruction list and the label table that
# names positions within it.
class Compiled
  attr_reader :insns, :labels

  def initialize(insns, labels)
    @insns = insns
    @labels = labels
  end

  # Returns a listing of the program as a string: one line per instruction,
  # preceded by a "name:" line wherever a label points at that instruction.
  def dump
    # labels maps name -> address; printing needs address -> name. When
    # several names share an address the last one in the table is shown.
    names_by_address = labels.invert

    listing = StringIO.new
    insns.each_with_index do |insn, address|
      name = names_by_address[address]
      listing.puts("#{name}:") if name
      listing.puts(" #{insn}")
    end
    listing.string
  end
end
|
188
|
+
end
|
189
|
+
end
|