kompiler 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +201 -0
- data/bin/kompile +51 -0
- data/lib/kompiler/arch/armv8a/instructions.rb +490 -0
- data/lib/kompiler/arch/armv8a/load.rb +8 -0
- data/lib/kompiler/arch/armv8a/registers.rb +67 -0
- data/lib/kompiler/architecture.rb +30 -0
- data/lib/kompiler/compiler_functions.rb +262 -0
- data/lib/kompiler/directives.rb +167 -0
- data/lib/kompiler/mc_builder.rb +88 -0
- data/lib/kompiler/parsers.rb +508 -0
- data/lib/kompiler.rb +19 -0
- metadata +60 -0
@@ -0,0 +1,508 @@
|
|
1
|
+
# Copyright 2024 Kyrylo Shyshko
|
2
|
+
# Licensed under the Apache License, Version 2.0. See LICENSE file for details.
|
3
|
+
|
4
|
+
module Kompiler
|
5
|
+
|
6
|
+
class Parsers
|
7
|
+
|
8
|
+
# Parses a double-quoted string literal located at the very start of +code+.
#
# Recognised escapes: \n (newline), \r (carriage return), \\ (backslash) and
# \" (double quote). Any other backslash sequence is copied through verbatim,
# backslash included.
#
# @param code [String] text whose first character is the opening quote
# @return [Array(String, Integer)] the decoded content and the number of
#   characters consumed, counting both the opening and the closing quote
# @raise [ArgumentError] if the closing quote is never found
def self.parse_str(code)
  str_content = +""
  escaped = false

  # Index 0 holds the opening quote, so scanning starts right after it.
  i = 1

  while i < code.size
    char = code[i]

    if escaped
      case char
      when "n" then str_content << "\n"
      when "r" then str_content << "\r"
      when "\\", "\"" then str_content << char
      else
        # Unknown escape: keep it exactly as it was written.
        str_content << "\\" << char
      end
      escaped = false
    elsif char == "\""
      # Closing quote reached; i + 1 counts this quote as consumed too.
      return [str_content, i + 1]
    elsif char == "\\"
      escaped = true
    else
      str_content << char
    end

    i += 1
  end

  # Running off the end means the literal was never closed.
  raise ArgumentError, "unterminated string literal: #{code}"
end
|
46
|
+
|
47
|
+
# Splits source text into lines, dropping `//` comments.
#
# A comment runs from `//` to the end of the line. String literals are copied
# into the line unchanged (quotes and escapes included), so a `//` inside a
# string does not start a comment. Text inside a comment is ignored entirely,
# including any quote characters it contains.
#
# @param code [String] the full source text
# @return [Array<String>] one entry per "\n"-terminated line, plus the final
#   unterminated line when it is not empty
def self.get_code_lines(code)
  lines = []
  curr_line = +""
  in_comment = false
  i = 0

  while i < code.size
    char = code[i]

    if char == "\n"
      lines << curr_line
      curr_line = +""
      in_comment = false
      i += 1
    elsif in_comment
      # Everything up to the newline is comment text, quotes included.
      i += 1
    elsif char == "/" && code[i + 1] == "/"
      in_comment = true
      i += 2
    elsif char == "\""
      # Only the consumed length is needed; the raw literal is copied as-is.
      _content, parse_size = parse_str(code[i..])
      curr_line << code[i, parse_size]
      i += parse_size
    else
      curr_line << char
      i += 1
    end
  end

  lines << curr_line unless curr_line.empty?
  lines
end
|
84
|
+
|
85
|
+
|
86
|
+
# Looks +str+ up among the registers reported by
# Kompiler::Architecture.registers, comparing it with each entry's :reg_name.
#
# @param str [String] the operand text
# @return [Array] [true, register] for the first register whose name equals
#   +str+, otherwise [false, nil]
def self.check_register_operand(str)
  found = Kompiler::Architecture.registers.find { |candidate| str == candidate[:reg_name] }
  found ? [true, found] : [false, nil]
end
|
92
|
+
|
93
|
+
# Recognises a binary literal of the form `0b` followed by one or more
# 0/1 digits.
#
# @param str [String] the operand text
# @return [Array] [true, Integer] with the parsed value, or [false, nil] when
#   +str+ lacks the `0b` prefix, has no digits, or contains other characters
def self.check_binary_operand(str)
  return [false, nil] unless str.start_with?("0b")

  digits = str[2..]

  # At least one digit is required, and every digit must be 0 or 1.
  return [false, nil] unless digits.match?(/\A[01]+\z/)

  [true, digits.to_i(2)]
end
|
108
|
+
|
109
|
+
|
110
|
+
# Recognises a hexadecimal literal of the form `0x` followed by one or more
# hex digits (0-9, a-f, A-F).
#
# @param str [String] the operand text
# @return [Array] [true, Integer] with the parsed value, or [false, nil] when
#   +str+ lacks the `0x` prefix, has no digits, or contains other characters
def self.check_hex_operand(str)
  return [false, nil] unless str.start_with?("0x")

  digits = str[2..]

  # At least one digit is required; letters may be in either case.
  return [false, nil] unless digits.match?(/\A[0-9a-fA-F]+\z/)

  [true, digits.to_i(16)]
end
|
126
|
+
|
127
|
+
|
128
|
+
# Recognises a decimal integer: an optional leading minus sign followed by
# one or more ASCII digits.
#
# @param str [String] the operand text
# @return [Array] [true, Integer] with the parsed (possibly negative) value,
#   or [false, nil] when +str+ is empty, is only a minus sign, or contains
#   any non-digit character
def self.check_decimal_operand(str)
  # Requiring at least one digit rejects "" and "-".
  return [false, nil] unless str.match?(/\A-?[0-9]+\z/)

  [true, str.to_i]
end
|
146
|
+
|
147
|
+
|
148
|
+
# Tries to read +operand_str+ as an immediate value, attempting the binary,
# decimal and hexadecimal notations in that order.
#
# @param operand_str [String] the operand text
# @return [Array] [true, {type: "immediate", value:, def_type:}] for the first
#   notation that accepts the text (def_type is "binary", "decimal" or
#   "hex"), otherwise [false, nil]
def self.check_immediate_operand(operand_str)
  notations = [
    ["binary", :check_binary_operand],
    ["decimal", :check_decimal_operand],
    ["hex", :check_hex_operand]
  ]

  notations.each do |def_type, checker|
    recognised, value = send(checker, operand_str)
    return [true, {type: "immediate", value: value, def_type: def_type}] if recognised
  end

  [false, nil]
end
|
161
|
+
|
162
|
+
|
163
|
+
# Tells whether +str+ is a valid label name: a non-empty run of ASCII
# letters, digits and underscores that does not begin with a digit.
#
# @param str [String] the operand text
# @return [Boolean] true when +str+ is a valid label name
def self.check_label_operand(str)
  # The first character class excludes digits; requiring it rejects "".
  str.match?(/\A[A-Za-z_][A-Za-z0-9_]*\z/)
end
|
178
|
+
|
179
|
+
|
180
|
+
# Classifies a single operand string. The checks run in this order:
# register, string literal, immediate, label.
#
# @param operand_str [String] the operand text
# @return [Hash, false] one of
#   {type: "register", value:, register:},
#   {type: "string", value:, string:} (text between the first and last
#   character), the hash produced by check_immediate_operand,
#   {type: "label", value:}, or false when nothing matches
def self.parse_operand_str(operand_str)
  # Register names take priority over every other interpretation.
  register_found, register = check_register_operand(operand_str)
  if register_found
    return {type: "register", value: register, register: register}
  end

  # Anything starting with a double quote is treated as a string literal;
  # the first and last characters are dropped from the value.
  if operand_str[0] == "\""
    return {type: "string", value: operand_str[1...-1], string: operand_str[1...-1]}
  end

  immediate_found, immediate = check_immediate_operand(operand_str)
  return immediate if immediate_found

  # Last resort: a label name.
  return {type: "label", value: operand_str} if check_label_operand(operand_str)

  false
end
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
|
212
|
+
# Splits one source line into a keyword and its parsed operands.
#
# Leading spaces/tabs are skipped and the keyword runs up to the next space
# or tab. Operands are separated by commas. Spaces and tabs outside string
# literals are discarded; string literals are decoded with parse_str and
# re-wrapped in double quotes. Trailing whitespace at the end of the line
# (after the keyword or after a final comma) does not create an operand,
# whereas an empty slot between two commas is still passed on as "".
#
# @param line [String] one line of source code
# @return [Array(String, Array), false] [keyword, operands], or false as soon
#   as parse_operand_str rejects one of the operand strings
def self.parse_instruction_line(line)
  whitespace = [" ", "\t"]
  i = 0

  # Skip indentation in front of the keyword.
  i += 1 while i < line.size && whitespace.include?(line[i])

  # The keyword is everything up to the next whitespace character.
  keyword = +""
  while i < line.size && !whitespace.include?(line[i])
    keyword << line[i]
    i += 1
  end

  operand_strings = []

  while i < line.size
    operand_content = +""
    ended_by_comma = false

    # Collect one operand, up to a comma or the end of the line.
    while i < line.size
      char = line[i]

      if char == ","
        i += 1
        ended_by_comma = true
        break
      elsif whitespace.include?(char)
        i += 1
      elsif char == "\""
        str_content, parsed_size = parse_str(line[i..])
        operand_content << "\"" << str_content << "\""
        i += parsed_size
      else
        operand_content << char
        i += 1
      end
    end

    # Nothing but whitespace was left on the line: that is not an operand.
    break if operand_content.empty? && !ended_by_comma

    operand_strings << operand_content
  end

  # Turn the operand strings into typed operands.
  operands = []
  operand_strings.each do |operand_str|
    operand = parse_operand_str(operand_str)
    return false if operand == false
    operands << operand
  end

  [keyword, operands]
end
|
293
|
+
|
294
|
+
|
295
|
+
|
296
|
+
|
297
|
+
|
298
|
+
# def self.parse_instruction_line(line)
|
299
|
+
# keyword = ""
|
300
|
+
# i = 0
|
301
|
+
#
|
302
|
+
# # Loop until a non-whitespace character
|
303
|
+
# while i < line.size
|
304
|
+
# break if ![" ", "\t"].include?(line[i])
|
305
|
+
# i += 1
|
306
|
+
# end
|
307
|
+
#
|
308
|
+
# # Loop to get the keyword
|
309
|
+
# loop do
|
310
|
+
# # Exit out of the loop if the character is a whitespace
|
311
|
+
# break if [" ", "\t"].include?(line[i]) || i >= line.size
|
312
|
+
# # Add the character if not a whitespace
|
313
|
+
# keyword << line[i]
|
314
|
+
# # Proceed to the next character
|
315
|
+
# i += 1
|
316
|
+
# end
|
317
|
+
#
|
318
|
+
# operand_strings = []
|
319
|
+
#
|
320
|
+
# # Loop for operands
|
321
|
+
# loop do
|
322
|
+
# break if i >= line.size
|
323
|
+
#
|
324
|
+
# # Whitespace - skip
|
325
|
+
# if [" ", "\t"].include? line[i]
|
326
|
+
# i += 1
|
327
|
+
# next
|
328
|
+
# end
|
329
|
+
#
|
330
|
+
# # If a string operand - parse the string
|
331
|
+
# if line[i] == "\""
|
332
|
+
#
|
333
|
+
# str_content, parsed_size = parse_str(line[i..])
|
334
|
+
# operand_strings << line[i...(i + parsed_size)]
|
335
|
+
# i += parsed_size
|
336
|
+
#
|
337
|
+
# # If anything else - parse until whitespace, comma or end of line
|
338
|
+
# else
|
339
|
+
# content = ""
|
340
|
+
#
|
341
|
+
# while i < line.size
|
342
|
+
# break if [" ", ","].include? line[i]
|
343
|
+
# content << line[i]
|
344
|
+
# i += 1
|
345
|
+
# end
|
346
|
+
#
|
347
|
+
# operand_strings << content
|
348
|
+
# end
|
349
|
+
#
|
350
|
+
#
|
351
|
+
# # After operand parsed
|
352
|
+
# # Loop to meet a comma or end of line
|
353
|
+
# # Give error if stuff except whitespace
|
354
|
+
#
|
355
|
+
# while i < line.size
|
356
|
+
# # If comma, move to next character and repeat the bigger operand loop
|
357
|
+
# if line[i] == ","
|
358
|
+
# i += 1
|
359
|
+
# break
|
360
|
+
# end
|
361
|
+
# # If non-whitespace, raise an error
|
362
|
+
# # raise "Error: Unparsed content - exiting" if ![" ", "\t"].include?(line[i])
|
363
|
+
# return false if ![" ", "\t"].include?(line[i])
|
364
|
+
# i += 1
|
365
|
+
# end
|
366
|
+
# end
|
367
|
+
#
|
368
|
+
# # If end of line not reached, return an error
|
369
|
+
# if i != line.size
|
370
|
+
# return false
|
371
|
+
# end
|
372
|
+
#
|
373
|
+
#
|
374
|
+
# # Parse operand strings into operand types and values
|
375
|
+
#
|
376
|
+
# operands = []
|
377
|
+
#
|
378
|
+
# operand_strings.each do |operand_str|
|
379
|
+
# operand = parse_operand_str(operand_str)
|
380
|
+
# return false if operand == false
|
381
|
+
# operands << operand
|
382
|
+
# end
|
383
|
+
#
|
384
|
+
# return [keyword, operands]
|
385
|
+
# end
|
386
|
+
|
387
|
+
|
388
|
+
|
389
|
+
# Checks a parsed operand against an instruction's operand description.
#
# The :type values must be equal. When the description has a :restrictions
# hash, register operands are additionally compared on :reg_type and
# :reg_size (each only if listed). No restriction checks are performed for
# other operand types.
#
# @param operand_description [Hash] expected operand, with :type and an
#   optional :restrictions hash
# @param operand [Hash] parsed operand, with :type and, for registers,
#   a :register hash
# @return [Boolean] true when the operand satisfies the description
def self.check_operand_match(operand_description, operand)
  # A different operand type can never match.
  return false unless operand[:type] == operand_description[:type]

  # Without restrictions the type match is all that is required.
  return true unless operand_description.key?(:restrictions)

  if operand_description[:type] == "register"
    restrictions = operand_description[:restrictions]

    # Each listed register property has to agree with the actual register.
    %i[reg_type reg_size].each do |property|
      next unless restrictions.key?(property)
      return false if operand[:register][property] != restrictions[property]
    end
  end

  # No check rejected the operand.
  true
end
|
424
|
+
|
425
|
+
|
426
|
+
# Checks whether +line+ is a use of the given instruction.
#
# The line matches when it parses, its keyword equals instruction[:keyword],
# it has as many operands as instruction[:operands], and every operand
# satisfies its description according to check_operand_match.
#
# @param line [String] one line of source code
# @param instruction [Hash] instruction description with :keyword and
#   :operands
# @return [Array] [true, operands] on a match, otherwise [false, nil]
def self.match_instruction(line, instruction)
  parsed = parse_instruction_line(line)

  # parse_instruction_line returns false when an operand cannot be parsed.
  return [false, nil] if parsed == false

  keyword, operands = parsed

  return [false, nil] if instruction[:keyword] != keyword
  return [false, nil] if operands.size != instruction[:operands].size

  # Operands are compared with their descriptions position by position.
  all_match = operands.zip(instruction[:operands]).all? do |operand, operand_description|
    check_operand_match(operand_description, operand)
  end

  all_match ? [true, operands] : [false, nil]
end
|
450
|
+
|
451
|
+
|
452
|
+
|
453
|
+
# Finds the first instruction in Kompiler::Architecture.instructions that
# +line+ matches, as decided by match_instruction.
#
# @param line [String] one line of source code
# @return [Array] [true, {instruction:, operands:}] for the first match,
#   otherwise [false, nil]
def self.check_instruction(line)
  Kompiler::Architecture.instructions.each do |candidate|
    matched, candidate_operands = match_instruction(line, candidate)
    next unless matched == true

    # The first matching instruction wins; later ones are not examined.
    return [true, {instruction: candidate, operands: candidate_operands}]
  end

  [false, nil]
end
|
474
|
+
|
475
|
+
|
476
|
+
|
477
|
+
# Checks whether +line+ invokes one of the directives listed in
# Kompiler::Directives.directives.
#
# The line is parsed like an instruction line. A single leading "." is
# removed from the keyword before it is compared with each directive's
# :keyword.
#
# @param line [String] one line of source code
# @return [Array] [true, {directive:, operands:}] for the first directive
#   with that keyword, otherwise [false, nil] (also when the line does not
#   parse)
def self.check_directive(line)
  parsed = parse_instruction_line(line)
  return [false, nil] if parsed == false

  name, operands = parsed

  # Drop one leading dot, if present, before the lookup.
  name = name[1..] if name[0] == "."

  found = Kompiler::Directives.directives.find { |candidate| candidate[:keyword] == name }
  return [false, nil] if found.nil?

  [true, {directive: found, operands: operands}]
end
|
503
|
+
|
504
|
+
|
505
|
+
end # End Kompiler::Parsers
|
506
|
+
|
507
|
+
|
508
|
+
end # End Kompiler
|
data/lib/kompiler.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Copyright 2024 Kyrylo Shyshko
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
require 'kompiler/mc_builder.rb'
|
16
|
+
require 'kompiler/parsers.rb'
|
17
|
+
require 'kompiler/compiler_functions.rb'
|
18
|
+
require 'kompiler/architecture.rb'
|
19
|
+
require 'kompiler/directives.rb'
|
metadata
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kompiler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kyryl Shyshko
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: 'Kompiler is a low-level, modular and extendable compiler for any architecture.
|
14
|
+
By default Kompiler supports ARMv8-a, but other architecture extensions can be downloaded
|
15
|
+
in the future.
|
16
|
+
|
17
|
+
'
|
18
|
+
email: kyryloshy@gmail.com
|
19
|
+
executables:
|
20
|
+
- kompile
|
21
|
+
extensions: []
|
22
|
+
extra_rdoc_files: []
|
23
|
+
files:
|
24
|
+
- LICENSE
|
25
|
+
- bin/kompile
|
26
|
+
- lib/kompiler.rb
|
27
|
+
- lib/kompiler/arch/armv8a/instructions.rb
|
28
|
+
- lib/kompiler/arch/armv8a/load.rb
|
29
|
+
- lib/kompiler/arch/armv8a/registers.rb
|
30
|
+
- lib/kompiler/architecture.rb
|
31
|
+
- lib/kompiler/compiler_functions.rb
|
32
|
+
- lib/kompiler/directives.rb
|
33
|
+
- lib/kompiler/mc_builder.rb
|
34
|
+
- lib/kompiler/parsers.rb
|
35
|
+
homepage: https://github.com/kyryloshy/kompiler
|
36
|
+
licenses:
|
37
|
+
- Apache-2.0
|
38
|
+
metadata:
|
39
|
+
source_code_uri: https://github.com/kyryloshy/kompiler
|
40
|
+
bug_tracker_uri: https://github.com/kyryloshy/kompiler/issues
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 3.0.0
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
requirements: []
|
56
|
+
rubygems_version: 3.5.10
|
57
|
+
signing_key:
|
58
|
+
specification_version: 4
|
59
|
+
summary: Kir's compiler for low-level machine code
|
60
|
+
test_files: []
|