kompiler 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,508 @@
1
+ # Copyright 2024 Kyrylo Shyshko
2
+ # Licensed under the Apache License, Version 2.0. See LICENSE file for details.
3
+
4
+ module Kompiler
5
+
6
+ class Parsers
7
+
8
# Parses a double-quoted string literal that starts at the first character of +code+.
#
# The first character is assumed to be the opening quote and is skipped without being checked.
# The escape sequences \n, \r and \\ are converted to the character they stand for. Any other
# backslashed character (including \") is copied to the result together with its backslash,
# and never terminates the string.
# NOTE(review): keeping the backslash of an escaped quote looks unintended - confirm before changing.
#
# @param code [String] text beginning with the opening double quote
# @return [Array(String, Integer)] the unescaped content, and the number of characters consumed
#   from +code+ (both quotes included)
# @raise [ArgumentError] if +code+ ends before a closing quote is found
def self.parse_str(code)
  escapes = { "n" => "\n", "r" => "\r", "\\" => "\\" }

  str_content = +""
  escaped = false

  # Start at 1 to skip the opening quote
  i = 1

  loop do
    char = code[i]

    # Running past the end means the literal was never closed
    raise ArgumentError, "Unterminated string literal" if char.nil?

    if escaped
      # Unknown escapes are kept verbatim (backslash + character)
      str_content << escapes.fetch(char) { "\\" + char }
      escaped = false
    elsif char == "\""
      break
    elsif char == "\\"
      escaped = true
    else
      str_content << char
    end

    i += 1
  end

  # i points at the closing quote, so i + 1 characters were consumed
  [str_content, i + 1]
end
46
+
47
# Splits source text into lines and strips `//` comments.
#
# A comment runs from `//` to the end of the line. String literals are copied verbatim
# (quotes and escapes untouched), so `//` or a line break inside a string does not
# start a comment or a new line. Quotes that appear inside a comment are ignored.
# Every "\n" outside a string ends a line (empty lines are kept); text after the last
# "\n" is appended only if it is not empty.
#
# @param code [String] the full source text
# @return [Array<String>] the lines without their comments
def self.get_code_lines(code)
  lines = []
  curr_line = +""
  in_comment = false
  i = 0

  while i < code.size
    char = code[i]

    if char == "\n"
      lines << curr_line
      curr_line = +""
      in_comment = false
      i += 1
    elsif in_comment
      # Everything up to the end of the line is discarded, including quotes
      i += 1
    elsif char == "/" && code[i + 1] == "/"
      in_comment = true
      i += 2
    elsif char == "\""
      # Only the size is needed here: the raw literal is copied, not its unescaped content
      _content, parse_size = parse_str(code[i..])
      curr_line << code[i, parse_size]
      i += parse_size
    else
      curr_line << char
      i += 1
    end
  end

  lines << curr_line unless curr_line.empty?

  lines
end
84
+
85
+
86
# Looks up +str+ among the registers returned by Kompiler::Architecture.registers.
#
# @param str [String] the operand text
# @return [Array] [true, register] for the first register whose :reg_name equals +str+,
#   otherwise [false, nil]
def self.check_register_operand(str)
  register = Kompiler::Architecture.registers.find { |candidate| str == candidate[:reg_name] }

  register ? [true, register] : [false, nil]
end
92
+
93
# Checks whether +str+ is a binary literal: the prefix "0b" followed by one or more 0/1 digits.
#
# @param str [String] the operand text
# @return [Array] [true, Integer value] for a valid literal, otherwise [false, nil]
def self.check_binary_operand(str)
  return [false, nil] unless str.start_with?("0b")

  digits = str[2..]

  # At least one digit is required, so a bare "0b" is rejected
  return [false, nil] unless digits.match?(/\A[01]+\z/)

  [true, digits.to_i(2)]
end
108
+
109
+
110
# Checks whether +str+ is a hexadecimal literal: the prefix "0x" followed by one or more
# hex digits (0-9, a-f in either case).
#
# @param str [String] the operand text
# @return [Array] [true, Integer value] for a valid literal, otherwise [false, nil]
def self.check_hex_operand(str)
  return [false, nil] unless str.start_with?("0x")

  # Lower-case the digits so a single character class covers both cases
  hex = str[2..].downcase

  # At least one digit is required, so a bare "0x" is rejected
  return [false, nil] unless hex.match?(/\A[0-9a-f]+\z/)

  [true, hex.to_i(16)]
end
126
+
127
+
128
# Checks whether +str+ is a decimal integer: an optional leading "-" followed by one or
# more digits 0-9.
#
# @param str [String] the operand text
# @return [Array] [true, Integer value] for a valid number, otherwise [false, nil]
def self.check_decimal_operand(str)
  negative = str.start_with?("-")
  digits = negative ? str[1..] : str

  # At least one digit is required, so "" and "-" are rejected
  return [false, nil] unless digits.match?(/\A[0-9]+\z/)

  value = digits.to_i

  [true, negative ? -value : value]
end
146
+
147
+
148
# Tries to read +operand_str+ as an immediate value, testing the binary, decimal and
# hexadecimal notations in that order and stopping at the first one that matches.
#
# @param operand_str [String] the operand text
# @return [Array] [true, {type: "immediate", value: Integer, def_type: String}] where
#   def_type is "binary", "decimal" or "hex"; [false, nil] if no notation matches
def self.check_immediate_operand(operand_str)
  notations = [
    ["binary", :check_binary_operand],
    ["decimal", :check_decimal_operand],
    ["hex", :check_hex_operand]
  ]

  notations.each do |def_type, checker|
    matched, value = send(checker, operand_str)
    return [true, {type: "immediate", value: value, def_type: def_type}] if matched
  end

  [false, nil]
end
161
+
162
+
163
# Checks whether +str+ is a valid label name: a non-empty sequence of ASCII letters,
# digits and underscores that does not start with a digit.
#
# @param str [String] the operand text
# @return [Boolean] true if +str+ is a valid label name
def self.check_label_operand(str)
  # First character: letter or underscore; remaining characters may also be digits
  str.match?(/\A[A-Za-z_][A-Za-z0-9_]*\z/)
end
178
+
179
+
180
# Classifies one operand string. The checks run in this order and the first match wins:
# register, string, immediate, label.
#
# @param operand_str [String] the operand text
# @return [Hash, false] one of
#   {type: "register", value: register, register: register},
#   {type: "string", value: String, string: String} (content between the outer quotes),
#   the hash produced by check_immediate_operand,
#   {type: "label", value: operand_str},
#   or false when the operand matches none of them
def self.parse_operand_str(operand_str)
  # Check if the operand is a register
  is_register, register = check_register_operand(operand_str)
  return {type: "register", value: register, register: register} if is_register

  # Check if the operand is a string
  if operand_str.start_with?("\"")
    # An opening quote without a matching closing quote at the very end cannot be
    # classified as anything else either, so reject it instead of slicing blindly
    return false unless operand_str.size >= 2 && operand_str.end_with?("\"")

    content = operand_str[1...-1]
    # Separate objects for both keys, so mutating one does not affect the other
    return {type: "string", value: content, string: content.dup}
  end

  # Check if the operand is an immediate
  is_immediate, immediate_val = check_immediate_operand(operand_str)
  return immediate_val if is_immediate

  # Check if the operand is a label (validity is decided by check_label_operand)
  return {type: "label", value: operand_str} if check_label_operand(operand_str)

  # If no checks succeeded, return false
  false
end
208
+
209
+
210
+
211
+
212
# Splits one source line into a keyword and its parsed operands.
#
# The keyword is the first run of non-whitespace characters. Operands follow, separated
# by commas. Spaces and tabs outside string literals are dropped from operand text.
# String literals are read with parse_str and re-emitted as a quote, the content returned
# by parse_str, and a closing quote. Whitespace after the keyword or after the last
# operand does not create an operand, so "nop " has no operands.
#
# @param line [String] a single line of code
# @return [Array(String, Array), false] [keyword, operands], or false as soon as
#   parse_operand_str rejects an operand
def self.parse_instruction_line(line)
  whitespace = [" ", "\t"]
  i = 0

  # Skip leading whitespace
  i += 1 while i < line.size && whitespace.include?(line[i])

  # Collect the keyword: everything up to the next whitespace or the end of the line
  keyword = +""
  while i < line.size && !whitespace.include?(line[i])
    keyword << line[i]
    i += 1
  end

  operand_strings = []

  loop do
    # Skip whitespace in front of the operand; if only whitespace was left, there is
    # no further operand
    i += 1 while i < line.size && whitespace.include?(line[i])
    break if i >= line.size

    operand_content = +""

    # Collect the operand's content until a comma or the end of the line
    while i < line.size
      char = line[i]

      if char == ","
        # Consume the comma and finish this operand
        i += 1
        break
      end

      if whitespace.include?(char)
        i += 1
      elsif char == "\""
        # Read the whole literal so commas and whitespace inside it are preserved
        str_content, parsed_size = parse_str(line[i..])
        operand_content << "\"" << str_content << "\""
        i += parsed_size
      else
        operand_content << char
        i += 1
      end
    end

    operand_strings << operand_content
  end

  # Parse operand strings into operand types and values
  operands = []

  operand_strings.each do |operand_str|
    operand = parse_operand_str(operand_str)
    return false if operand == false
    operands << operand
  end

  [keyword, operands]
end
293
+
294
+
295
+
296
+
297
+
298
+ # def self.parse_instruction_line(line)
299
+ # keyword = ""
300
+ # i = 0
301
+ #
302
+ # # Loop until a non-whitespace character
303
+ # while i < line.size
304
+ # break if ![" ", "\t"].include?(line[i])
305
+ # i += 1
306
+ # end
307
+ #
308
+ # # Loop to get the keyword
309
+ # loop do
310
+ # # Exit out of the loop if the character is a whitespace
311
+ # break if [" ", "\t"].include?(line[i]) || i >= line.size
312
+ # # Add the character if not a whitespace
313
+ # keyword << line[i]
314
+ # # Proceed to the next character
315
+ # i += 1
316
+ # end
317
+ #
318
+ # operand_strings = []
319
+ #
320
+ # # Loop for operands
321
+ # loop do
322
+ # break if i >= line.size
323
+ #
324
+ # # Whitespace - skip
325
+ # if [" ", "\t"].include? line[i]
326
+ # i += 1
327
+ # next
328
+ # end
329
+ #
330
+ # # If a string operand - parse the string
331
+ # if line[i] == "\""
332
+ #
333
+ # str_content, parsed_size = parse_str(line[i..])
334
+ # operand_strings << line[i...(i + parsed_size)]
335
+ # i += parsed_size
336
+ #
337
+ # # If anything else - parse until whitespace, comma or end of line
338
+ # else
339
+ # content = ""
340
+ #
341
+ # while i < line.size
342
+ # break if [" ", ","].include? line[i]
343
+ # content << line[i]
344
+ # i += 1
345
+ # end
346
+ #
347
+ # operand_strings << content
348
+ # end
349
+ #
350
+ #
351
+ # # After operand parsed
352
+ # # Loop to meet a comma or end of line
353
+ # # Give error if stuff except whitespace
354
+ #
355
+ # while i < line.size
356
+ # # If comma, move to next character and repeat the bigger operand loop
357
+ # if line[i] == ","
358
+ # i += 1
359
+ # break
360
+ # end
361
+ # # If non-whitespace, raise an error
362
+ # # raise "Error: Unparsed content - exiting" if ![" ", "\t"].include?(line[i])
363
+ # return false if ![" ", "\t"].include?(line[i])
364
+ # i += 1
365
+ # end
366
+ # end
367
+ #
368
+ # # If end of line not reached, return an error
369
+ # if i != line.size
370
+ # return false
371
+ # end
372
+ #
373
+ #
374
+ # # Parse operand strings into operand types and values
375
+ #
376
+ # operands = []
377
+ #
378
+ # operand_strings.each do |operand_str|
379
+ # operand = parse_operand_str(operand_str)
380
+ # return false if operand == false
381
+ # operands << operand
382
+ # end
383
+ #
384
+ # return [keyword, operands]
385
+ # end
386
+
387
+
388
+
389
# Checks whether a parsed operand satisfies an operand description.
#
# The types must be equal. If the description has a :restrictions entry and the type is
# "register", the restricted :reg_type and :reg_size values must equal those of the
# operand's register. No restriction checks are implemented for other operand types,
# so they match on type alone.
#
# @param operand_description [Hash] has :type and optionally :restrictions (a Hash)
# @param operand [Hash] a parsed operand with :type (and :register for registers)
# @return [Boolean] true if the operand matches the description
def self.check_operand_match(operand_description, operand)
  # If the operand type does not match, return false
  return false if operand[:type] != operand_description[:type]

  # If there are no operand restrictions, the type match is enough
  return true unless operand_description.key?(:restrictions)

  if operand_description[:type] == "register"
    restrictions = operand_description[:restrictions]
    register = operand[:register]

    # Only the restricted properties are compared
    [:reg_type, :reg_size].each do |property|
      next unless restrictions.key?(property)
      return false if register[property] != restrictions[property]
    end
  end

  # No restriction was violated
  true
end
424
+
425
+
426
# Checks whether a line of code is a use of the given instruction.
#
# The line matches when it parses successfully, its keyword equals the instruction's
# :keyword, it has as many operands as the instruction's :operands list, and every
# operand satisfies the description at the same position (see check_operand_match).
#
# @param line [String] a single line of code
# @param instruction [Hash] has :keyword and :operands (list of operand descriptions)
# @return [Array] [true, operands] with the parsed operands on a match, otherwise [false, nil]
def self.match_instruction(line, instruction)
  parsed = parse_instruction_line(line)

  # A line that cannot be parsed matches no instruction
  return [false, nil] if parsed == false

  keyword, operands = parsed

  # Check if the keyword matches
  return [false, nil] if instruction[:keyword] != keyword

  # Check if there's the right amount of operands
  return [false, nil] if operands.size != instruction[:operands].size

  # Check if operands match descriptions
  all_match = operands.zip(instruction[:operands]).all? do |operand, operand_description|
    check_operand_match(operand_description, operand)
  end

  all_match ? [true, operands] : [false, nil]
end
450
+
451
+
452
+
453
# Finds the first instruction in Kompiler::Architecture.instructions that the line matches.
#
# @param line [String] a single line of code
# @return [Array] [true, {instruction: Hash, operands: Array}] for the first instruction
#   accepted by match_instruction, otherwise [false, nil]
def self.check_instruction(line)
  Kompiler::Architecture.instructions.each do |instruction|
    matched, operands = match_instruction(line, instruction)

    # Stop at the first instruction that matches
    return [true, {instruction: instruction, operands: operands}] if matched == true
  end

  [false, nil]
end
474
+
475
+
476
+
477
# Checks whether a line of code is a directive listed in Kompiler::Directives.directives.
#
# The line is parsed with parse_instruction_line. A single leading "." is removed from
# the keyword before the lookup; a keyword without the dot is looked up as-is.
# NOTE(review): this means "ascii" matches the same directive as ".ascii" - confirm intended.
#
# @param line [String] a single line of code
# @return [Array] [true, {directive: Hash, operands: Array}] for the first directive whose
#   :keyword equals the keyword; [false, nil] if the line cannot be parsed or nothing matches
def self.check_directive(line)
  parsed = parse_instruction_line(line)

  return [false, nil] if parsed == false

  keyword, operands = parsed

  # Drop the leading dot, if there is one
  keyword = keyword[1..] if keyword.start_with?(".")

  directive = Kompiler::Directives.directives.find { |candidate| candidate[:keyword] == keyword }

  directive ? [true, {directive: directive, operands: operands}] : [false, nil]
end
503
+
504
+
505
+ end # End Kompiler::Parsers
506
+
507
+
508
+ end # End Kompiler
data/lib/kompiler.rb ADDED
@@ -0,0 +1,19 @@
1
+ # Copyright 2024 Kyrylo Shyshko
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ require 'kompiler/mc_builder.rb'
16
+ require 'kompiler/parsers.rb'
17
+ require 'kompiler/compiler_functions.rb'
18
+ require 'kompiler/architecture.rb'
19
+ require 'kompiler/directives.rb'
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kompiler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Kyryl Shyshko
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-11-19 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: 'Kompiler is a low-level, modular and extendable compiler for any architecture.
14
+ By default Kompiler supports ARMv8-a, but other architecture extensions can be downloaded
15
+ in the future.
16
+
17
+ '
18
+ email: kyryloshy@gmail.com
19
+ executables:
20
+ - kompile
21
+ extensions: []
22
+ extra_rdoc_files: []
23
+ files:
24
+ - LICENSE
25
+ - bin/kompile
26
+ - lib/kompiler.rb
27
+ - lib/kompiler/arch/armv8a/instructions.rb
28
+ - lib/kompiler/arch/armv8a/load.rb
29
+ - lib/kompiler/arch/armv8a/registers.rb
30
+ - lib/kompiler/architecture.rb
31
+ - lib/kompiler/compiler_functions.rb
32
+ - lib/kompiler/directives.rb
33
+ - lib/kompiler/mc_builder.rb
34
+ - lib/kompiler/parsers.rb
35
+ homepage: https://github.com/kyryloshy/kompiler
36
+ licenses:
37
+ - Apache-2.0
38
+ metadata:
39
+ source_code_uri: https://github.com/kyryloshy/kompiler
40
+ bug_tracker_uri: https://github.com/kyryloshy/kompiler/issues
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 3.0.0
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubygems_version: 3.5.10
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Kir's compiler for low-level machine code
60
+ test_files: []