kompiler 0.3.0.pre.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,37 @@
1
1
  # Copyright 2024 Kyrylo Shyshko
2
2
  # Licensed under the Apache License, Version 2.0. See LICENSE file for details.
3
3
 
4
+ #
5
+ # Implements a custom AST structure and interpreter for instructions on how to build the instruction's machine code (MC).
6
+ #
7
+ # @MC_AST_NODES contains a list of all available instructions / AST nodes for building machine code.
8
+ # Each entry's structure is:
9
+ # name - contains the node's / instruction's name
10
+ # n_args - either an integer or "any". Contains the number of arguments this instruction must receive.
11
+ # func - a lambda receiving the arguments and the current program's state as inputs. Should output the instruction's result.
12
+ # eval_args - optional, default true. Specifies whether to pre-evaluate the node's arguments.
13
+ #
14
+ # A MC instruction example:
15
+ # ["get_bits", ["get_current_address"], 0, 10]
16
+ # Which returns an array of ten integers, or bits, of the current address.
17
+ #
18
+ # Each MC instruction is in the form of an array with a string, the instruction's name, as the first element.
19
+ # All other elements will count as arguments.
20
+ # For most nodes, the arguments will be evaluated / computed before calling the node's logic. For example, in:
21
+ # ["get_bits", ["get_current_address"], 0, 10]
22
+ # ["get_current_address"] will be evaluated first, and then the result will be passed into get_bits. This is similar to a Ruby piece of code like this:
23
+ # get_bits(get_current_address(), 0, 10)
24
+ #
25
+ # In more special nodes that have eval_args = false, such as the if_eq_else node, the arguments aren't pre-evaluated, which is required in an if-statement scenario. E.g., an error shouldn't be thrown before a check that the error must be raised.
26
+ #
27
+ #
28
+ # Main functions are:
29
+ # build_mc - builds machine code from an input AST
30
+ # run_mc_ast - runs an MC AST node
31
+ # is_ast_node - returns if an object is an AST node, by checking whether it is an array with the first element being a string
32
+ #
33
+
34
+
4
35
  module Kompiler
5
36
 
6
37
  module MachineCode_AST
@@ -44,6 +75,13 @@ module MachineCode_AST
44
75
  {name: "raise_warning", n_args: 1, func: lambda {|args, state| puts args[0]; [] } },
45
76
 
46
77
  {name: "get_key", n_args: 2, func: lambda {|args, state| args[0].keys.include?(args[1]) ? args[0][args[1]] : raise("MC Constructor get_key Error: The key \"#{args[1]}\" doesn't exist - Program build not possible. This is likely a problem with the ISA configuration, not the program being compiled.") }},
78
+
79
+ # Concatenation of get_key and get_operand through get_key(get_operand(arg1), arg2)
80
+ {name: "get_operand_key", n_args: 2, func: lambda do |args, state|
81
+ op = state[:operands][args[0]][:value]
82
+ op.keys.include?(args[1]) ? op[args[1]] : raise("MC Constructor get_operand_key Error: key \"#{args[1]}\" doesn't exist. This is likely an error with the ISA configuration, not the program being compiled.")
83
+ end},
84
+
47
85
  {name: "concat", n_args: "any", func: lambda {|args, state| args.flatten}},
48
86
  {name: "set_var", n_args: 2, func: lambda {|args, state| state[:instruction_variables][args[0]] = args[1]; [] }},
49
87
  {name: "get_var", n_args: 1, func: lambda {|args, state| state[:instruction_variables].keys.include?(args[0]) ? state[:instruction_variables][args[0]] : raise("Instruction variable \"#{args[0]}\" not found: Program build not possible. This is likely a program with the ISA configuration, not the program being compiled.") }},
@@ -54,6 +92,16 @@ module MachineCode_AST
54
92
  # Bit manipulations
55
93
  {name: "bit_and", n_args: 2, func: lambda {|args, state| args[0] & args[1] }},
56
94
  {name: "bit_or", n_args: 2, func: lambda {|args, state| args[0] | args[1] }},
95
+
96
+ # Ensure equality between all arguments. Last argument provides the error message if not equal
97
+ {name: "ensure_eq", n_args: "any", func: lambda do |args, state|
98
+ args[1...-1].each do |arg|
99
+ if args[0] != arg
100
+ raise args.last
101
+ end
102
+ end
103
+ []
104
+ end}
57
105
  ]
58
106
 
59
107
  def self.is_ast_node(val)
@@ -1,6 +1,27 @@
1
1
  # Copyright 2024 Kyrylo Shyshko
2
2
  # Licensed under the Apache License, Version 2.0. See LICENSE file for details.
3
3
 
4
+ #
5
+ # Implements generic parsers used everywhere and checks specific to the compilation process
6
+ #
7
+ # Functions:
8
+ # parse_str - parses a string definition from the input text, and returns the number of characters parsed and the string's contents
9
+ # get_code_lines - parses the initial raw code text into lines, removing comments along the way
10
+ #
11
+ # Compilation specific functions:
12
+ # check_instruction - checks whether a line is a valid instruction with the current architecture (Kompiler::Architecture)
13
+ # check_directive - checks whether a line is a directive call
14
+ #
15
+ # parse_instruction_line - parses an instruction line into its keyword (string) and operands with their descriptions (e.g., type of operand, content, value)
16
+ # extract_instruction_parts - parses an instruction line into a string keyword and a list of operand definitions (used by parse_instruction_line)
17
+ # extract_instruction_operands - parses the string after the keyword to extract only the operand definitions (used by extract_instruction_parts)
18
+ # parse_operand_str - parses an operand definition (raw string) into its type, value, and other type-dependent information (uses check_register_operand, check_immediate_operand, check_expression_operand, check_label_operand)
19
+ # check_operand_match - checks whether an operand's info (returned by parse_operand_str) matches the input operand description. Operand descriptions are mostly stored in instruction files (e.g., lib/kompiler/architectures/armv8a/instructions.rb) in the :operands key
20
+ # match_parsed_line_to_instruction - checks whether a parsed instruction line (keyword + operand info) matches an instruction entry, mostly stored in instruction files (example one line above) (used by check_instruction)
21
+ #
22
+ #
23
+
24
+
4
25
  module Kompiler
5
26
 
6
27
  module Parsers
@@ -171,6 +192,29 @@ def self.check_char_operand(str)
171
192
  end
172
193
 
173
194
 
195
# Checks whether an operand string is a symbolic expression.
#
# The string is parsed with Kompiler::Parsers::SymAST.parse. Evaluation is
# deferred: the returned operand carries a lambda that is run later with the
# program state.
#
# str - the raw operand definition
#
# Returns [true, operand] when parsing succeeds, where operand is a hash with
#   type: "run_block", block: <lambda>, block_args: {ast:, definition:},
#   block_output_type: "immediate".
# Returns [false, nil] when parsing raises a RuntimeError.
#
# The lambda receives the state, writes state[:current_address] into
# state[:labels]["here"], runs the AST from state[:block_args][:ast] against
# state[:labels], and returns an "immediate" operand hash with the result.
# Errors raised while the lambda runs are not rescued here.
def self.check_expression_operand(str)
  ast = Kompiler::Parsers::SymAST.parse str

  run_block = lambda do |state|
    # Expose the current address to the expression under the "here" label
    state[:labels]["here"] = state[:current_address]

    ast_result = Kompiler::Parsers::SymAST.run_ast state[:block_args][:ast], state[:labels], []

    {type: "immediate", value: ast_result, def_type: "sym_ast", definition: state[:block_args][:definition]}
  end

  [true, {type: "run_block", block: run_block, block_args: {ast: ast, definition: str}, block_output_type: "immediate"}]
rescue RuntimeError
  # Not an expression: report a plain negative result without printing anything,
  # since failing this check is a normal outcome for other operand kinds
  [false, nil]
end
217
+
174
218
 
175
219
  def self.check_immediate_operand(operand_str)
176
220
 
@@ -231,45 +275,22 @@ def self.parse_operand_str(operand_str)
231
275
  return {type: "label", value: operand_str, definition: operand_str} if is_label
232
276
 
233
277
 
278
+ is_expr, expr_operand = check_expression_operand(operand_str)
279
+ return expr_operand if is_expr
280
+
234
281
  # If no checks succeeded, return false
235
282
  return false
236
283
  end
237
284
 
238
-
239
-
240
-
241
- def self.parse_instruction_line(line)
242
- keyword = ""
285
+ # Extract operand strings from the structure "op1, op2, op3, ..."
286
+ # Returns an array of the operand strings
287
+ def self.extract_instruction_operands(line)
243
288
  i = 0
244
-
245
- # Loop until a non-whitespace character
246
- while i < line.size
247
- break if ![" ", "\t"].include?(line[i])
248
- i += 1
249
- end
250
-
251
- # Loop to get the keyword
252
- loop do
253
- # Exit out of the loop if the character is a whitespace
254
- break if [" ", "\t"].include?(line[i]) || i >= line.size
255
- # Add the character if not a whitespace
256
- keyword << line[i]
257
- # Proceed to the next character
258
- i += 1
259
- end
260
-
261
289
  operand_strings = []
262
290
 
263
- # Loop for operands
264
291
  loop do
265
292
  break if i >= line.size
266
-
267
- # # Whitespace - skip
268
- # if [" ", "\t"].include? line[i]
269
- # i += 1
270
- # next
271
- # end
272
-
293
+
273
294
  operand_content = ""
274
295
 
275
296
  # Collect the operand's content until a comma or end of line
@@ -283,13 +304,13 @@ def self.parse_instruction_line(line)
283
304
  end
284
305
 
285
306
  # Skip whitespace
286
- if [" ", "\t"].include? line[i]
307
+ if Kompiler::Config.whitespace_chars.include? line[i]
287
308
  i += 1
288
309
  next
289
310
  end
290
311
 
291
312
  # If a string definition, parse to the end of the string
292
- if ["\"", "'"].include?(line[i])
313
+ if Kompiler::Config.string_delimiters.include?(line[i])
293
314
  str_content, parsed_size = parse_str(line[i..])
294
315
  operand_content += line[i] + str_content + line[i]
295
316
  i += parsed_size
@@ -305,7 +326,45 @@ def self.parse_instruction_line(line)
305
326
 
306
327
  # After operand content was collected, add it to the list of operands if the content isn't empty
307
328
  operand_strings << operand_content if operand_content.size != 0
308
- end
329
+ end
330
+
331
+ operand_strings
332
+ end
333
+
334
+
335
# Splits an instruction line into its keyword and its raw operand strings.
#
# Leading whitespace (as listed in Kompiler::Config.whitespace_chars) is
# skipped, the keyword is read up to the next whitespace character or the end
# of the line, and the remainder of the line is handed to
# extract_instruction_operands.
#
# line - the instruction line as a string
#
# Returns two values: the keyword string and whatever
# extract_instruction_operands returns for the rest of the line.
def self.extract_instruction_parts(line)
  is_whitespace = lambda { |char| Kompiler::Config.whitespace_chars.include?(char) }

  # Step over any leading whitespace
  pos = 0
  pos += 1 while pos < line.size && is_whitespace.call(line[pos])

  # Accumulate keyword characters until whitespace or the end of the line
  keyword = ""
  until is_whitespace.call(line[pos]) || pos >= line.size
    keyword << line[pos]
    pos += 1
  end

  # Everything after the keyword is the operand list
  return keyword, extract_instruction_operands(line[pos..])
end
362
+
363
+
364
+
365
+ def self.parse_instruction_line(line)
366
+
367
+ keyword, operand_strings = extract_instruction_parts(line)
309
368
 
310
369
  # Parse operand strings into operand types and values
311
370
 
@@ -324,8 +383,12 @@ end
324
383
 
325
384
  def self.check_operand_match(operand_description, operand)
326
385
 
327
- # If operand type doesn't not match, return false
328
- return false if operand[:type] != operand_description[:type]
386
+ if operand[:type] == "run_block" # A special check for a run block
387
+ return false if operand[:block_output_type] != operand_description[:type]
388
+ else
389
+ # If operand type doesn't match, return false
390
+ return false if operand[:type] != operand_description[:type]
391
+ end
329
392
 
330
393
  # Get the restrictions
331
394
  operand_restrictions = operand_description[:restrictions]
@@ -337,6 +400,8 @@ def self.check_operand_match(operand_description, operand)
337
400
  operand_encoding = operand[:value]
338
401
  when "immediate"
339
402
  operand_encoding = operand[:value]
403
+ when "run_block"
404
+ operand_encoding = Hash.new
340
405
  when "string"
341
406
  operand_encoding = Hash.new
342
407
  end
@@ -418,12 +483,20 @@ end
418
483
 
419
484
 
420
485
  def self.check_directive(line)
421
- status = parse_instruction_line(line)
422
-
423
- return [false, nil] if status == false
424
-
425
- keyword, operands = status
426
-
486
+ # Skip whitespace
487
+ char_i = 0
488
+ while char_i < line.size && Kompiler::Config.whitespace_chars.include?(line[char_i])
489
+ char_i += 1
490
+ end
491
+
492
+ # Collect the keyword
493
+ keyword = ""
494
+
495
+ while char_i < line.size && Kompiler::Config.keyword_chars.include?(line[char_i])
496
+ keyword << line[char_i]
497
+ char_i += 1
498
+ end
499
+
427
500
  if keyword[0] == "."
428
501
  keyword = keyword[1..]
429
502
  end
@@ -431,16 +504,35 @@ def self.check_directive(line)
431
504
  directive = nil
432
505
 
433
506
  Kompiler::Directives.directives.each do |curr_directive|
434
- if curr_directive[:keyword] == keyword
435
- directive = curr_directive
436
- break
507
+ if curr_directive[:keyword].is_a? String
508
+ if curr_directive[:keyword] == keyword
509
+ directive = curr_directive
510
+ break
511
+ end
512
+ elsif curr_directive[:keyword].is_a? Array
513
+ if curr_directive[:keyword].include? keyword
514
+ directive = curr_directive
515
+ break
516
+ end
517
+ else
518
+ raise "Directive name error"
437
519
  end
438
520
  end
439
521
 
440
522
  if directive == nil
441
523
  return [false, nil]
524
+ end
525
+
526
+ # Check if the directive requires pre-collected operands (with the :collect_operands key that is true by default)
527
+ if !directive.keys.include?([:collect_operands]) || directive[:collect_operands] == true
528
+ parse_status, operands = parse_instruction_line(line)
529
+
530
+ return [false, nil] if parse_status == false # Return negative if operands can't be parsed
531
+
532
+ return [true, {directive: directive, operands: operands}] # Otherwise, return the directive
442
533
  else
443
- return [true, {directive: directive, operands: operands}]
534
+ # If operand collection isn't required, return the directive
535
+ return [true, {directive: directive, operands: []}]
444
536
  end
445
537
  end
446
538