kompiler 0.3.0.pre.4 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ # Copyright 2024 Kyrylo Shyshko
2
+ # Licensed under the Apache License, Version 2.0. See LICENSE file for details.
3
+
4
+ module Kompiler
5
+
6
+ module ARMv8A
7
+
8
+ def self.simd_fp_registers
9
+ @simd_fp_registers
10
+ end
11
+
12
+ @simd_fp_registers = [
13
+
14
+ ]
15
+
16
+ (0..31).each do |reg_i|
17
+ @simd_fp_registers << {reg_name: "q#{reg_i}", reg_type: "simd_fp_reg", re_num: reg_i}
18
+ end
19
+
20
+
21
+ end # Kompiler::ARMv8A
22
+
23
+ end # Kompiler
@@ -54,6 +54,9 @@ end
54
54
 
55
55
  {reg_name: "ISR_EL1", reg_size: 64, reg_type: "sr", reg_encoding: {"op0"=>0b11, "op1"=>0b000, "CRn"=>0b1100, "CRm"=>0b0001, "op2"=>0b000}},
56
56
 
57
+ {reg_name: "CPACR_EL1", reg_size: 64, reg_type: "sr", reg_encoding: {"op0"=>0b11, "op1"=>0b000, "CRn"=>0b0001, "CRm"=>0b0000, "op2"=>0b010}},
58
+ {reg_name: "CPACR_EL12", reg_size: 64, reg_type: "sr", reg_encoding: {"op0"=>0b11, "op1"=>0b101, "CRn"=>0b0001, "CRm"=>0b0000, "op2"=>0b010}},
59
+
57
60
  # Special registers for the MSR (immediate) instruction (some of them were previously defined already)
58
61
  {reg_name: "SPSel", reg_type: "pstate_reg"},
59
62
  {reg_name: "DAIFSet", reg_type: "pstate_reg"},
@@ -1,6 +1,16 @@
1
1
  # Copyright 2024 Kyrylo Shyshko
2
2
  # Licensed under the Apache License, Version 2.0. See LICENSE file for details.
3
3
 
4
+ #
5
+ # Implements main logic happening during compilation.
6
+ # Main functions:
7
+ # parse_code - main code parser that turns all lines into an abstract structure (determining types of lines and connecting them into a program)
8
+ # construct_program_mc - transforms the parse_code's AST into machine code (MC)
9
+ # detailed_compile - stitches parse_code and construct_program_mc to fully compile a program with a detailed output
10
+ # compile - calls detailed_compile and removes the extra output information
11
+ #
12
+
13
+
4
14
  module Kompiler
5
15
 
6
16
  module CompilerFunctions
@@ -138,8 +148,11 @@ def self.parse_code(lines)
138
148
 
139
149
  parsed_lines = state[:parsed_lines] + parsed_lines[insert_i..]
140
150
  end
141
-
142
- parsed_lines
151
+
152
+
153
+ state = {parsed_lines: parsed_lines, current_address: instr_adr, lines: lines, line_i: line_i, extra_state: extra_state}
154
+
155
+ return state
143
156
  end
144
157
 
145
158
 
@@ -204,7 +217,7 @@ def self.construct_program_mc(parsed_lines, labels)
204
217
  end
205
218
  end
206
219
  end
207
-
220
+
208
221
  lines_bytes
209
222
  end
210
223
 
@@ -250,15 +263,26 @@ def self.bit_lines_to_bytes(bit_lines)
250
263
  end
251
264
 
252
265
 
253
- def self.compile(code, included_files=[])
266
+ def self.compile(code)
267
+ detailed_result = detailed_compile(code)
268
+
269
+ return detailed_result[:machine_code]
270
+ end
254
271
 
272
+ def self.detailed_compile(code)
255
273
  lines = Kompiler::Parsers.get_code_lines(code)
256
-
257
- parsed_lines = parse_code(lines)
258
-
274
+
275
+ parsed_state = parse_code(lines)
276
+
277
+ # pp parsed_state
278
+
279
+ parsed_lines = parsed_state[:parsed_lines]
280
+
259
281
  labels = get_labels(parsed_lines)
260
-
282
+
261
283
  machine_code_bytes = construct_program_mc(parsed_lines, labels)
284
+
285
+ return {machine_code: machine_code_bytes, labels: labels, parsed_state: parsed_state}
262
286
  end
263
287
 
264
288
 
@@ -1,10 +1,21 @@
1
+
2
+ #
3
+ # Contains config options for how Kompiler interprets different characters.
4
+ #
5
+ # Main config options:
6
+ # keyword_chars - list of characters that a keyword can contain
7
+ # label_chars - list of characters that a label name can contain
8
+ # whitespace_chars - list of characters that qualify as whitespace / separators of words
9
+ # string_delimiters - a list of characters that denote the start and end of a string
10
+ #
11
+
1
12
  module Kompiler
2
13
 
3
14
  module Config
4
15
 
5
16
  @keyword_chars = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["_", "."]
6
17
  @label_chars = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["_", "."]
7
- @whitespace_chars = [" ", "\t"]
18
+ @whitespace_chars = [" ", "\t", "\r"]
8
19
  @string_delimiters = ['"', "'"]
9
20
 
10
21
  # Returns the permittable keyword characters
@@ -1,6 +1,19 @@
1
1
  # Copyright 2024 Kyrylo Shyshko
2
2
  # Licensed under the Apache License, Version 2.0. See LICENSE file for details.
3
3
 
4
+ #
5
+ # Implements all directives available in programs.
6
+ #
7
+ # Kompiler::Directives.directives is a list of all available directives.
8
+ #
9
+ # An entry's structure is:
10
+ # keyword - string or list of strings specifying the keywords by which the directive can be accessed.
11
+ # func - a Ruby lambda that receives the call operands and current state as arguments, and outputs the new state.
12
+ # collect_operands - optional (default is true). Specifies whether the operands should be parsed before calling the :func key lambda.
13
+ # If false, operands will be a raw string containing the string after the keyword.
14
+ #
15
+
16
+
4
17
  module Kompiler
5
18
 
6
19
  module Directives
@@ -1,3 +1,31 @@
1
+
2
+ #
3
+ # Implements logic to parse math-like expressions into an AST.
4
+ #
5
+ # Main functions:
6
+ # str_to_ast - converts a raw string into an AST.
7
+ # run_ast - runs the AST created by str_to_ast.
8
+ #
9
+ # Config options are available in Kompiler::Parsers::SymAST::Config :
10
+ # word_begin_chars - a list of characters that a word can begin with
11
+ # word_chars - a list of characters that a word can contain
12
+ # number_begin_chars - a list of characters that a number can begin with
13
+ # number_chars - a list of characters that a number can contain
14
+ # whitespace_chars - a list of whitespace / separator characters
15
+ # parse_functions - a boolean specifying whether functions with syntax func(x + 2) should be parsed or throw an error
16
+ # sign_types - a list of available signs, their names and character sequences that qualify as the sign.
17
+ # Entries appearing earlier in the list are prioritized.
18
+ # one_element_ast_operations - a list of one element operations, their names, sign types, and checking direction (1 for left to right, -1 for right to left).
19
+ # For example, the negation "-x" is a one element operation, with the sign type "sub", and check_direction -1 (checks from right to left) because it is on the left of the 'x'
20
+ # Entries appearing earlier in the list are prioritized.
21
+ # two_element_ast_operations - a list of two element operation 'groups'. Groups are implemented to list operations on the same priority level with the same check direction.
22
+ # Each group has a check_direction (1 for left to right, -1 for opposite), and a list of operations in this group, their names and sign types, similar to one element operations.
23
+ # Entries appearing earlier in the list are prioritized.
24
+ # An example group could be multiplication and division. The check_direction will be 1, and there will be two operations (mul and div). This group will be below the power (a ** b) group.
25
+ # functions - a list of available functions in expressions in Kompiler programs
26
+ #
27
+
28
+
1
29
  module Kompiler
2
30
 
3
31
  module Parsers
@@ -662,4 +690,4 @@ end # Kompiler::Parsers::SymAST
662
690
 
663
691
  end # Kompiler::Parsers
664
692
 
665
- end # Kompiler
693
+ end # Kompiler
@@ -1,6 +1,37 @@
1
1
  # Copyright 2024 Kyrylo Shyshko
2
2
  # Licensed under the Apache License, Version 2.0. See LICENSE file for details.
3
3
 
4
+ #
5
+ # Implements a custom AST structure and interpreter for instructions on how to build the instruction's machine code (MC).
6
+ #
7
+ # @MC_AST_NODES contains a list of all available instructions / AST nodes for building machine code.
8
+ # Each entry's structure is:
9
+ # name - contains the node's / instruction's name
10
+ # n_args - either an integer or "any". Contains the number of arguments this instruction must receive.
11
+ # func - a lambda receiving the arguments and the current program's state as inputs. Should output the instruction's result.
12
+ # eval_args - optional, default true. Specifies whether to pre-evaluate the node's arguments.
13
+ #
14
+ # A MC instruction example:
15
+ # ["get_bits", ["get_current_address"], 0, 10]
16
+ # Which returns an array of ten integers, or bits, of the current address.
17
+ #
18
+ # Each MC instruction is in the form of an array with a string, the instruction's name, as the first element.
19
+ # All other elements will count as arguments.
20
+ # For most nodes, the arguments will be evaluated / computed before calling the node's logic. For example, in:
21
+ # ["get_bits", ["get_current_address"], 0, 10]
22
+ # ["get_current_address"] will be evaluated first, and then the result will be passed into get_bits. This is similar to a Ruby piece of code like this:
23
+ # get_bits(get_current_address(), 0, 10)
24
+ #
25
+ # In more special nodes that have eval_args = false, such as the if_eq_else node, the arguments aren't pre-evaluated, which is required in an if-statement scenario. E.g., an error placed inside a branch shouldn't be raised before the condition has been checked and that branch has actually been selected.
26
+ #
27
+ #
28
+ # Main functions are:
29
+ # build_mc - builds machine code from an input AST
30
+ # run_mc_ast - runs an MC AST node
31
+ # is_ast_node - returns whether an object is an AST node, by checking whether it is an array with the first element being a string
32
+ #
33
+
34
+
4
35
  module Kompiler
5
36
 
6
37
  module MachineCode_AST
@@ -44,6 +75,13 @@ module MachineCode_AST
44
75
  {name: "raise_warning", n_args: 1, func: lambda {|args, state| puts args[0]; [] } },
45
76
 
46
77
  {name: "get_key", n_args: 2, func: lambda {|args, state| args[0].keys.include?(args[1]) ? args[0][args[1]] : raise("MC Constructor get_key Error: The key \"#{args[1]}\" doesn't exist - Program build not possible. This is likely a problem with the ISA configuration, not the program being compiled.") }},
78
+
79
+ # Composition of get_key and get_operand, equivalent to get_key(get_operand(arg1), arg2)
80
+ {name: "get_operand_key", n_args: 2, func: lambda do |args, state|
81
+ op = state[:operands][args[0]][:value]
82
+ op.keys.include?(args[1]) ? op[args[1]] : raise("MC Constructor get_operand_key Error: key \"#{args[1]}\" doesn't exist. This is likely an error with the ISA configuration, not the program being compiled.")
83
+ end},
84
+
47
85
  {name: "concat", n_args: "any", func: lambda {|args, state| args.flatten}},
48
86
  {name: "set_var", n_args: 2, func: lambda {|args, state| state[:instruction_variables][args[0]] = args[1]; [] }},
49
87
  {name: "get_var", n_args: 1, func: lambda {|args, state| state[:instruction_variables].keys.include?(args[0]) ? state[:instruction_variables][args[0]] : raise("Instruction variable \"#{args[0]}\" not found: Program build not possible. This is likely a program with the ISA configuration, not the program being compiled.") }},
@@ -54,6 +92,16 @@ module MachineCode_AST
54
92
  # Bit manipulations
55
93
  {name: "bit_and", n_args: 2, func: lambda {|args, state| args[0] & args[1] }},
56
94
  {name: "bit_or", n_args: 2, func: lambda {|args, state| args[0] | args[1] }},
95
+
96
+ # Ensures that every argument except the last is equal to the first argument. The last argument is the error message raised if any of them differs.
97
+ {name: "ensure_eq", n_args: "any", func: lambda do |args, state|
98
+ args[1...-1].each do |arg|
99
+ if args[0] != arg
100
+ raise args.last
101
+ end
102
+ end
103
+ []
104
+ end}
57
105
  ]
58
106
 
59
107
  def self.is_ast_node(val)
@@ -1,6 +1,27 @@
1
1
  # Copyright 2024 Kyrylo Shyshko
2
2
  # Licensed under the Apache License, Version 2.0. See LICENSE file for details.
3
3
 
4
+ #
5
+ # Implements generic parsers used everywhere and checks specific to the compilation process
6
+ #
7
+ # Functions:
8
+ # parse_str - parses a string definition from the input text, and returns the number of characters parsed and the string's contents
9
+ # get_code_lines - parses the initial raw code text into lines, removing comments along the way
10
+ #
11
+ # Compilation specific functions:
12
+ # check_instruction - checks whether a line is a valid instruction with the current architecture (Kompiler::Architecture)
13
+ # check_directive - checks whether a line is a directive call
14
+ #
15
+ # parse_instruction_line - parses an instruction line into its keyword (string) and operands with their descriptions (e.g., type of operand, content, value)
16
+ # extract_instruction_parts - parses an instruction line into a string keyword and a list of operand definitions (used by parse_instruction_line)
17
+ # extract_instruction_operands - parses the string after the keyword to extract only the operand definitions (used by extract_instruction_parts)
18
+ # parse_operand_str - parses an operand definition (raw string) into its type, value, and other type-dependent information (uses check_register_operand, check_immediate_operand, check_expression_operand, check_label_operand)
19
+ # check_operand_match - checks whether an operand's info (returned by parse_operand_str) matches the input operand description. Operand descriptions are mostly stored in instruction files (e.g., lib/kompiler/architectures/armv8a/instructions.rb) in the :operands key
20
+ # match_parsed_line_to_instruction - checks whether a parsed instruction line (keyword + operand info) matches an instruction entry, mostly stored in instruction files (example one line above) (used by check_instruction)
21
+ #
22
+ #
23
+
24
+
4
25
  module Kompiler
5
26
 
6
27
  module Parsers