kompiler 0.3.0.pre.4 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/kompile +241 -33
- data/lib/kompiler/arch_manager.rb +10 -1
- data/lib/kompiler/architecture.rb +5 -1
- data/lib/kompiler/architectures/armv8a/instructions.rb +76 -27
- data/lib/kompiler/architectures/armv8a/load.rb +3 -1
- data/lib/kompiler/architectures/armv8a/simd_fp_instructions.rb +1308 -0
- data/lib/kompiler/architectures/armv8a/simd_fp_registers.rb +23 -0
- data/lib/kompiler/architectures/armv8a/sys_registers.rb +3 -0
- data/lib/kompiler/compiler_functions.rb +32 -8
- data/lib/kompiler/config.rb +12 -1
- data/lib/kompiler/directives.rb +13 -0
- data/lib/kompiler/math_ast.rb +29 -1
- data/lib/kompiler/mc_builder.rb +48 -0
- data/lib/kompiler/parsers.rb +21 -0
- data/lib/kompiler/wrappers/elf_wrapper.rb +697 -0
- data/lib/kompiler/wrappers/packed_bytes.rb +68 -0
- data/lib/kompiler/wrappers.rb +1 -0
- data/lib/kompiler.rb +2 -1
- metadata +8 -3
@@ -0,0 +1,23 @@
|
|
1
|
+
# Copyright 2024 Kyrylo Shyshko
|
2
|
+
# Licensed under the Apache License, Version 2.0. See LICENSE file for details.
|
3
|
+
|
4
|
+
module Kompiler
|
5
|
+
|
6
|
+
module ARMv8A
|
7
|
+
|
8
|
+
def self.simd_fp_registers
|
9
|
+
@simd_fp_registers
|
10
|
+
end
|
11
|
+
|
12
|
+
@simd_fp_registers = [
|
13
|
+
|
14
|
+
]
|
15
|
+
|
16
|
+
(0..31).each do |reg_i|
|
17
|
+
@simd_fp_registers << {reg_name: "q#{reg_i}", reg_type: "simd_fp_reg", re_num: reg_i}
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
end # Kompiler::ARMv8A
|
22
|
+
|
23
|
+
end # Kompiler
|
@@ -54,6 +54,9 @@ end
|
|
54
54
|
|
55
55
|
{reg_name: "ISR_EL1", reg_size: 64, reg_type: "sr", reg_encoding: {"op0"=>0b11, "op1"=>0b000, "CRn"=>0b1100, "CRm"=>0b0001, "op2"=>0b000}},
|
56
56
|
|
57
|
+
{reg_name: "CPACR_EL1", reg_size: 64, reg_type: "sr", reg_encoding: {"op0"=>0b11, "op1"=>0b000, "CRn"=>0b0001, "CRm"=>0b0000, "op2"=>0b010}},
|
58
|
+
{reg_name: "CPACR_EL12", reg_size: 64, reg_type: "sr", reg_encoding: {"op0"=>0b11, "op1"=>0b101, "CRn"=>0b0001, "CRm"=>0b0000, "op2"=>0b010}},
|
59
|
+
|
57
60
|
# Special registers for the MSR (immediate) instruction (some of them were previously defined already)
|
58
61
|
{reg_name: "SPSel", reg_type: "pstate_reg"},
|
59
62
|
{reg_name: "DAIFSet", reg_type: "pstate_reg"},
|
@@ -1,6 +1,16 @@
|
|
1
1
|
# Copyright 2024 Kyrylo Shyshko
|
2
2
|
# Licensed under the Apache License, Version 2.0. See LICENSE file for details.
|
3
3
|
|
4
|
+
#
|
5
|
+
# Implements main logic happening during compilation.
|
6
|
+
# Main functions:
|
7
|
+
# parse_code - main code parser that turns all lines into an abstract structure (determining types of lines and connecting them into a program)
|
8
|
+
# construct_program_mc - transforms the parse_code's AST into machine code (MC)
|
9
|
+
# detailed_compile - stitches parse_code and construct_program_mc to fully compile a program with a detailed output
|
10
|
+
# compile - calls detailed_compile and removes the extra output information
|
11
|
+
#
|
12
|
+
|
13
|
+
|
4
14
|
module Kompiler
|
5
15
|
|
6
16
|
module CompilerFunctions
|
@@ -138,8 +148,11 @@ def self.parse_code(lines)
|
|
138
148
|
|
139
149
|
parsed_lines = state[:parsed_lines] + parsed_lines[insert_i..]
|
140
150
|
end
|
141
|
-
|
142
|
-
|
151
|
+
|
152
|
+
|
153
|
+
state = {parsed_lines: parsed_lines, current_address: instr_adr, lines: lines, line_i: line_i, extra_state: extra_state}
|
154
|
+
|
155
|
+
return state
|
143
156
|
end
|
144
157
|
|
145
158
|
|
@@ -204,7 +217,7 @@ def self.construct_program_mc(parsed_lines, labels)
|
|
204
217
|
end
|
205
218
|
end
|
206
219
|
end
|
207
|
-
|
220
|
+
|
208
221
|
lines_bytes
|
209
222
|
end
|
210
223
|
|
@@ -250,15 +263,26 @@ def self.bit_lines_to_bytes(bit_lines)
|
|
250
263
|
end
|
251
264
|
|
252
265
|
|
253
|
-
def self.compile(code
|
266
|
+
def self.compile(code)
|
267
|
+
detailed_result = detailed_compile(code)
|
268
|
+
|
269
|
+
return detailed_result[:machine_code]
|
270
|
+
end
|
254
271
|
|
272
|
+
def self.detailed_compile(code)
|
255
273
|
lines = Kompiler::Parsers.get_code_lines(code)
|
256
|
-
|
257
|
-
|
258
|
-
|
274
|
+
|
275
|
+
parsed_state = parse_code(lines)
|
276
|
+
|
277
|
+
# pp parsed_state
|
278
|
+
|
279
|
+
parsed_lines = parsed_state[:parsed_lines]
|
280
|
+
|
259
281
|
labels = get_labels(parsed_lines)
|
260
|
-
|
282
|
+
|
261
283
|
machine_code_bytes = construct_program_mc(parsed_lines, labels)
|
284
|
+
|
285
|
+
return {machine_code: machine_code_bytes, labels: labels, parsed_state: parsed_state}
|
262
286
|
end
|
263
287
|
|
264
288
|
|
data/lib/kompiler/config.rb
CHANGED
@@ -1,10 +1,21 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# Contains config options for how Kompiler interprets different characters.
|
4
|
+
#
|
5
|
+
# Main config options:
|
6
|
+
# keyword_chars - list of characters that a keyword can contain
|
7
|
+
# label_chars - list of characters that a label name can contain
|
8
|
+
# whitespace_chars - list of characters that qualify as whitespace / separators of words
|
9
|
+
# string_delimiters - a list of characters that denote the start and end of a string
|
10
|
+
#
|
11
|
+
|
1
12
|
module Kompiler
|
2
13
|
|
3
14
|
module Config
|
4
15
|
|
5
16
|
@keyword_chars = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["_", "."]
|
6
17
|
@label_chars = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["_", "."]
|
7
|
-
@whitespace_chars = [" ", "\t"]
|
18
|
+
@whitespace_chars = [" ", "\t", "\r"]
|
8
19
|
@string_delimiters = ['"', "'"]
|
9
20
|
|
10
21
|
# Returns the permittable keyword characters
|
data/lib/kompiler/directives.rb
CHANGED
@@ -1,6 +1,19 @@
|
|
1
1
|
# Copyright 2024 Kyrylo Shyshko
|
2
2
|
# Licensed under the Apache License, Version 2.0. See LICENSE file for details.
|
3
3
|
|
4
|
+
#
|
5
|
+
# Implements all directives available in programs.
|
6
|
+
#
|
7
|
+
# Kompiler::Directives.directives is a list of all available directives.
|
8
|
+
#
|
9
|
+
# An entry's structure is:
|
10
|
+
# keyword - string or list of strings specifying the keywords by which the directive can be accessed.
|
11
|
+
# func - a Ruby lambda that receives the call operands and current state as arguments, and outputs the new state.
|
12
|
+
# collect_operands - optional (default is true). Specifies whether the operands should be parsed before calling the :func key lambda.
|
13
|
+
# If false, operands will be a raw string containing the string after the keyword.
|
14
|
+
#
|
15
|
+
|
16
|
+
|
4
17
|
module Kompiler
|
5
18
|
|
6
19
|
module Directives
|
data/lib/kompiler/math_ast.rb
CHANGED
@@ -1,3 +1,31 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# Implements logic to parse math-like expressions into ASTs.
|
4
|
+
#
|
5
|
+
# Main functions:
|
6
|
+
# str_to_ast - converts a raw string into an AST.
|
7
|
+
# run_ast - runs the AST created by str_to_ast.
|
8
|
+
#
|
9
|
+
# Config options are available in Kompiler::Parsers::SymAST::Config :
|
10
|
+
# word_begin_chars - a list of characters that a word can begin with
|
11
|
+
# word_chars - a list of characters that a word can contain
|
12
|
+
# number_begin_chars - a list of characters that a number can begin with
|
13
|
+
# number_chars - a list of characters that a number can contain
|
14
|
+
# whitespace_chars - a list of whitespace / separator characters
|
15
|
+
# parse_functions - a boolean specifying whether functions with syntax func(x + 2) should be parsed or throw an error
|
16
|
+
# sign_types - a list of available signs, their names and character sequences that qualify as the sign.
|
17
|
+
# Entries appearing earlier in the list are prioritized.
|
18
|
+
# one_element_ast_operations - a list of one element operations, their names, sign types, and checking direction (1 for left to right, -1 for right to left).
|
19
|
+
# For example, the negation "-x" is a one element operation, with the sign type "sub", and check_direction -1 (checks from right to left) because it is on the left of the 'x'
|
20
|
+
# Entries appearing earlier in the list are prioritized.
|
21
|
+
# two_element_ast_operations - a list of two element operation 'groups'. Groups are implemented to list operations on the same priority level with the same check direction.
|
22
|
+
# Each group has a check_direction (1 for left to right, -1 for opposite), and a list of operations in this group, their names and sign types, similar to one element operations.
|
23
|
+
# Entries appearing earlier in the list are prioritized.
|
24
|
+
# An example group could be multiplication and division. The check_direction will be 1, and there will be two operations (mul and div). This group will be below the power (a ** b) group.
|
25
|
+
# functions - a list of available functions in expressions in Kompiler programs
|
26
|
+
#
|
27
|
+
|
28
|
+
|
1
29
|
module Kompiler
|
2
30
|
|
3
31
|
module Parsers
|
@@ -662,4 +690,4 @@ end # Kompiler::Parsers::SymAST
|
|
662
690
|
|
663
691
|
end # Kompiler::Parsers
|
664
692
|
|
665
|
-
end # Kompiler
|
693
|
+
end # Kompiler
|
data/lib/kompiler/mc_builder.rb
CHANGED
@@ -1,6 +1,37 @@
|
|
1
1
|
# Copyright 2024 Kyrylo Shyshko
|
2
2
|
# Licensed under the Apache License, Version 2.0. See LICENSE file for details.
|
3
3
|
|
4
|
+
#
|
5
|
+
# Implements a custom AST structure and interpreter for instructions on how to build the instruction's machine code (MC).
|
6
|
+
#
|
7
|
+
# @MC_AST_NODES contains a list of all available instructions / AST nodes for building machine code.
|
8
|
+
# Each entry's structure is:
|
9
|
+
# name - contains the node's / instruction's name
|
10
|
+
# n_args - either an integer or "any". Contains the amount of arguments this instruction must receive.
|
11
|
+
# func - a lambda receiving the arguments and the current program's state as inputs. Should output the instruction's result.
|
12
|
+
# eval_args - optional, default true. Specifies whether to pre-evaluate the node's arguments.
|
13
|
+
#
|
14
|
+
# A MC instruction example:
|
15
|
+
# ["get_bits", ["get_current_address"], 0, 10]
|
16
|
+
# Which returns an array of ten integers, or bits, of the current address.
|
17
|
+
#
|
18
|
+
# Each MC instruction is in the form of an array with a string, the instruction's name, as the first element.
|
19
|
+
# All other elements will count as arguments.
|
20
|
+
# For most nodes, the arguments will be evaluated / computed before calling the node's logic. For example, in:
|
21
|
+
# ["get_bits", ["get_current_address"], 0, 10]
|
22
|
+
# ["get_current_address"] will be evaluated first, and then the result will be passed into get_bits. This is similar to a Ruby piece of code like this:
|
23
|
+
# get_bits(get_current_address(), 0, 10)
|
24
|
+
#
|
25
|
+
# In more special nodes that have eval_args = false, such as the if_eq_else node, the arguments aren't pre-evaluated, which is required in an if-statement scenario. E.g., an error shouldn't be thrown before a check that the error must be raised.
|
26
|
+
#
|
27
|
+
#
|
28
|
+
# Main functions are:
|
29
|
+
# build_mc - builds machine code from an input AST
|
30
|
+
# run_mc_ast - runs an MC AST node
|
31
|
+
# is_ast_node - returns whether an object is an AST node, by checking whether it is an array with the first element being a string
|
32
|
+
#
|
33
|
+
|
34
|
+
|
4
35
|
module Kompiler
|
5
36
|
|
6
37
|
module MachineCode_AST
|
@@ -44,6 +75,13 @@ module MachineCode_AST
|
|
44
75
|
{name: "raise_warning", n_args: 1, func: lambda {|args, state| puts args[0]; [] } },
|
45
76
|
|
46
77
|
{name: "get_key", n_args: 2, func: lambda {|args, state| args[0].keys.include?(args[1]) ? args[0][args[1]] : raise("MC Constructor get_key Error: The key \"#{args[1]}\" doesn't exist - Program build not possible. This is likely a problem with the ISA configuration, not the program being compiled.") }},
|
78
|
+
|
79
|
+
# Concatenation of get_key and get_operand through get_key(get_operand(arg1), arg2)
|
80
|
+
{name: "get_operand_key", n_args: 2, func: lambda do |args, state|
|
81
|
+
op = state[:operands][args[0]][:value]
|
82
|
+
op.keys.include?(args[1]) ? op[args[1]] : raise("MC Constructor get_operand_key Error: key \"#{args[1]}\" doesn't exist. This is likely an error with the ISA configuration, not the program being compiled.")
|
83
|
+
end},
|
84
|
+
|
47
85
|
{name: "concat", n_args: "any", func: lambda {|args, state| args.flatten}},
|
48
86
|
{name: "set_var", n_args: 2, func: lambda {|args, state| state[:instruction_variables][args[0]] = args[1]; [] }},
|
49
87
|
{name: "get_var", n_args: 1, func: lambda {|args, state| state[:instruction_variables].keys.include?(args[0]) ? state[:instruction_variables][args[0]] : raise("Instruction variable \"#{args[0]}\" not found: Program build not possible. This is likely a program with the ISA configuration, not the program being compiled.") }},
|
@@ -54,6 +92,16 @@ module MachineCode_AST
|
|
54
92
|
# Bit manipulations
|
55
93
|
{name: "bit_and", n_args: 2, func: lambda {|args, state| args[0] & args[1] }},
|
56
94
|
{name: "bit_or", n_args: 2, func: lambda {|args, state| args[0] | args[1] }},
|
95
|
+
|
96
|
+
# Ensure equality between all arguments. Last argument provides the error message if not equal
|
97
|
+
{name: "ensure_eq", n_args: "any", func: lambda do |args, state|
|
98
|
+
args[1...-1].each do |arg|
|
99
|
+
if args[0] != arg
|
100
|
+
raise args.last
|
101
|
+
end
|
102
|
+
end
|
103
|
+
[]
|
104
|
+
end}
|
57
105
|
]
|
58
106
|
|
59
107
|
def self.is_ast_node(val)
|
data/lib/kompiler/parsers.rb
CHANGED
@@ -1,6 +1,27 @@
|
|
1
1
|
# Copyright 2024 Kyrylo Shyshko
|
2
2
|
# Licensed under the Apache License, Version 2.0. See LICENSE file for details.
|
3
3
|
|
4
|
+
#
|
5
|
+
# Implements generic parsers used everywhere and checks specific to the compilation process
|
6
|
+
#
|
7
|
+
# Functions:
|
8
|
+
# parse_str - parses a string definition from the input text, and returns the amount of characters parsed and the string's contents
|
9
|
+
# get_code_lines - parses the initial raw code text into lines, removing comments along the way
|
10
|
+
#
|
11
|
+
# Compilation specific functions:
|
12
|
+
# check_instruction - checks whether a line is a valid instruction with the current architecture (Kompiler::Architecture)
|
13
|
+
# check_directive - checks whether a line is a directive call
|
14
|
+
#
|
15
|
+
# parse_instruction_line - parses an instruction line into its keyword (string) and operands with their descriptions (e.g., type of operand, content, value)
|
16
|
+
# extract_instruction_parts - parses an instruction line into a string keyword and a list of operand definitions (used by parse_instruction_line)
|
17
|
+
# extract_instruction_operands - parses the string after the keyword to extract only the operand definitions (used by extract_instruction_parts)
|
18
|
+
# parse_operand_str - parses an operand definition (raw string) into its type, value, and other type-dependent information (uses check_register_operand, check_immediate_operand, check_expression_operand, check_label_operand)
|
19
|
+
# check_operand_match - checks whether an operand's info (returned by parse_operand_str) matches the input operand description. Operand descriptions are mostly stored in instruction files (e.g., lib/kompiler/architectures/armv8a/instructions.rb) in the :operands key
|
20
|
+
# match_parsed_line_to_instruction - checks whether a parsed instruction line (keyword + operand info) matches an instruction entry, mostly stored in instruction files (example one line above) (used by check_instruction)
|
21
|
+
#
|
22
|
+
#
|
23
|
+
|
24
|
+
|
4
25
|
module Kompiler
|
5
26
|
|
6
27
|
module Parsers
|