haxor 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +101 -335
- data/bin/{hcc → hc} +1 -9
- data/bin/hvm +13 -8
- data/examples/build.sh +1 -1
- data/examples/guess-the-number.hax +65 -76
- data/lib/haxor.rb +2 -14
- data/lib/haxor/compiler/core.rb +249 -18
- data/lib/haxor/consts.rb +4 -17
- data/lib/haxor/linker.rb +12 -4
- data/lib/haxor/token/cmd.rb +29 -2
- data/lib/haxor/utils.rb +24 -0
- data/lib/haxor/vm/core.rb +6 -9
- data/lib/haxor/vm/cpu/core.rb +214 -83
- data/lib/haxor/vm/mem.rb +1 -36
- data/lib/haxor/vm/os.rb +7 -13
- data/lib/haxor/vm/stack.rb +4 -4
- data/media/memory.png +0 -0
- metadata +5 -19
- data/lib/haxor/compiler/component/arithmetic.rb +0 -45
- data/lib/haxor/compiler/component/base.rb +0 -41
- data/lib/haxor/compiler/component/data.rb +0 -32
- data/lib/haxor/compiler/component/jumps.rb +0 -83
- data/lib/haxor/compiler/component/logical.rb +0 -35
- data/lib/haxor/compiler/component/other.rb +0 -21
- data/lib/haxor/compiler/component/transfer.rb +0 -25
- data/lib/haxor/compiler/component/various.rb +0 -30
- data/lib/haxor/vm/cpu/unit/arithmetic.rb +0 -78
- data/lib/haxor/vm/cpu/unit/base.rb +0 -46
- data/lib/haxor/vm/cpu/unit/jumps.rb +0 -118
- data/lib/haxor/vm/cpu/unit/logical.rb +0 -59
- data/lib/haxor/vm/cpu/unit/transfer.rb +0 -37
- data/lib/haxor/vm/cpu/unit/various.rb +0 -47
- data/media/vm.png +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9cd0c9814ac3e3f2fdef12190761ba4eba60b08
|
4
|
+
data.tar.gz: 6660137e743eb98ba7c64d3770d0326ea7e84710
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0d90ca99f25427569baef3b3f897351448449f32306a162f2566cac5ddb0bdc42bc8f72574bfd316d0e85af7d24b68ba23a2a97b79d9ce330eb263b8c0be6bc
|
7
|
+
data.tar.gz: d5451c7125f93d755b7decc9ebb7af83e0abf11ef11a8f5e4f0a9fda5d9a60891399fb7f4a3c2fe4c185f342d835e1a750d57c7a55b00443cfa3c29d3008200e
|
data/README.md
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
[](https://badge.fury.io/rb/haxor)
|
3
3
|
[](https://codeclimate.com/github/krzysztof-magosa/haxor)
|
4
4
|
|
5
|
-
Haxor consists of compiler
|
6
|
-
|
5
|
+
Haxor consists of compiler _hc_, linker _hld_ and virtual machine _hvm_.
|
6
|
+
_hc_ translates asm-like code into tokens, _hld_ links them into bytecode, while _hvm_ runs it.
|
7
7
|
|
8
8
|
## Man, why have you written that?
|
9
9
|
Writing own implementation of VM gives a lot of knowledge about
|
@@ -33,43 +33,30 @@ Haxor is licensed under BSD 3-clause license. You can read it [here](LICENSE.txt
|
|
33
33
|
|
34
34
|
## Architecture
|
35
35
|
|
36
|
-
<img src="media/vm.png" width="50%">
|
37
|
-
|
38
36
|
### General information
|
39
|
-
*
|
40
|
-
*
|
41
|
-
*
|
42
|
-
*
|
43
|
-
*
|
44
|
-
*
|
45
|
-
*
|
37
|
+
* Design: RISC
|
38
|
+
* Endianness: Little Endian
|
39
|
+
* WORD size: 64-bit
|
40
|
+
* Registers size: 64-bit
|
41
|
+
* Instruction: fixed size, 64-bit
|
42
|
+
* Arithmetic: integer only, 64-bit
|
43
|
+
* Memory model: flat, no protection
|
46
44
|
|
47
45
|
### OpCodes
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
46
|
+
Instruction is 64-bit, and contains:
|
47
|
+
* 0:6 bits - instruction code (7 bits, unsigned)
|
48
|
+
* 7:8 bits - flags (2 bits, unsigned, not used at the moment)
|
49
|
+
* 9:14 bits - register 1 (6 bits, unsigned)
|
50
|
+
* 15:20 bits - register 2 (6 bits, unsigned)
|
51
|
+
* 21:26 bits - register 3 (6 bits, unsigned)
|
52
|
+
* 27:63 bits - immediate value (37 bits, signed)
|
55
53
|
|
56
54
|
### vCPU
|
57
|
-
vCPU
|
58
|
-
*
|
59
|
-
*
|
60
|
-
*
|
61
|
-
*
|
62
|
-
* _dr_ - arithmetic register II
|
63
|
-
* _fr_ - flags register
|
64
|
-
* _sc_ - syscall register
|
65
|
-
* _op_ - currently processed opcode
|
66
|
-
* _r01-10_ - general usage registres
|
67
|
-
|
68
|
-
### Flags register
|
69
|
-
Flags register is 64 bit.
|
70
|
-
* bit 0 - ZERO (`A-B=0`, so numbers are equal)
|
71
|
-
* bit 1 - SIGN (result is negative)
|
72
|
-
* rest is reserved for future use
|
55
|
+
vCPU has 64 registers, some of them have special role:
|
56
|
+
* $0 - always zero register, writes are ignored
|
57
|
+
* $61 (alias $sp) - stack pointer
|
58
|
+
* $62 (alias $ret) - return address for linked jumps
|
59
|
+
* $63 (alias $sc) - syscall function id and return code
|
73
60
|
|
74
61
|
## Memory map
|
75
62
|
<img src="media/memory.png" width="50%">
|
@@ -77,305 +64,89 @@ Flags register is 64 bit.
|
|
77
64
|
## Language
|
78
65
|
Haxor uses primitive asm-like syntax. Each command goes into separate line.
|
79
66
|
You can add comments in code, but they also need to be separate lines, beginning
|
80
|
-
from _#_
|
81
|
-
For two-argument commands destination goes into first argument while source into second one.
|
82
|
-
In some commands you can dereference value by enclosing it in brackets (e.g. `[sp]`).
|
83
|
-
Program starts from _main_ label.
|
67
|
+
from _#_. Program starts from _main_ label. Labels are created by putting a name followed by a colon at the end of the line (e.g. `main:`).
|
84
68
|
|
85
|
-
|
86
|
-
|
87
|
-
```
|
69
|
+
Most instructions take 3 registers, or 2 registers and an immediate value.
|
70
|
+
Unless stated otherwise, the result goes to the first specified register.
|
88
71
|
|
89
72
|
## Instructions
|
90
|
-
###
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
add
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
### Logical
|
148
|
-
#### and
|
149
|
-
Performs bitwise AND operation.
|
150
|
-
_A_ or/and _B_ can be dereferenced.
|
151
|
-
OpCode: 0x40.
|
152
|
-
```
|
153
|
-
and A, B
|
154
|
-
```
|
155
|
-
|
156
|
-
#### neg
|
157
|
-
Reverses the sign of number _A_.
|
158
|
-
_A_ can be dereferenced.
|
159
|
-
OpCode: 0x41.
|
160
|
-
```
|
161
|
-
neg A
|
162
|
-
```
|
163
|
-
|
164
|
-
#### not
|
165
|
-
Performs bitwise NOT operation.
|
166
|
-
_A_ can be dereferenced.
|
167
|
-
OpCode: 0x42.
|
168
|
-
```
|
169
|
-
not A
|
170
|
-
```
|
171
|
-
|
172
|
-
#### or
|
173
|
-
Performs bitwise OR operation.
|
174
|
-
_A_ or/and _B_ can be dereferenced.
|
175
|
-
OpCode: 0x43.
|
176
|
-
```
|
177
|
-
or A, B
|
178
|
-
```
|
179
|
-
|
180
|
-
#### xor
|
181
|
-
Performs bitwise XOR operation.
|
182
|
-
_A_ or/and _B_ can be dereferenced.
|
183
|
-
OpCode: 0x44.
|
184
|
-
```
|
185
|
-
xor A, B
|
186
|
-
```
|
187
|
-
|
188
|
-
### Transfer
|
189
|
-
#### mov
|
190
|
-
Moves data from _B_ to _A_.
|
191
|
-
_A_ or/and _B_ can be dereferenced.
|
192
|
-
OpCode: 0x60.
|
193
|
-
```
|
194
|
-
mov A, B
|
195
|
-
```
|
196
|
-
|
197
|
-
#### push
|
198
|
-
Places _A_ on top of stack.
|
199
|
-
OpCode: 0x61.
|
200
|
-
```
|
201
|
-
push A
|
202
|
-
```
|
203
|
-
|
204
|
-
#### pop
|
205
|
-
Removes element from top of the stack and into _A_.
|
206
|
-
OpCode: 0x62.
|
207
|
-
```
|
208
|
-
pop A
|
209
|
-
```
|
210
|
-
|
211
|
-
### Jumps
|
212
|
-
#### call
|
213
|
-
Places _ip_ register on the stack and jumps to _A_.
|
214
|
-
_A_ can be dereferenced.
|
215
|
-
OpCode: 0x20.
|
216
|
-
```
|
217
|
-
call A
|
218
|
-
```
|
219
|
-
|
220
|
-
#### ret
|
221
|
-
Pops _ip_ from the stack, and jumps to it.
|
222
|
-
OpCode: 0x2c.
|
223
|
-
```
|
224
|
-
ret
|
225
|
-
```
|
226
|
-
|
227
|
-
### iret
|
228
|
-
Comes back from interrupt.
|
229
|
-
OpCode: 0x2d.
|
230
|
-
```
|
231
|
-
iret
|
232
|
-
```
|
233
|
-
|
234
|
-
#### jmp
|
235
|
-
Performs unconditional jump to _A_.
|
236
|
-
_A_ can be dereferenced.
|
237
|
-
OpCode: 0x21.
|
238
|
-
```
|
239
|
-
jmp A
|
240
|
-
```
|
241
|
-
|
242
|
-
#### je
|
243
|
-
Jumps to _A_ if in _cmp_ _A_ is equal to _B_.
|
244
|
-
_A_ can be dereferenced.
|
245
|
-
OpCode: 0x22.
|
246
|
-
```
|
247
|
-
je A
|
248
|
-
```
|
249
|
-
|
250
|
-
#### jg
|
251
|
-
Jumps to _A_ if in _cmp_ _A_ is greater than _B_.
|
252
|
-
_A_ can be dereferenced.
|
253
|
-
OpCode: 0x23.
|
254
|
-
```
|
255
|
-
jg A
|
256
|
-
```
|
257
|
-
|
258
|
-
#### jge
|
259
|
-
Jumps to _A_ if in _cmp_ _A_ is greater or equal to _B_.
|
260
|
-
_A_ can be dereferenced.
|
261
|
-
OpCode: 0x24.
|
262
|
-
```
|
263
|
-
jge A
|
264
|
-
```
|
265
|
-
|
266
|
-
#### jl
|
267
|
-
Jumps to _A_ if in _cmp_ _A_ is less than _B_.
|
268
|
-
_A_ can be dereferenced.
|
269
|
-
OpCode: 0x25.
|
270
|
-
```
|
271
|
-
jl A
|
272
|
-
```
|
273
|
-
|
274
|
-
#### jle
|
275
|
-
Jumps to _A_ if in _cmp_ _A_ is less or equal to _B_.
|
276
|
-
_A_ can be dereferenced.
|
277
|
-
OpCode: 0x26.
|
278
|
-
```
|
279
|
-
jle A
|
280
|
-
```
|
281
|
-
|
282
|
-
#### jne
|
283
|
-
Jumps to _A_ if in _cmp_ _A_ is not equal to _B_.
|
284
|
-
_A_ can be dereferenced.
|
285
|
-
OpCode: 0x27.
|
286
|
-
```
|
287
|
-
jne A
|
288
|
-
```
|
289
|
-
|
290
|
-
#### jng
|
291
|
-
Jumps to _A_ if in _cmp_ _A_ is not greater than _B_.
|
292
|
-
_A_ can be dereferenced.
|
293
|
-
OpCode: 0x28.
|
294
|
-
```
|
295
|
-
jng A
|
296
|
-
```
|
297
|
-
|
298
|
-
#### jnge
|
299
|
-
Jumps to _A_ if in _cmp_ _A_ is not greater or equal to _B_.
|
300
|
-
_A_ can be dereferenced.
|
301
|
-
OpCode: 0x29.
|
302
|
-
```
|
303
|
-
jnge A
|
304
|
-
```
|
305
|
-
|
306
|
-
#### jnl
|
307
|
-
Jumps to _A_ if in _cmp_ _A_ is not less than _B_.
|
308
|
-
_A_ can be dereferenced.
|
309
|
-
OpCode: 0x2a.
|
310
|
-
```
|
311
|
-
jnl A
|
312
|
-
```
|
313
|
-
|
314
|
-
#### jnle
|
315
|
-
Jumps to _A_ if in _cmp_ _A_ is not less or equal to _B_.
|
316
|
-
_A_ can be dereferenced.
|
317
|
-
OpCode: 0x2b.
|
318
|
-
```
|
319
|
-
jnle A
|
320
|
-
```
|
321
|
-
|
322
|
-
### Various
|
323
|
-
#### lea
|
324
|
-
Pushes address of _B_ into _A_.
|
325
|
-
OpCode: 0x80.
|
326
|
-
```
|
327
|
-
lea A, B
|
328
|
-
```
|
329
|
-
|
330
|
-
#### nop
|
331
|
-
Does nothing.
|
332
|
-
OpCode: 0x81.
|
333
|
-
```
|
334
|
-
nop
|
335
|
-
```
|
336
|
-
|
337
|
-
#### int
|
338
|
-
Generate software interrupt with ID specified by _A_.
|
339
|
-
_A_ can be dereferenced.
|
340
|
-
OpCode: 0x85.
|
341
|
-
```
|
342
|
-
int A
|
343
|
-
```
|
344
|
-
|
345
|
-
#### syscall
|
346
|
-
Asks Haxor VM to do "system" call. OpCode: 0x86.
|
347
|
-
```
|
348
|
-
syscall
|
349
|
-
```
|
73
|
+
### Native instructions
|
74
|
+
|Syntax|OpCode|Description|
|
75
|
+
|------|------|-----------|
|
76
|
+
|nop |0x00|Does nothing.|
|
77
|
+
|exiti imm |0x01|Closes VM with specified exit code.|
|
78
|
+
|syscall |0x02|Performs Syscall with ID stored in $sc register.|
|
79
|
+
|add reg1, reg2, reg3 |0x10|reg1 = reg2 + reg3|
|
80
|
+
|addi reg1, reg2, imm |0x11|reg1 = reg2 + imm|
|
81
|
+
|sub reg1, reg2, reg3 |0x12|reg1 = reg2 - reg3|
|
82
|
+
|mult reg1, reg2, reg3|0x13|reg1 = reg2 * reg3|
|
83
|
+
|div reg1, reg2, reg3 |0x14|reg1 = reg2 / reg3|
|
84
|
+
|mod reg1, reg2, reg3 |0x15|reg1 = reg2 % reg3|
|
85
|
+
|lw reg1, reg2, imm |0x20|reg1 = memory[reg2 + imm]|
|
86
|
+
|sw reg1, imm, reg2 |0x21|memory[reg1+imm] = reg2|
|
87
|
+
|lui reg1, imm |0x22|reg1 = (imm << 32)|
|
88
|
+
|and reg1, reg2, reg3 |0x30|reg1 = reg2 & reg3|
|
89
|
+
|andi reg1, reg2, imm |0x31|reg1 = reg2 & imm|
|
90
|
+
|or reg1, reg2, reg3 |0x32|reg1 = reg2 \| reg3|
|
91
|
+
|ori reg1, reg2, imm |0x33|reg1 = reg2 \| imm|
|
92
|
+
|xor reg1, reg2, reg3 |0x34|reg1 = reg2 ^ reg3|
|
93
|
+
|nor reg1, reg2, reg3 |0x35|reg1 = ~(reg2 \| reg3)|
|
94
|
+
|slt reg1, reg2, reg3 |0x36|reg1 = reg2 < reg3|
|
95
|
+
|slti reg1, reg2, imm |0x37|reg1 = reg2 < imm|
|
96
|
+
|slli reg1, reg2, imm |0x40|reg1 = reg2 << imm|
|
97
|
+
|srli reg1, reg2, imm |0x41|reg1 = reg2 >> imm|
|
98
|
+
|sll reg1, reg2, reg3 |0x42|reg1 = reg2 << reg3|
|
99
|
+
|srl reg1, reg2, reg3 |0x43|reg1 = reg2 >> reg3|
|
100
|
+
|beq reg1, reg2, imm |0x50|goto imm if reg1 == reg2|
|
101
|
+
|beql reg1, reg2, imm |0x51|$ret = pc, goto imm if reg1 == reg2|
|
102
|
+
|bne reg1, reg2, imm |0x52|goto imm if reg1 != reg2|
|
103
|
+
|bnel reg1, reg2, imm |0x53|$ret = pc, goto imm if reg1 != reg2|
|
104
|
+
|j imm |0x54|goto imm|
|
105
|
+
|jr reg1 |0x55|goto reg1|
|
106
|
+
|jal imm |0x56|$ret = pc, goto imm|
|
107
|
+
|
108
|
+
### Pseudo instructions
|
109
|
+
|Syntax|Description|
|
110
|
+
|------|-----------|
|
111
|
+
|push reg1 |Pushes register onto stack|
|
112
|
+
|pushi imm |Pushes const onto stack|
|
113
|
+
|pushm imm |Pushes word stored at specified address|
|
114
|
+
|pop reg1 |Pops value into register|
|
115
|
+
|popm imm |Pops value into specified address|
|
116
|
+
|move reg1, reg2 |reg1 = reg2|
|
117
|
+
|clear reg1 |reg1 = 0|
|
118
|
+
|not reg1, reg2 |reg1 = ~reg2|
|
119
|
+
|ret |Jumps to address stored in $ret|
|
120
|
+
|b imm |Unconditional branch|
|
121
|
+
|bal imm |Unconditional linked branch|
|
122
|
+
|bgt reg1, reg2, imm|goto imm if reg1 > reg2|
|
123
|
+
|blt reg1, reg2, imm|goto imm if reg1 < reg2|
|
124
|
+
|bge reg1, reg2, imm|goto imm if reg1 >= reg2|
|
125
|
+
|ble reg1, reg2, imm|goto imm if reg1 <= reg2|
|
126
|
+
|blez reg1, imm |goto imm if reg1 <= 0|
|
127
|
+
|bgtz reg1, imm |goto imm if reg1 > 0|
|
128
|
+
|beqz reg1, imm |goto imm if reg1 == 0|
|
350
129
|
|
351
130
|
## System calls
|
352
131
|
Using _syscall_ command you can run some system calls provided by Haxor VM.
|
353
|
-
System call number is passed via
|
354
|
-
|
355
|
-
### exit (01h)
|
356
|
-
Terminates VM process with specified exit code.
|
357
|
-
Takes 1 argument:
|
358
|
-
* exit code
|
359
|
-
```
|
360
|
-
push 100
|
361
|
-
mov sc, 01h
|
362
|
-
syscall
|
363
|
-
```
|
132
|
+
System call number is passed via _$sc_ register, arguments go via stack in reversed order.
|
364
133
|
|
365
|
-
### printf (
|
134
|
+
### printf (01h)
|
366
135
|
Prints formatted text into file specified by descriptor.
|
367
136
|
Takes 2 or more arguments:
|
368
137
|
* file descriptor (1 for standard output, 2 for standard error)
|
369
138
|
* format string
|
370
139
|
* data depending on format string...
|
140
|
+
|
141
|
+
Example:
|
371
142
|
```
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
143
|
+
addi $sc, $0, 01h
|
144
|
+
pushi msg_fmt
|
145
|
+
pushi 1
|
146
|
+
syscall
|
376
147
|
```
|
377
148
|
|
378
|
-
### scanf (
|
149
|
+
### scanf (02h)
|
379
150
|
Converts data from file specified by descriptor.
|
380
151
|
Remember that memory is not automatically
|
381
152
|
allocated by this function. You need to prepare
|
@@ -386,33 +157,28 @@ Takes 2 or more arguments:
|
|
386
157
|
* file descriptor (0 for standard input)
|
387
158
|
* format string
|
388
159
|
* addresses in memory to put data into them...
|
389
|
-
```
|
390
|
-
section .data
|
391
|
-
dw scanfmt, "%100s", 0
|
392
160
|
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
lea r01, scanfmt
|
400
|
-
push r01
|
401
|
-
push 0
|
402
|
-
mov sc, 03h
|
161
|
+
Example:
|
162
|
+
```
|
163
|
+
addi $sc, $0, 02h
|
164
|
+
pushi answer
|
165
|
+
pushi format
|
166
|
+
pushi 0
|
403
167
|
syscall
|
404
168
|
```
|
405
169
|
|
406
|
-
### random (
|
170
|
+
### random (03h)
|
407
171
|
Generates random integer from specified range.
|
408
172
|
Arguments:
|
409
173
|
* minimum (inclusive)
|
410
174
|
* maximum (inclusive)
|
411
175
|
|
412
176
|
Generated number is pushed onto stack.
|
177
|
+
|
178
|
+
Example:
|
413
179
|
```
|
414
|
-
|
415
|
-
|
416
|
-
|
180
|
+
addi $sc, $0, 03h
|
181
|
+
pushi 100
|
182
|
+
pushi 1
|
417
183
|
syscall
|
418
184
|
```
|