kompiler 0.3.0.pre.3 → 0.3.0.pre.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,665 @@
1
+ module Kompiler
2
+
3
+ module Parsers
4
+
5
+
6
+ module SymAST
7
+
8
# Tokenizer and operator configuration for the SymAST parser.
#
# Every setting is stored in a module-level instance variable and exposed
# through a reader/writer pair (Config.<name> / Config.<name>=).
module Config

  # Characters a word may begin with (letters and underscore, no digits)
  @word_begin_chars = ("a".."z").to_a + ("A".."Z").to_a + ["_"]
  # Characters a word may contain after its first character
  @word_chars = @word_begin_chars + ("0".."9").to_a

  # Characters a number may begin with
  @number_begin_chars = ("0".."9").to_a
  # Characters a number may contain after its first character
  @number_chars = ("0".."9").to_a + ["."]

  # Characters treated as whitespace
  @whitespace_chars = [" ", "\t"]

  # Include function operations parsing (e.g., func(x + 2) )
  @parse_functions = true

  # Known signs. Earlier entries are prioritized when matching, so a longer
  # sequence must be listed before any shorter sequence it starts with
  # (e.g. "**" before "*", "<=" before "<", "!=" before "!").
  @sign_types = [
    {name: "open_bracket", chars: ["("]},
    {name: "close_bracket", chars: [")"]},
    {name: "power", chars: ["**"]},
    {name: "div", chars: ["/"]},
    {name: "mul", chars: ["*"]},
    {name: "add", chars: ["+"]},
    {name: "sub", chars: ["-"]},
    {name: "shift_left", chars: ["<<"]},
    {name: "shift_right", chars: [">>"]},
    {name: "or", chars: ["|"]},
    {name: "and", chars: ["&"]},
    {name: "modulo_sign", chars: ["%"]},
    {name: "equal_sign", chars: ["=="]},
    {name: "not_equal_sign", chars: ["!="]},
    {name: "less_or_eq_sign", chars: ["<="]},
    {name: "greater_or_eq_sign", chars: [">="]},
    {name: "less_than_sign", chars: ["<"]},
    {name: "greater_than_sign", chars: [">"]},

    {name: "exclamation_mark", chars: ["!"]},
  ]

  # One element operations (e.g., negate operation as "-x" or factorial as "x!").
  # Elements earlier have higher priority.
  # check_direction is the scan direction used by tokens_to_ast:
  #   -1 scans the tokens from the last one to the first one; the operand is
  #      the token after the sign (prefix operation)
  #    1 scans the tokens from the first one to the last one; the operand is
  #      the token before the sign (postfix operation)
  @one_element_ast_operations = [
    {name: "negate", sign_type: "sub", check_direction: -1},
    {name: "factorial", sign_type: "exclamation_mark", check_direction: 1},
  ]

  # Two element operations (e.g., division as "a / b"), organised in groups.
  # Groups earlier have higher priority; operations inside one group share
  # the same priority.
  # group_check_direction is the scan direction used by tokens_to_ast:
  #   -1 scans from the last token to the first one (right-associative)
  #    1 scans from the first token to the last one (left-associative)
  @two_element_ast_operations = [
    {
      group_check_direction: -1,
      group_operations: [
        {name: "power", sign_type: "power"},
      ]
    },
    {
      group_check_direction: 1,
      group_operations: [
        {name: "modulo", sign_type: "modulo_sign"},
      ]
    },
    {
      group_check_direction: 1,
      group_operations: [
        {name: "div", sign_type: "div"},
        {name: "mul", sign_type: "mul"},
      ]
    },
    {
      group_check_direction: 1,
      group_operations: [
        {name: "bitshift_left", sign_type: "shift_left"},
        {name: "bitshift_right", sign_type: "shift_right"},
        {name: "bit_or", sign_type: "or"},
        {name: "bit_and", sign_type: "and"},
      ]
    },
    {
      group_check_direction: 1,
      group_operations: [
        {name: "add", sign_type: "add"},
        {name: "sub", sign_type: "sub"},
      ]
    },
    {
      group_check_direction: 1,
      group_operations: [
        {name: "equal", sign_type: "equal_sign"},
        {name: "not_equal", sign_type: "not_equal_sign"},
        {name: "less_than", sign_type: "less_than_sign"},
        # Must reference the ">" sign; pointing it at "less_than_sign" made
        # every ">" token impossible to collapse into an operation.
        {name: "greater_than", sign_type: "greater_than_sign"},
        {name: "less_or_eq", sign_type: "less_or_eq_sign"},
        {name: "greater_or_eq", sign_type: "greater_or_eq_sign"},
      ]
    },
  ]

  # Built-in functions callable from expressions, keyed by name.
  # Each one receives the already evaluated argument.
  @functions = {
    "len" => lambda do |arg|
      raise "Math AST len() error 1" unless arg.is_a?(String)
      arg.size
    end,
    "floor" => lambda do |arg|
      raise "Math AST floor() error 1" unless arg.is_a?(Numeric)
      arg.floor
    end
  }

  class << self
    attr_accessor :word_begin_chars, :word_chars, :number_begin_chars, :number_chars, :whitespace_chars
    attr_accessor :parse_functions
    attr_accessor :sign_types, :one_element_ast_operations, :two_element_ast_operations, :functions
  end
end
148
+
149
+
150
# Splits an expression string into a flat list of token hashes.
#
# Token shapes produced:
#   {type: "sign", sign_type: <name from Config.sign_types>, match_seq: <matched text>}
#   {type: "string", str_content: <parsed content>}   for a '"' delimited string
#   {type: "number", number_content: <source text>}   for a "'" delimited character
#   {type: "number", number_content: <text>, number_value: <value>}
#   {type: "word", word_content: <text>}
#
# Characters that are neither whitespace, a sign nor a string delimiter are
# accumulated into a pending word. The pending word is classified once the
# next sign or string is reached, or at the end of the input.
#
# NOTE(review): whitespace is skipped without closing the pending word, so
# "a b" is read as the single word "ab" - confirm this is intended.
#
# Raises a RuntimeError when a character definition is not exactly one
# character long, or when a pending word is neither an immediate (according
# to Kompiler::Parsers.check_immediate_operand) nor a valid word.
def self.str_to_tokens str

  tokens = []
  pending_word = +""

  # Types of available signs (the first one is prioritized)
  sign_types = Config.sign_types

  # Classifies an accumulated run of characters as a number or a word token
  build_word_token = lambda do |word, error_message|
    is_imm, imm_value = Kompiler::Parsers.check_immediate_operand(word)

    if is_imm
      {type: "number", number_content: word, number_value: imm_value[:value]}
    elsif Config.word_begin_chars.include?(word[0]) && word[1..].each_char.all? { |c| Config.word_chars.include?(c) }
      {type: "word", word_content: word}
    else
      raise error_message
    end
  end

  char_i = 0

  while char_i < str.size
    char = str[char_i]

    # Whitespace is skipped
    if Config.whitespace_chars.include?(char)
      char_i += 1
      next
    end

    cut_str = str[char_i..]

    # Check if the current position is a math sign
    sign_token = nil
    sign_types.each do |sign|
      seq = sign[:chars].find { |candidate| cut_str.start_with?(candidate) }
      next if seq.nil?

      sign_token = {type: "sign", sign_type: sign[:name], match_seq: seq}
      break
    end

    if sign_token
      tokens << sign_token
      char_i += sign_token[:match_seq].size
    elsif Kompiler::Config.string_delimiters.include?(char)
      str_content, len_parsed = Kompiler::Parsers.parse_str(cut_str)

      case char
      when '"'
        tokens << {type: "string", str_content: str_content}
      when "'"
        if str_content.size != 1 then raise "Math AST parse error - a character definition cannot be longer than 1" end
        # A character definition is kept as a number token holding its full source text (quotes included)
        tokens << {type: "number", number_content: str[char_i...(char_i + len_parsed)]}
      end

      char_i += len_parsed
    else
      # An ordinary character, which extends the pending word
      pending_word << char
      char_i += 1
      next
    end

    # A sign or a string was just appended. The pending word (if any) was
    # written before it, so its token goes in front of the last token.
    next if pending_word.empty?

    tokens.insert(-2, build_word_token.call(pending_word, "Math AST Error 1"))
    pending_word = +""
  end

  # Classify whatever is left once the input ends
  tokens << build_word_token.call(pending_word, "Math AST Error 2") unless pending_word.empty?

  tokens
end
310
+
311
+
312
# A recursive function that makes blocks (bracket enclosed) into single tokens.
#
# Every "(" ... ")" run is replaced by {type: "block", content: <tokens>},
# where the content has itself been processed recursively. Function calls
# inside a block are recognised only when Config.parse_functions is enabled,
# which matches what str_to_ast does for the top level.
#
# tokens - Array of token hashes (as produced by str_to_tokens)
#
# Returns a new Array of tokens; the given array is not modified.
# Raises a RuntimeError on an unexpected ")" or when a "(" is never closed.
def self.parse_blocks_from_tokens tokens

  final_tokens = []
  token_i = 0

  while token_i < tokens.size
    token = tokens[token_i]
    sign_type = token[:type] == "sign" ? token[:sign_type] : nil

    if sign_type == "close_bracket"
      raise "Parsing error - unexpected close bracket at token #{token_i}"
    end

    # Everything that is not an opening bracket is copied as is
    if sign_type != "open_bracket"
      final_tokens << token
      token_i += 1
      next
    end

    # Look for the matching close bracket. The depth counts the bracket level
    # (zero means the opening bracket at token_i has been closed).
    depth = 1
    block_end_i = token_i + 1

    while block_end_i < tokens.size && depth != 0
      candidate = tokens[block_end_i]

      if candidate[:type] == "sign"
        case candidate[:sign_type]
        when "open_bracket"
          depth += 1
        when "close_bracket"
          depth -= 1
        end
      end

      block_end_i += 1
    end

    raise "Parsing error - Bracket amount does not match" if depth != 0

    # block_end_i is one past the matching close bracket, so the content is
    # everything strictly between the two brackets
    block_content = parse_blocks_from_tokens(tokens[(token_i + 1)...(block_end_i - 1)])
    block_content = parse_functions_from_tokens(block_content) if Config.parse_functions

    final_tokens << {type: "block", content: block_content}

    token_i = block_end_i
  end

  final_tokens
end
368
+
369
+
370
# Merges every "word" token that is directly followed by a "block" token into
# a single function call token:
#   {type: "func", func_name: <word content>, func_arg_block: <the block token>}
# All other tokens are copied unchanged.
#
# tokens - Array of token hashes in which brackets were already folded into
#          "block" tokens
#
# Returns a new Array of tokens (an empty array for empty input).
def self.parse_functions_from_tokens tokens

  final_tokens = []
  token_i = 0

  while token_i < tokens.size
    token = tokens[token_i]
    next_token = tokens[token_i + 1] # nil for the last token

    if token[:type] == "word" && next_token && next_token[:type] == "block"
      final_tokens << {type: "func", func_name: token[:word_content], func_arg_block: next_token}
      # The block was consumed as the call's argument, so it is skipped too.
      # This keeps a call at the very end of the list from being followed by
      # a stray copy of its own argument block.
      token_i += 2
    else
      final_tokens << token
      token_i += 1
    end
  end

  final_tokens
end
393
+
394
+
395
# Collapses a flat token list into a single tree of operation nodes.
#
# Steps:
#   1. Operand tokens (word, number, string, block, func) are wrapped into
#      {type: "operation", op_type:, elements:} nodes. Blocks and function
#      arguments are converted recursively.
#   2. One element operations from Config.one_element_ast_operations are
#      collapsed, in list order. A sign counts as such an operation only when
#      the token on its outer side is missing or is another sign
#      (e.g. "[-]x" or "+[-]x").
#   3. Two element operations from Config.two_element_ast_operations are
#      collapsed group by group, in list order.
#
# A check direction of -1 scans from the last token to the first one, a
# direction of 1 scans from the first token to the last one.
#
# tokens - Array of token hashes in which brackets and function calls were
#          already folded. The array itself is not modified.
#
# Returns the root operation node.
# Raises a RuntimeError when an operation lacks an operand, when an operand
# is not an operation node, or when the tokens do not collapse into exactly
# one node.
def self.tokens_to_ast tokens

  # Swap words, numbers, strings, blocks and function calls for operation nodes
  tokens = tokens.map do |token|
    case token[:type]
    when "word"
      {type: "operation", op_type: "word", elements: [token[:word_content]]}
    when "number"
      {type: "operation", op_type: "number", elements: [token[:number_content]]}
    when "string"
      {type: "operation", op_type: "string", elements: [token[:str_content]]}
    when "block"
      tokens_to_ast(token[:content])
    when "func"
      {type: "operation", op_type: "func", elements: [token[:func_name], tokens_to_ast(token[:func_arg_block][:content])]}
    else
      token
    end
  end

  # Index at which a scan in the given direction starts
  scan_start = lambda do |direction|
    case direction
    when -1 then tokens.size - 1
    when 1 then 0
    else raise "Parsing error - invalid check direction #{direction.inspect}"
    end
  end

  # One element operations (e.g. negation as "-x", factorial as "x!")
  Config.one_element_ast_operations.each do |operation|
    step = operation[:check_direction]
    token_i = scan_start.call(step)

    while token_i >= 0 && token_i < tokens.size
      token = tokens[token_i]

      unless token[:type] == "sign" && token[:sign_type] == operation[:sign_type]
        token_i += step
        next
      end

      # The sign is a one element operation only when nothing, or another
      # sign, sits on its outer side. Comparing against the live array
      # bounds keeps the edge check correct after earlier collapses.
      outer_i = token_i + step
      at_edge = outer_i < 0 || outer_i >= tokens.size
      unless at_edge || tokens[outer_i][:type] == "sign"
        token_i += step
        next
      end

      operand_i = token_i - step
      operand = operand_i.between?(0, tokens.size - 1) ? tokens[operand_i] : nil
      if operand.nil? || operand[:type] != "operation"
        raise "Parsing error - the \"#{operation[:name]}\" operation is missing its operand"
      end

      ast_node = {type: "operation", op_type: operation[:name], elements: [operand]}

      # Replace the sign and its operand with the new node. token_i is left
      # untouched: it now refers either to the new node or to the token
      # following it, both of which the scan should look at next.
      first_i, last_i = [token_i, operand_i].minmax
      tokens = tokens[...first_i] + [ast_node] + tokens[(last_i + 1)..]
    end
  end

  # Two element operations, in priority order
  Config.two_element_ast_operations.each do |operations_group|
    step = operations_group[:group_check_direction]
    token_i = scan_start.call(step)

    while token_i >= 0 && token_i < tokens.size
      token = tokens[token_i]

      operation = nil
      if token[:type] == "sign"
        operation = operations_group[:group_operations].find { |op| op[:sign_type] == token[:sign_type] }
      end

      if operation.nil?
        token_i += step
        next
      end

      # Without this check a leading sign would pick up the last token
      # through a negative index, and a trailing sign would read past the end
      if token_i == 0 || token_i == tokens.size - 1
        raise "Parsing error - the \"#{operation[:name]}\" operation is missing an operand"
      end

      elements = [tokens[token_i - 1], tokens[token_i + 1]]

      # Check if there are some non-operation elements, which shouldn't happen
      raise "Parsing error - something went wrong, compute elements were not operations" if elements.any? { |e| e[:type] != "operation" }

      ast_node = {type: "operation", op_type: operation[:name], elements: elements}

      tokens = tokens[...(token_i - 1)] + [ast_node] + tokens[(token_i + 1 + 1)..]

      # Continue from the new node, which is valid for both scan directions
      token_i -= 1
    end
  end

  raise "Parsing error - something went wrong, tokens should've collapsed into a single AST, but didn't :(" if tokens.size != 1

  tokens[0]
end
550
+
551
# Converts a tree of token operation nodes into the final AST form.
#
# A node {type: "operation", op_type: X, elements: [...]} becomes
# {type: X, elements: [...]}. Elements that are operation nodes themselves
# are converted recursively; every other element is kept as it is.
#
# token_ast - operation node as returned by tokens_to_ast. It is not
#             modified; the result is built from new hashes and arrays.
#
# Returns the converted AST node.
def self.token_ast_to_ast(token_ast)
  converted_elements = token_ast[:elements].map do |el|
    if el.is_a?(Hash) && el[:type] == "operation"
      token_ast_to_ast(el)
    else
      el
    end
  end

  {type: token_ast[:op_type], elements: converted_elements}
end
569
+
570
+
571
# Parses an expression string into its final AST.
#
# The string goes through the whole pipeline: tokenizing, folding of bracket
# enclosed blocks, function call detection (only when Config.parse_functions
# is set), building the token AST and converting it to the final AST form.
#
# str - the expression source text
#
# Returns the root AST node.
def self.str_to_ast str
  folded_tokens = parse_blocks_from_tokens(str_to_tokens(str))
  folded_tokens = parse_functions_from_tokens(folded_tokens) if Config.parse_functions

  token_ast_to_ast(tokens_to_ast(folded_tokens))
end

# Make str_to_ast available under the shorter name "parse" as well
class << self
  alias parse str_to_ast
end
589
+
590
+
591
# Evaluates an AST produced by str_to_ast.
#
# ast       - AST node of the form {type: <String>, elements: <Array>}
# words     - Hash mapping word names to their values; a "word" node
#             evaluates to words[name]
# functions - Hash mapping function names to callables. These are looked up
#             before the built-in functions in Config.functions.
#
# Comparison operations evaluate to 1 when they hold and to 0 otherwise.
#
# Returns the computed value, or nil for a node type that is not known.
# Raises a RuntimeError when a "number" node does not hold an immediate or
# when a "func" node names a function that is not defined anywhere.
def self.run_ast ast, words=Hash.new, functions=Hash.new

  elements = ast[:elements]

  # Evaluates a child node with the same words and functions
  evaluate = ->(node) { run_ast(node, words, functions) }

  case ast[:type]
  when "word"
    words[elements[0]]
  when "number"
    is_num, imm_value = Kompiler::Parsers.check_immediate_operand(elements[0])
    raise "AST recognition error - \"#{elements[0]}\" is not a number" if !is_num

    imm_value[:value]
  when "string"
    elements[0]
  when "func"
    func_name = elements[0]
    function = functions[func_name] || Config.functions[func_name]
    raise "AST run error - unknown function \"#{func_name}\"" if function.nil?

    # The argument is evaluated with the caller's words and functions, so
    # words used inside the argument resolve like everywhere else
    function.call(evaluate.call(elements[1]))
  when "add"
    evaluate.call(elements[0]) + evaluate.call(elements[1])
  when "sub"
    evaluate.call(elements[0]) - evaluate.call(elements[1])
  when "mul"
    evaluate.call(elements[0]) * evaluate.call(elements[1])
  when "div"
    evaluate.call(elements[0]) / evaluate.call(elements[1])
  when "power"
    evaluate.call(elements[0]) ** evaluate.call(elements[1])
  when "negate"
    -evaluate.call(elements[0])
  when "bitshift_left"
    evaluate.call(elements[0]) << evaluate.call(elements[1])
  when "bitshift_right"
    evaluate.call(elements[0]) >> evaluate.call(elements[1])
  when "bit_or"
    evaluate.call(elements[0]) | evaluate.call(elements[1])
  when "bit_and"
    evaluate.call(elements[0]) & evaluate.call(elements[1])
  when "factorial"
    # Product of 1..operand; an operand below 1 gives 1
    (1..evaluate.call(elements[0])).reduce(1) { |product, n| product * n }
  when "modulo"
    evaluate.call(elements[0]) % evaluate.call(elements[1])
  when "equal"
    (evaluate.call(elements[0]) == evaluate.call(elements[1])) ? 1 : 0
  when "not_equal"
    (evaluate.call(elements[0]) != evaluate.call(elements[1])) ? 1 : 0
  when "less_than"
    (evaluate.call(elements[0]) < evaluate.call(elements[1])) ? 1 : 0
  when "greater_than"
    (evaluate.call(elements[0]) > evaluate.call(elements[1])) ? 1 : 0
  when "less_or_eq"
    (evaluate.call(elements[0]) <= evaluate.call(elements[1])) ? 1 : 0
  when "greater_or_eq"
    (evaluate.call(elements[0]) >= evaluate.call(elements[1])) ? 1 : 0
  end
end
659
+
660
+
661
+ end # Kompiler::Parsers::SymAST
662
+
663
+ end # Kompiler::Parsers
664
+
665
+ end # Kompiler