owl-basic 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. owl_basic/__init__.py +3 -0
  2. owl_basic/algorithms.py +29 -0
  3. owl_basic/ast_utils.py +204 -0
  4. owl_basic/basic_visitor.py +55 -0
  5. owl_basic/cfg_vertex.py +65 -0
  6. owl_basic/codegen/__init__.py +0 -0
  7. owl_basic/codegen/clr/__init__.py +0 -0
  8. owl_basic/codegen/clr/cil_visitor.py +1296 -0
  9. owl_basic/codegen/clr/cts.py +56 -0
  10. owl_basic/codegen/clr/emitters.py +94 -0
  11. owl_basic/codegen/clr/generate.py +539 -0
  12. owl_basic/correlation_visitor.py +119 -0
  13. owl_basic/data_visitor.py +62 -0
  14. owl_basic/decoder.py +339 -0
  15. owl_basic/errors.py +22 -0
  16. owl_basic/flow/__init__.py +17 -0
  17. owl_basic/flow/basic_block.py +34 -0
  18. owl_basic/flow/basic_block_identifier.py +66 -0
  19. owl_basic/flow/basic_block_orderer.py +29 -0
  20. owl_basic/flow/connectors.py +19 -0
  21. owl_basic/flow/convert_sub_visitor.py +28 -0
  22. owl_basic/flow/entry_point_locator.py +55 -0
  23. owl_basic/flow/entry_point_visitor.py +48 -0
  24. owl_basic/flow/flow_analysis.py +56 -0
  25. owl_basic/flow/flow_graph_creator.py +14 -0
  26. owl_basic/flow/flowgraph_visitor.py +178 -0
  27. owl_basic/flow/longjump_converter.py +20 -0
  28. owl_basic/flow/longjump_visitor.py +53 -0
  29. owl_basic/flow/subroutine_converter.py +38 -0
  30. owl_basic/flow/traversal.py +110 -0
  31. owl_basic/gml_visitor.py +151 -0
  32. owl_basic/line_mapper.py +43 -0
  33. owl_basic/line_number_visitor.py +65 -0
  34. owl_basic/main.py +381 -0
  35. owl_basic/node.py +21 -0
  36. owl_basic/options.py +22 -0
  37. owl_basic/owltyping/__init__.py +1 -0
  38. owl_basic/owltyping/function_type_inferer.py +50 -0
  39. owl_basic/owltyping/hindley_milner.py +524 -0
  40. owl_basic/owltyping/set_function_type_visitor.py +25 -0
  41. owl_basic/owltyping/type_system.py +220 -0
  42. owl_basic/owltyping/typecheck.py +60 -0
  43. owl_basic/owltyping/typecheck_visitor.py +471 -0
  44. owl_basic/parent_visitor.py +37 -0
  45. owl_basic/process.py +36 -0
  46. owl_basic/separation_visitor.py +98 -0
  47. owl_basic/sigil.py +30 -0
  48. owl_basic/simplify_visitor.py +204 -0
  49. owl_basic/singleton.py +127 -0
  50. owl_basic/source_debugging.py +124 -0
  51. owl_basic/symbol_table_visitor.py +220 -0
  52. owl_basic/symbol_tables.py +195 -0
  53. owl_basic/syntax/__init__.py +0 -0
  54. owl_basic/syntax/ast.py +1081 -0
  55. owl_basic/syntax/ast_meta.py +228 -0
  56. owl_basic/syntax/grammar.py +1972 -0
  57. owl_basic/syntax/lexer.py +943 -0
  58. owl_basic/syntax/parser.py +77 -0
  59. owl_basic/utility.py +26 -0
  60. owl_basic/visitor.py +43 -0
  61. owl_basic/xml_blocks.py +137 -0
  62. owl_basic/xml_visitor.py +101 -0
  63. owl_basic-0.6.0.dist-info/METADATA +37 -0
  64. owl_basic-0.6.0.dist-info/RECORD +69 -0
  65. owl_basic-0.6.0.dist-info/WHEEL +5 -0
  66. owl_basic-0.6.0.dist-info/entry_points.txt +2 -0
  67. owl_basic-0.6.0.dist-info/licenses/LICENSE +21 -0
  68. owl_basic-0.6.0.dist-info/licenses/THIRD-PARTY-NOTICES.md +57 -0
  69. owl_basic-0.6.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,943 @@
1
+ # BBC BASIC Lexer
2
+
3
+ import re
4
+
5
+ from owl_basic.errors import error
6
+
7
# Every token type this lexer can hand to the parser.  The names are kept in
# their original order; the sub-headings are only a reading aid.
tokens = (
    # Line structure, identifiers and literals
    'EOL', 'ARRAYID_LPAREN', 'PROC_ID', 'FN_ID', 'ID',
    'LITERAL_STRING', 'LITERAL_FLOAT', 'LITERAL_INTEGER',
    # Punctuation
    'QUERY', 'PLING', 'PIPE', 'HASH', 'DOLLAR', 'APOSTROPHE',
    'COLON', 'COMMA', 'SEMICOLON',
    # Arithmetic and comparison operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE',
    'EQ', 'NE', 'LTE', 'GTE', 'LT', 'GT',
    # Compound assignment operators
    'PLUS_ASSIGN', 'MINUS_ASSIGN', 'TIMES_ASSIGN', 'DIVIDE_ASSIGN',
    'AND_ASSIGN', 'DIV_ASSIGN', 'EOR_ASSIGN', 'MOD_ASSIGN', 'OR_ASSIGN',
    # Shifts, brackets and remaining symbols
    'SHIFT_LEFT', 'SHIFT_RIGHT', 'SHIFT_RIGHT_UNSIGNED', 'AMPERSAND',
    'LPAREN', 'RPAREN', 'LBRAC', 'RBRAC', 'CARET', 'TILDE', 'DOT',
    # Word operators
    'AND', 'DIV', 'EOR', 'MOD', 'OR',
    # Statement modifiers
    'ERROR', 'LINE', 'OFF', 'STEP', 'STEREO', 'SPC', 'TAB_LPAREN',
    'ELSE', 'THEN', 'OPENIN',
    # Numeric functions and constants
    'ABS', 'ACS', 'ADVAL', 'ASC', 'ASN', 'ATN', 'BGET', 'BEAT', 'BEATS',
    'COS', 'COUNT', 'DEG', 'ERL', 'ERR', 'EVAL', 'EXP', 'EXT', 'FALSE',
    'GET', 'INKEY', 'INSTR_LPAREN', 'INT', 'LEN', 'LN', 'LOG',
    'OPENUP', 'OPENOUT', 'PI', 'POINT', 'POINT_LPAREN', 'POS',
    'RAD', 'RND', 'RND_LPAREN', 'SGN', 'SIN', 'SQR', 'TAN',
    'TO', 'TOP', 'TRUE', 'USR', 'VAL', 'VPOS',
    # String functions
    'CHR_STR', 'GET_STR', 'INKEY_STR', 'LEFT_STR_LPAREN', 'MID_STR_LPAREN',
    'RIGHT_STR_LPAREN', 'STR_STR', 'STRING_STR_LPAREN',
    'EOF', 'SUM', 'SUMLEN',
    # Block structure
    'WHILE', 'CASE', 'WHEN', 'OF', 'ENDCASE', 'OTHERWISE', 'ENDIF',
    'ENDWHILE',
    # Pseudo-variables
    'PTR', 'PAGE', 'TIME', 'TIME_STR', 'LOMEM', 'HIMEM',
    # Statements
    'SOUND', 'BPUT', 'CALL', 'CHAIN', 'CLEAR', 'CLOSE', 'CLG', 'CLS',
    'DATA', 'DEF', 'DIM', 'DIM_LPAREN', 'DRAW', 'END', 'ENDPROC',
    'ENVELOPE', 'FOR', 'GOSUB', 'GOTO', 'GCOL', 'IF', 'INPUT', 'LET',
    'LIBRARY', 'LOCAL', 'MODE', 'MOVE', 'NEXT', 'ON', 'VDU', 'VOICES',
    'PLOT', 'PRINT', 'READ', 'REPEAT', 'REPORT', 'REPORT_STR', 'RESTORE',
    'RETURN', 'RUN', 'STOP', 'TEMPO', 'COLOUR', 'TRACE', 'UNTIL', 'WIDTH',
    'OSCLI', 'CIRCLE', 'ELLIPSE', 'FILL', 'MOUSE', 'ORIGIN', 'QUIT',
    'RECTANGLE', 'SWAP', 'SYS', 'TINT', 'WAIT', 'INSTALL', 'PRIVATE',
    'BY', 'EXIT', 'NOT', 'MANDEL',
    # Whole-line tokens
    'COMMENT', 'STAR_FX', 'STAR_CAT',
)
207
+
208
def t_COMMENT(t):
    r'REM[^\n]*'
    # A REM swallows the rest of the line.  The text is kept rather than
    # discarded because it is possible to RESTORE to a REMed line and use
    # the DATA within it.  The token value is everything after the "REM"
    # keyword itself.
    keyword_len = len('REM')
    t.value = t.value[keyword_len:]
    return t
217
+
218
def t_STAR_FX(t):
    r'\*+\s*FX[^\n]*'
    # One or more '*', optional whitespace, "FX", then the rest of the line.
    # The token value is the whole matched text, leading stars included.
    # NOTE(review): the pattern is not anchored to the start of a statement,
    # so an expression such as A*FXRATE would presumably be swallowed as a
    # STAR_FX token as well - confirm whether that matters.
    return t
221
+
222
def t_STAR_CAT(t):
    r'\*+\s*CAT[^\n]*'
    # One or more '*', optional whitespace, "CAT", then the rest of the line.
    # The token value is the whole matched text, leading stars included.
    # NOTE(review): as with the *FX rule, nothing anchors this to the start
    # of a statement - confirm that a multiplication by a name beginning
    # with CAT cannot occur in practice.
    return t
225
+
226
+ # Define a rule so we can split lines with a trailing backslash and leading backslash
227
def t_CONTINUATION(t):
    r'\\[ \t]*(?:\r\n|\r|\n)[ \t]*\\'
    # A trailing backslash, one line break, and a leading backslash on the
    # next line together join two physical lines.  The line break may be LF,
    # CR or CRLF; a single-character class here would never match CRLF.
    # Nothing is returned, so the lexer discards the match and no token
    # reaches the parser; only the line counter is advanced.
    t.lexer.lineno += 1
231
+
232
+ # Define a rule so we can track line numbers
233
def t_EOL(t):
    r'[\r\n]+'
    # One EOL token covers a whole run of line-break characters (so blank
    # lines collapse into it).  The line counter is advanced by the number
    # of line breaks in the run, not the number of characters: a CRLF pair
    # is one line, so counts agree across platforms.  The matched text
    # contains only '\r' and '\n', so splitlines() yields exactly one
    # element per line break.
    t.lexer.lineno += len(t.value.splitlines())
    return t
238
+
239
+ # In BBC BASIC identifiers cannot begin with a keyword, so we go contrary
240
+ # to the advice in the PLY manual, since we want PRINTED to be lexed as
241
+ # PRINT ED
242
+
243
+ # BBC Basic keywords
244
+
245
+ # The order of these tokens is significant, since some keywords such as
246
+ # TO form the first part of TOP. Also, most keywords are disallowed at the
247
+ # start of variable names, however, some keywords are allowed at the start
248
+ # of an identifier name. These are listed after the t_ID token. Finally, some
249
+ # keywords incorporate the left parenthesis as part of the keyword, for statements
250
+ # where no space is permitted between the keyword and the parenthesis.
251
+
252
+ # Nine letter keywords
253
+
254
# Each keyword rule below is a function whose docstring is the regex and
# which returns the token unchanged.
def t_OTHERWISE(t):
    r'OTHERWISE'
    return t

def t_RECTANGLE(t):
    r'RECTANGLE'
    return t
261
+
262
+ # Eight letter keywords
263
+
264
# Plain keyword rules: the docstring is the regex, the token is returned
# unchanged.
def t_ENVELOPE(t):
    r'ENVELOPE'
    return t

def t_ENDWHILE(t):
    r'ENDWHILE'
    return t
271
+
272
+ # Seven letter keywords
273
+
274
# Plain keyword rules.  The *_STR rules match a keyword ending in '$', and
# the *_LPAREN rules also consume the opening parenthesis that must follow
# the keyword without a space.
def t_ELLIPSE(t):
    r'ELLIPSE'
    return t

def t_INSTALL(t):
    r'INSTALL'
    return t

def t_OPENOUT(t):
    r'OPENOUT'
    return t

def t_PRIVATE(t):
    r'PRIVATE'
    return t

def t_REPORT_STR(t):
    r'REPORT\$'
    return t

def t_RESTORE(t):
    r'RESTORE'
    return t

def t_RIGHT_STR_LPAREN(t):
    r'RIGHT\$\('
    return t

def t_STRING_STR_LPAREN(t):
    r'STRING\$\('
    return t

def t_LIBRARY(t):
    r'LIBRARY'
    return t
309
+
310
+ # Six letter keywords
311
+
312
# Plain keyword rules.  These sit before the shorter rules that are a prefix
# of them (INKEY$ before INKEY, POINT( before POINT, SUMLEN before SUM).
def t_CIRCLE(t):
    r'CIRCLE'
    return t

def t_COLOUR(t):
    r'COLOU?R'  # accepts both the COLOUR and COLOR spellings
    # Set the type explicitly to the single declared token name 'COLOUR'.
    # The token value keeps whichever spelling was matched.
    t.type = 'COLOUR'
    # The rule must return the token object itself; an earlier version
    # returned the string 'COLOUR', which made tokenising fail.
    return t

def t_INKEY_STR(t):
    r'INKEY\$'
    return t

def t_INSTR_LPAREN(t):
    r'INSTR\('
    return t

def t_LEFT_STR_LPAREN(t):
    r'LEFT\$\('
    return t

# Existence in ARM BASIC documented at
# http://www.g7jjf.com/acornArm.htm
def t_MANDEL(t):
    r'MANDEL'
    return t

def t_OPENIN(t):
    r'OPENIN'
    return t

def t_OPENUP(t):
    r'OPENUP'
    return t

def t_ORIGIN(t):
    r'ORIGIN'
    return t

def t_POINT_LPAREN(t):
    r'POINT\('
    return t

def t_REPEAT(t):
    r'REPEAT'
    return t

def t_RETURN(t):
    r'RETURN'
    return t

def t_SUMLEN(t):
    r'SUMLEN'
    return t

def t_STEREO(t):
    r'STEREO'
    return t

def t_VOICES(t):
    r'VOICES'
    return t
374
+
375
+ # Five letter keywords
376
+
377
# Plain keyword rules: the docstring is the regex, the token is returned
# unchanged.  BEATS and TIME$ sit before the shorter BEAT and TIME rules
# that are a prefix of them.
def t_ADVAL(t):
    r'ADVAL'
    return t

def t_BEATS(t):
    r'BEATS'
    return t

def t_CHAIN(t):
    r'CHAIN'
    return t

def t_ERROR(t):
    r'ERROR'
    return t

def t_GOSUB(t):
    r'GOSUB'
    return t

def t_HIMEM(t):
    r'HIMEM'
    return t

def t_INKEY(t):
    r'INKEY'
    return t

def t_INPUT(t):
    r'INPUT'
    return t

def t_LOCAL(t):
    r'LOCAL'
    return t

def t_LOMEM(t):
    r'LOMEM'
    return t

def t_MID_STR_LPAREN(t):
    r'MID\$\('
    return t

def t_MOUSE(t):
    r'MOUSE'
    return t

def t_OSCLI(t):
    r'OSCLI'
    return t

def t_POINT(t):
    r'POINT'
    return t

def t_PRINT(t):
    r'PRINT'
    return t

def t_SOUND(t):
    r'SOUND'
    return t

def t_TRACE(t):
    r'TRACE'
    return t

def t_TEMPO(t):
    r'TEMPO'
    return t

def t_TIME_STR(t):
    r'TIME\$'
    return t

def t_UNTIL(t):
    r'UNTIL'
    return t

def t_WHILE(t):
    r'WHILE'
    return t

def t_WIDTH(t):
    r'WIDTH'
    return t
464
+
465
+ # Four letter keywords
466
+
467
# Plain keyword rules: the docstring is the regex, the token is returned
# unchanged.
def t_BEAT(t):
    r'BEAT'
    return t

def t_CASE(t):
    r'CASE'
    return t

def t_CHR_STR(t):
    r'CHR\$'
    return t
478
+
479
def t_DATA(t):
    r'DATA[^\n]*'
    # DATA captures everything up to the end of the line; the individual
    # data items are not tokenized at this point.  The token value is the
    # text following the keyword, which is empty for a bare "DATA"
    # statement.  Slicing off the keyword agrees with the rule's regex for
    # every match, so no second regex (and no failure path) is needed.
    t.value = t.value[len('DATA'):]
    return t
489
+
490
# Plain keyword rules: the docstring is the regex, the token is returned
# unchanged.  DIM(, GET$ and RND( sit before the shorter DIM, GET and RND
# rules that are a prefix of them.
def t_DIM_LPAREN(t):
    r'DIM\('
    return t

def t_DRAW(t):
    r'DRAW'
    return t

def t_ELSE(t):
    r'ELSE'
    return t

def t_EVAL(t):
    r'EVAL'
    return t

def t_FILL(t):
    r'FILL'
    return t

def t_GCOL(t):
    r'GCOL'
    return t

def t_GET_STR(t):
    r'GET\$'
    return t

def t_GOTO(t):
    r'GOTO'
    return t

def t_LINE(t):
    r'LINE'
    return t

def t_MODE(t):
    r'MODE'
    return t

def t_MOVE(t):
    r'MOVE'
    return t

def t_NEXT(t):
    r'NEXT'
    return t

def t_PLOT(t):
    r'PLOT'
    return t

def t_PAGE(t):
    r'PAGE'
    return t

def t_QUIT(t):
    r'QUIT'
    return t

def t_READ(t):
    r'READ'
    return t

def t_RND_LPAREN(t):
    r'RND\('
    return t

def t_STEP(t):
    r'STEP'
    return t

def t_STR_STR(t):
    r'STR\$'
    return t

def t_SWAP(t):
    r'SWAP'
    return t

def t_TAB_LPAREN(t):
    r'TAB\('
    return t

def t_THEN(t):
    r'THEN'
    return t

def t_TIME(t):
    r'TIME'
    return t

def t_TINT(t):
    r'TINT'
    return t

def t_VPOS(t):
    r'VPOS'
    return t

def t_WAIT(t):
    r'WAIT'
    return t

def t_WHEN(t):
    r'WHEN'
    return t
597
+
598
+ # Three letter keywords
599
+
600
# Plain keyword rules: the docstring is the regex, the token is returned
# unchanged.  TOP sits before the two-letter TO rule that is a prefix of it.
def t_ABS(t):
    r'ABS'
    return t

def t_ACS(t):
    r'ACS'
    return t

def t_AND(t):
    r'AND'
    return t

def t_ASC(t):
    r'ASC'
    return t

def t_ASN(t):
    r'ASN'
    return t

def t_ATN(t):
    r'ATN'
    return t

# CALL has four letters; no rule defined earlier in this section is a
# prefix of it, so its position here does not affect matching.
def t_CALL(t):
    r'CALL'
    return t

def t_COS(t):
    r'COS'
    return t

def t_DEF(t):
    r'DEF'
    return t

def t_DEG(t):
    r'DEG'
    return t

def t_DIM(t):
    r'DIM'
    return t

def t_DIV(t):
    r'DIV'
    return t

def t_EOR(t):
    r'EOR'
    return t

def t_ERL(t):
    r'ERL'
    return t

def t_ERR(t):
    r'ERR'
    return t

def t_EXP(t):
    r'EXP'
    return t

def t_FOR(t):
    r'FOR'
    return t

def t_GET(t):
    r'GET'
    return t

def t_INT(t):
    r'INT'
    return t

def t_LEN(t):
    r'LEN'
    return t

def t_LET(t):
    r'LET'
    return t

def t_LOG(t):
    r'LOG'
    return t

def t_MOD(t):
    r'MOD'
    return t

def t_NOT(t):
    r'NOT'
    return t

def t_OFF(t):
    r'OFF'
    return t

def t_PTR(t):
    r'PTR'
    return t

def t_RAD(t):
    r'RAD'
    return t

def t_RND(t):
    r'RND'
    return t

def t_SGN(t):
    r'SGN'
    return t

def t_SIN(t):
    r'SIN'
    return t

def t_SPC(t):
    r'SPC'
    return t

def t_SQR(t):
    r'SQR'
    return t

def t_SUM(t):
    r'SUM'
    return t

def t_SYS(t):
    r'SYS'
    return t

def t_TAN(t):
    r'TAN'
    return t

def t_TOP(t):
    r'TOP'
    return t

def t_USR(t):
    r'USR'
    return t

def t_VAL(t):
    r'VAL'
    return t

def t_VDU(t):
    r'VDU'
    return t
755
+
756
+ # Two letter keywords
757
+
758
# Plain keyword rules: the docstring is the regex, the token is returned
# unchanged.  These are the shortest keywords and the last keyword rules
# defined as functions.
def t_IF(t):
    r'IF'
    return t

def t_LN(t):
    r'LN'
    return t

def t_ON(t):
    r'ON'
    return t

def t_OR(t):
    r'OR'
    return t

def t_TO(t):
    r'TO'
    return t
777
+
778
+
779
+ # Keywords before this point are disallowed at the start
780
+ # of variable names
781
+
782
+ # Now we list reserved identifiers. These cannot be used as
783
+ # identifiers themselves, but they can appear at the start of identifiers
784
+
785
# Reserved words looked up by the identifier rules: when the matched text
# is exactly one of these names, the token takes that name as its type.
# Every name maps to itself, so the table is built from a plain list.
reserved = {
    word: word
    for word in (
        'ENDWHILE', 'ENDCASE', 'ENDPROC',
        'REPORT', 'RETURN',
        'CLEAR', 'CLOSE', 'COUNT', 'ENDIF', 'FALSE', 'HIMEM', 'LOMEM',
        'BGET', 'BPUT', 'EXIT', 'PAGE', 'QUIT', 'STOP', 'TIME', 'TRUE',
        'VPOS', 'WAIT',
        'CLG', 'CLS', 'END', 'EOF', 'ERL', 'ERR', 'EXT', 'OFF', 'POS',
        'PTR', 'RND', 'RUN',
        'BY', 'OF', 'PI',
    )
}
824
+
825
+ # Identifiers
826
+
827
def t_PROC_ID(t):
    r'PROC[a-zA-Z_0-9`@]+'
    # Procedure name: "PROC" followed by one or more name characters.  The
    # token value is the matched text as-is, "PROC" prefix included.
    return t
831
+
832
def t_FN_ID(t):
    r'FN[a-zA-Z_0-9`@]+'
    # Function name: "FN" followed by one or more name characters.  The
    # token value is the matched text as-is, "FN" prefix included.
    return t
836
+
837
def t_ARRAYID_LPAREN(t):
    r'[a-zA-Z_`][a-zA-Z_0-9`]*[$%&~]?\('
    # An identifier (with optional type suffix) immediately followed by
    # '(' - the opening parenthesis is part of the token value.
    # NOTE(review): the lookup key is the matched text, which always ends in
    # '(', while the keys of `reserved` contain no parenthesis - so this
    # lookup appears never to hit and the type is always 'ARRAYID_LPAREN'.
    # Confirm whether a reserved word followed by '(' needs handling here.
    t.type = reserved.get(t.value, 'ARRAYID_LPAREN')
    return t
841
+
842
+ # TODO: Cannot use @ symbol at the beginning of
843
+ # any variable name. @% is a special variable
844
def t_ID(t):
    r'([@a-zA-Z_`][a-zA-Z_0-9`]*[$%&~]?)'
    # TODO: Hash doesn't seem to work in here.
    # Suffix notes: ampersand (byte) and hash (64-bit numeric ?int) only
    # apply to BBC BASIC for Windows; tilde only applies to OWL BASIC, where
    # it marks an object reference.
    lexeme = t.value
    # A whole-word match against the reserved table wins; anything else is
    # an ordinary identifier.
    t.type = reserved[lexeme] if lexeme in reserved else 'ID'
    return t
851
+
852
+ # Operators
853
# Operator and punctuation rules given as plain regex strings.
# NOTE(review): PLY adds string rules after all function rules, ordered by
# decreasing regex length.  That ordering makes '>>>' win over '>>' and '>',
# and '+=' over '+'.  It also means the word-operator assignments below
# (AND=, DIV=, EOR=, MOD=, OR=) are tried only after the t_AND / t_DIV /
# t_EOR / t_MOD / t_OR function rules, which match the keyword part first -
# so these five rules look unreachable.  Confirm against the grammar.
t_QUERY = r'\?'
t_PLING = r'\!'
t_PIPE = r'\|'
t_HASH = r'\#'
t_DOLLAR = r'\$'
t_APOSTROPHE = r"'"
t_COLON = r':'
t_COMMA = r','
t_SEMICOLON = r';'
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQ = r'='
t_NE = r'<>'
t_LTE = r'<='
t_GTE = r'>='
t_LT = r'<'
t_GT = r'>'
t_PLUS_ASSIGN = r'\+='
t_MINUS_ASSIGN = r'-='
t_TIMES_ASSIGN = r'\*='
t_DIVIDE_ASSIGN = r'/='
t_AND_ASSIGN = r'AND='
t_DIV_ASSIGN = r'DIV='
t_EOR_ASSIGN = r'EOR='
t_MOD_ASSIGN = r'MOD='
t_OR_ASSIGN = r'OR='
t_SHIFT_LEFT = r'<<'
t_SHIFT_RIGHT = r'>>'
t_SHIFT_RIGHT_UNSIGNED = r'>>>'
t_AMPERSAND = r'&'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRAC = r'\['
t_RBRAC = r'\]'
t_CARET = r'\^'
t_TILDE = r'~'
t_DOT = r'\.'

# Characters skipped between tokens: space and tab.
t_ignore = ' \t'
894
+
895
def t_LITERAL_STRING(t):
    r'"((?:[^"]|"")*)"(?!")'
    # A double-quoted string in which a doubled quote ("") stands for one
    # literal quote character.  The body is matched one character (or one
    # doubled quote) at a time: this accepts exactly the same strings as a
    # `[^"]+` run would, but without a quantifier nested inside the `*`,
    # which backtracks exponentially when the closing quote is missing.
    # The token value is the string contents: outer quotes removed and
    # doubled quotes collapsed.
    t.value = t.value[1:-1].replace('""', '"')
    return t
899
+
900
def t_LITERAL_FLOAT(t):
    r'\d*\.\d+(E([+-]?\d+))?'
    # Decimal literal with a mandatory fractional part and an optional
    # E-exponent.  On success the token value becomes a float; if the
    # conversion fails a message is printed and the value is left as the
    # matched text.
    text = t.value
    try:
        number = float(text)
    except ValueError:
        print("Number %s is too large!" % text)
    else:
        t.value = number
    return t
907
+
908
def t_LITERAL_INTEGER(t):
    r'\d+'
    # Decimal integer literal.  The token value becomes an int; if the
    # conversion fails a message is printed and the value falls back to 0.
    digits = t.value
    try:
        number = int(digits)
    except ValueError:
        print("Number %s is too large!" % digits)
        number = 0
    t.value = number
    return t
916
+
917
def t_LITERAL_HEX_INTEGER(t):
    r'&[\dA-F]+'
    # Hexadecimal literal: '&' followed by digits and upper-case A-F.
    # There is no separate hex entry in `tokens`, so the token is always
    # reported as a LITERAL_INTEGER - including on the failure path, where
    # the value falls back to 0.
    t.type = 'LITERAL_INTEGER'
    try:
        t.value = int(t.value[1:], 16)
    except ValueError:
        print("Number %s is too large!" % t.value)
        t.value = 0
    return t
926
+
927
def t_LITERAL_BINARY_INTEGER(t):
    r'%[01]+'
    # Binary literal: '%' followed by one or more 0/1 digits.
    # There is no separate binary entry in `tokens`, so the token is always
    # reported as a LITERAL_INTEGER - including on the failure path, where
    # the value falls back to 0.
    t.type = 'LITERAL_INTEGER'
    try:
        t.value = int(t.value[1:], 2)
    except ValueError:
        print("Number %s is too large!" % t.value)
        t.value = 0
    return t
936
+
937
+ # Error handling rule
938
def t_error(t):
    # Called with the unmatched remainder of the input in t.value: report
    # its first character, then step past that one character so lexing can
    # resume.
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)
941
+
942
+
943
+