owl-basic 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. owl_basic/__init__.py +3 -0
  2. owl_basic/algorithms.py +29 -0
  3. owl_basic/ast_utils.py +204 -0
  4. owl_basic/basic_visitor.py +55 -0
  5. owl_basic/cfg_vertex.py +65 -0
  6. owl_basic/codegen/__init__.py +0 -0
  7. owl_basic/codegen/clr/__init__.py +0 -0
  8. owl_basic/codegen/clr/cil_visitor.py +1296 -0
  9. owl_basic/codegen/clr/cts.py +56 -0
  10. owl_basic/codegen/clr/emitters.py +94 -0
  11. owl_basic/codegen/clr/generate.py +539 -0
  12. owl_basic/correlation_visitor.py +119 -0
  13. owl_basic/data_visitor.py +62 -0
  14. owl_basic/decoder.py +339 -0
  15. owl_basic/errors.py +22 -0
  16. owl_basic/flow/__init__.py +17 -0
  17. owl_basic/flow/basic_block.py +34 -0
  18. owl_basic/flow/basic_block_identifier.py +66 -0
  19. owl_basic/flow/basic_block_orderer.py +29 -0
  20. owl_basic/flow/connectors.py +19 -0
  21. owl_basic/flow/convert_sub_visitor.py +28 -0
  22. owl_basic/flow/entry_point_locator.py +55 -0
  23. owl_basic/flow/entry_point_visitor.py +48 -0
  24. owl_basic/flow/flow_analysis.py +56 -0
  25. owl_basic/flow/flow_graph_creator.py +14 -0
  26. owl_basic/flow/flowgraph_visitor.py +178 -0
  27. owl_basic/flow/longjump_converter.py +20 -0
  28. owl_basic/flow/longjump_visitor.py +53 -0
  29. owl_basic/flow/subroutine_converter.py +38 -0
  30. owl_basic/flow/traversal.py +110 -0
  31. owl_basic/gml_visitor.py +151 -0
  32. owl_basic/line_mapper.py +43 -0
  33. owl_basic/line_number_visitor.py +65 -0
  34. owl_basic/main.py +381 -0
  35. owl_basic/node.py +21 -0
  36. owl_basic/options.py +22 -0
  37. owl_basic/owltyping/__init__.py +1 -0
  38. owl_basic/owltyping/function_type_inferer.py +50 -0
  39. owl_basic/owltyping/hindley_milner.py +524 -0
  40. owl_basic/owltyping/set_function_type_visitor.py +25 -0
  41. owl_basic/owltyping/type_system.py +220 -0
  42. owl_basic/owltyping/typecheck.py +60 -0
  43. owl_basic/owltyping/typecheck_visitor.py +471 -0
  44. owl_basic/parent_visitor.py +37 -0
  45. owl_basic/process.py +36 -0
  46. owl_basic/separation_visitor.py +98 -0
  47. owl_basic/sigil.py +30 -0
  48. owl_basic/simplify_visitor.py +204 -0
  49. owl_basic/singleton.py +127 -0
  50. owl_basic/source_debugging.py +124 -0
  51. owl_basic/symbol_table_visitor.py +220 -0
  52. owl_basic/symbol_tables.py +195 -0
  53. owl_basic/syntax/__init__.py +0 -0
  54. owl_basic/syntax/ast.py +1081 -0
  55. owl_basic/syntax/ast_meta.py +228 -0
  56. owl_basic/syntax/grammar.py +1972 -0
  57. owl_basic/syntax/lexer.py +943 -0
  58. owl_basic/syntax/parser.py +77 -0
  59. owl_basic/utility.py +26 -0
  60. owl_basic/visitor.py +43 -0
  61. owl_basic/xml_blocks.py +137 -0
  62. owl_basic/xml_visitor.py +101 -0
  63. owl_basic-0.6.0.dist-info/METADATA +37 -0
  64. owl_basic-0.6.0.dist-info/RECORD +69 -0
  65. owl_basic-0.6.0.dist-info/WHEEL +5 -0
  66. owl_basic-0.6.0.dist-info/entry_points.txt +2 -0
  67. owl_basic-0.6.0.dist-info/licenses/LICENSE +21 -0
  68. owl_basic-0.6.0.dist-info/licenses/THIRD-PARTY-NOTICES.md +57 -0
  69. owl_basic-0.6.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,119 @@
1
+ import logging
2
+ from owl_basic import errors
3
+ from collections import deque
4
+ from owl_basic.syntax.ast import Repeat, While, ForToStep
5
+ from owl_basic.flow.connectors import connectLoop
6
+ from owl_basic.visitor import Visitor
7
+
8
class CorrelationVisitor(Visitor):
    """
    Correlate the opening and closing statements of loops.

    This visitor performs abstract execution of the control-flow-graph in order
    to correlate the opening and closing statements of FOR..NEXT, REPEAT..UNTIL
    and WHILE..ENDWHILE loops.

    CFG nodes where execution branches are annotated with the current stack of
    loop structures, and a depth first search is performed through the CFG.
    If the stack is non-empty when a terminal node (no out-edges) is encountered
    an error is reported. If loops are incorrectly nested an error is reported.
    If loops are correctly nested, back-edges are inserted into the CFG.
    """

    def __init__(self):
        self.to_visit = deque()  # Used as a LIFO stack of vertices still to process
        self.visited = set()
        self.loops = []  # A stack for tracking the current abstract execution state

    def start(self, entry_point):
        """
        Run loop correlation starting from entry_point.

        After the traversal, the temporary ``loop_stack`` annotations are
        removed from every visited vertex that carries one.
        """
        self.depthFirstSearch(entry_point)
        for v in self.visited:
            if hasattr(v, "loop_stack"):
                del v.loop_stack

    def depthFirstSearch(self, entry_point):
        """
        Visit every vertex reachable from entry_point, depth first.

        Each unvisited vertex is dispatched to the matching visit* method,
        which pushes or pops ``self.loops``.
        """
        self.to_visit.append(entry_point)
        while self.to_visit:
            v = self.to_visit.pop()
            # Restore the loop stack that was current when control branched
            # towards this vertex
            if hasattr(v, "loop_stack"):
                self.loops = v.loop_stack[:]
            if v not in self.visited:
                self.visited.add(v)
                v.accept(self)
                if len(v.outEdges) == 0 and len(self.loops) != 0:
                    # TODO: Improve this error message by printing an
                    #       abstract stack trace
                    errors.fatalError("In loops at terminal statement at line %s" % v.lineNum)
                # If execution splits, take a copy of the current loop stack
                # and store a reference to it on each of the target nodes of
                # the out edges of the current node, so the state can be
                # restored later in the traversal
                if len(v.outEdges) > 1:
                    loop_stack = self.loops[:]
                    for target in v.outEdges:
                        target.loop_stack = loop_stack
                self.to_visit.extend(v.outEdges)

    def visitAstStatement(self, statement):
        """
        Do nothing for most AST statements
        """
        pass

    def visitRepeat(self, repeat_stmt):
        """Push the REPEAT statement as the innermost open loop."""
        self.loops.append(repeat_stmt)

    def visitUntil(self, until_stmt):
        """Close the innermost loop, which must be a REPEAT, and connect it."""
        if len(self.loops) == 0:
            errors.fatalError("Not in a REPEAT loop at line %d." % until_stmt.lineNum)
        peek = self.loops[-1]
        if not isinstance(peek, Repeat):
            errors.fatalError("Not in a REPEAT loop at line %d; currently in %s loop opened at line %d" % (until_stmt.lineNum, peek.description, peek.lineNum))
        repeat_stmt = self.loops.pop()
        connectLoop(until_stmt, repeat_stmt)

    def visitWhile(self, while_stmt):
        """Push the WHILE statement as the innermost open loop."""
        self.loops.append(while_stmt)

    def visitEndwhile(self, endwhile_stmt):
        """Close the innermost loop, which must be a WHILE, and connect it."""
        if len(self.loops) == 0:
            errors.fatalError("Not in a WHILE loop at line %d." % endwhile_stmt.lineNum)
        peek = self.loops[-1]
        if not isinstance(peek, While):
            errors.fatalError("Not in a WHILE loop at line %d; currently in %s loop opened at line %d" % (endwhile_stmt.lineNum, peek.description, peek.lineNum))
        while_stmt = self.loops.pop()
        connectLoop(endwhile_stmt, while_stmt)

    def visitForToStep(self, for_stmt):
        """Push the FOR statement as the innermost open loop."""
        self.loops.append(for_stmt)

    def visitNext(self, next_stmt):
        """
        Match a NEXT statement against the stack of open FOR loops.

        FOR loops are popped until one is found whose control variable name
        equals the first identifier of the NEXT statement; that pair is then
        connected. A NEXT without identifiers adopts the identifier of the
        innermost FOR loop. Only the first identifier of the NEXT statement
        is considered here.
        """
        logging.debug("NEXT statement = %s", next_stmt)
        while True:
            if len(self.loops) == 0:
                errors.fatalError("Not in a FOR loop at line %d." % next_stmt.lineNum)
            peek = self.loops[-1]
            if not isinstance(peek, ForToStep):
                errors.fatalError("Not in a FOR loop at line %d; currently in %s loop opened at line %d" % (next_stmt.lineNum, peek.description, peek.lineNum))

            for_stmt = self.loops.pop()
            # If the next_stmt has no attached identifiers, it applies to the
            # top FOR statement on the stack
            if len(next_stmt.identifiers) == 0:
                next_stmt.identifiers.append(for_stmt.identifier)
            # TODO: Check that the symbols are equal, not just the names
            if for_stmt.identifier.identifier == next_stmt.identifiers[0].identifier:
                connectLoop(next_stmt, for_stmt)
                break
117
+
118
+
119
+
@@ -0,0 +1,62 @@
1
+ import logging
2
+ import re
3
+ from owl_basic.visitor import Visitor
4
+
5
class DataVisitor(Visitor):
    '''
    Extract DATA from DATA statements and hidden DATA within REM statements.
    BBC BASIC allows any line to be RESTOREd to and will attempt to READ data
    from either the first DATA statement or the first COMMA.  This means it
    is possible to do
       10 REM,"HELLO", "WORLD"
       20 RESTORE 10
       30 READ A$
       40 PRINT A$
       > RUN
       HELLO

    For this reason, we need to store anything following a COMMA in a REM
    statement. Any DATA keyword following a REM is irrelevant since it will
    not be tokenized, reading will start from the first COMMA.

    It is NOT possible to READ into a REMed data block from a previous DATA
    statement; the REMed line must be RESTOREd to directly
    '''

    # Matches either a double-quoted field (with "" as an escaped quote) or a
    # run of characters up to the next comma.  Compiled once for all calls.
    _FIELD_RE = re.compile(r'(?:\s*"((?:[^"]+|"")*)"(?!")\s*)|([^,]+)')

    def __init__(self):
        self.data = []   # All data items collected so far, in visiting order
        self.index = {}  # physical 0-based line number -> data[index]

    def parse(self, data):
        "Parse the text following a DATA statement into a list of string items"
        # Break the data into fields
        raw_items = self._FIELD_RE.findall(data)
        last = len(raw_items) - 1
        items = []
        for i, (quoted, unquoted) in enumerate(raw_items):
            if quoted:
                # Collapse doubled quotes inside a quoted field
                item = quoted.replace('""', '"')
            else:
                item = unquoted.lstrip()
                # If its the last item on the line, strip trailing space
                if i == last:
                    item = item.rstrip()
            items.append(item)
        return items

    def visitAstNode(self, node):
        "Recurse into the children of any node without a specific handler"
        node.forEachChild(self.visit)

    def visitData(self, statement):
        "Record where this statement's items start, then append its items"
        logging.debug("DATA statement : %s", statement.data)
        self.index[statement.lineNum] = len(self.data)
        items = self.parse(statement.data)
        self.data.extend(items)

    def visitRem(self, statement):
        "Collect the items hidden after the first comma of a REM, if any"
        logging.debug("REM statement : %s", statement.data)
        # Find the index of the first comma
        comma_index = statement.data.find(',')
        if comma_index != -1:
            # A comma was found, so it is possible to RESTORE to this line
            self.index[statement.lineNum] = len(self.data)
            items = self.parse(statement.data[comma_index+1:])
            self.data.extend(items)
owl_basic/decoder.py ADDED
@@ -0,0 +1,339 @@
1
+ #!/usr/bin/env python
2
+ #
3
+ # (c) 2007 Matt Godbolt.
4
+ #
5
+ # Updated 2008 Ian Smallshire.
6
+ #
7
+ # Get v0.01 @ http://xania.org/200711/bbc-basic-v-format
8
+ #
9
+ # Use however you like, as long as you put credit where credit's due.
10
+ # Some information obtained from source code from RISC OS Open.
11
+ # v0.01 - first release. Doesn't deal with GOTO line numbers. (c) 2007 Matt Godbolt
12
+ # v0.02 - edited to output line numbers where needed and fixed Ian Smallshire
13
+ # the GOTO/RESTORE/GOSUB line numbers.
14
+ # v0.03 - Added file type detection for input and provision Ian Smallshire
15
+ # for BB4W encoded tokens
16
+ # v0.04 - Now decodes BB4W tokens as well as Acorn. Ian Smallshire
17
+ # v0.05 - Corrected tokens inside strings. No longer detokenized Rob & Ian Smallshire
18
+ # v0.06 - Fixed line number decoding with line numbers over 32767 Ian Smallshire
19
+
20
+ #line numbers for bb4w still need testing properly.
21
+ # If the input file is plain text it must be terminated by an EOL.
22
+ import struct, re, getopt, sys
23
+
24
+ # The list of BBC BASIC V tokens:
25
+ # Base tokens, starting at 0x7f
26
+
27
class Decoder(object):
    """
    Base class for the program file decoders.

    Holds the raw program text in ``data`` and the decoded result in
    ``lines``. The decoders in this module index and compare ``data`` as a
    string of single characters, so binary input is converted to ``str``
    using latin-1, which maps every byte value to the character with the
    same ordinal.
    """

    def __init__(self, data):
        """
        :param data: The file contents, as ``str`` or as ``bytes``/``bytearray``
        """
        if isinstance(data, (bytes, bytearray)):
            data = bytes(data).decode('latin-1')
        self.data = data
        self.lines = []
34
+
35
class PlainTextDecoder(Decoder):
    """
    Decoder for untokenised program text.

    Subclasses supply ``lineEnd``, the separator used to split the text
    into lines.
    """

    def __init__(self, data):
        super().__init__(data)

    def decode(self):
        """
        Split the text into (line_number, line_body) tuples.

        Either every line carries a leading number or none does; mixing the
        two raises an Exception. Missing numbers are faked as 10, 20, 30...
        Numbers read from the text are kept as strings, faked ones are ints.

        :returns: The accumulated list of (line_number, line_body) tuples
        """
        raw_lines = self.data.split(self.lineEnd)

        # Remove any trailing empty line
        if len(raw_lines[-1]) == 0:
            raw_lines.pop()

        numbered = None  # Tri-state None, True or False
        fake_number = 10

        for text in raw_lines:
            # TODO: Factor this regex out of here and decoder
            match = re.match(r'\s*(\d+)?\s?(.*)', text)
            number = match.group(1)
            body = match.group(2)
            this_line_numbered = number is not None

            if numbered is None:
                # The first line decides the convention for the whole file
                numbered = this_line_numbered
            elif this_line_numbered != numbered:
                raise Exception("Inconsistent line numbering")

            # Fake line numbers if they are missing
            if not numbered:
                number = fake_number
                fake_number += 10

            self.lines.append((number, body))

        return self.lines
70
+
71
class PlainTextCrDecoder(PlainTextDecoder):
    """Plain text whose lines are separated by a single carriage return."""

    lineEnd = '\x0d'
    fileTypeName = 'plain text CR'

    def __init__(self, data):
        super().__init__(data)
77
+
78
class PlainTextLfDecoder(PlainTextDecoder):
    """Plain text whose lines are separated by a single line feed."""

    lineEnd = '\x0a'
    fileTypeName = 'plain text LF'

    def __init__(self, data):
        super().__init__(data)
84
+
85
class PlainTextLfCrDecoder(PlainTextDecoder):
    """Plain text whose lines are separated by line feed then carriage return."""

    lineEnd = '\x0a\x0d'
    fileTypeName = 'plain text LFCR'

    def __init__(self, data):
        super().__init__(data)
91
+
92
class PlainTextCrLfDecoder(PlainTextDecoder):
    """Plain text whose lines are separated by carriage return then line feed."""

    lineEnd = '\x0d\x0a'
    fileTypeName = 'plain text CRLF'

    def __init__(self, data):
        super().__init__(data)
98
+
99
class BbcBasicAcornDecoder(Decoder):
    """
    Decoder for tokenised Acorn BBC BASIC program files.

    File layout: {<cr> <linehi> <linelo> <len> <text>} <cr> <ff>
    """

    lineEnd = '\x0d'
    fileTypeName = 'BBC BASIC (Acorn)'

    # Size of the <cr> <linehi> <linelo> <len> record header; <len> counts
    # these bytes too, since the text is sliced as data[4:len].
    _HEADER_SIZE = 4

    def __init__(self, data):
        super(BbcBasicAcornDecoder, self).__init__(data)

    def decode(self):
        """
        Decode every line record into [line_number, detokenised_text].

        :returns: The accumulated list of [line_number, text] pairs
        :raises Exception: "Bad program" if a record header is truncated or
                           carries a length that cannot be valid
        """
        while True:
            if len(self.data) < 2:
                raise Exception("Bad program")
            if self.data[1] == '\xff':
                # <cr> <ff> marks the end of the program
                break
            # {<cr> <linehi> <linelo> <len> <text>} <cr> <ff>
            if len(self.data) < self._HEADER_SIZE:
                raise Exception("Bad program")
            lineNumber = ord(self.data[2]) + ord(self.data[1]) * 256
            length = ord(self.data[3])
            if length < self._HEADER_SIZE:
                # A length of zero would never consume any data and loop
                # forever; anything shorter than the header is corrupt.
                raise Exception("Bad program")
            lineData = self.data[4:length]
            self.lines.append([lineNumber, self.detokenise(lineData)])
            self.data = self.data[length:]
            if len(self.data) <= len(self.lineEnd):
                # may need to check what data is in last chars
                # all tests have been ending tokens/CR/LF
                break
        return self.lines

    def detokenise(self, lineData):
        """Expand the tokens in one line of program text into keywords."""
        # Acorn encoding
        # This regular expression is essentially:
        # Match a quoted string OR
        #
        # (Optional extension token) followed by
        # (REM token followed by the rest of the line)
        #     -- this ensures we don't detokenise the REM statement itself
        # OR
        # (Line number following token, with 3 characters in the range 64-127)
        # OR
        # (any token)
        return re.sub(r'"(?:(?:[^"]+|"")*)"(?!")|( ?)([\xc6-\xc8])?(\xf4.*|\x8d[\x40-\x7f]{3}|[\x7f-\xff])',
                      BbcBasicAcornDecoder.replaceFunc, lineData)

    @staticmethod
    def replaceFunc(match):
        """Return the replacement text for one match of the detokenise regex."""
        if match.group().startswith('"'):
            # Quoted strings are passed through untouched
            return match.group()
        else:
            prefix, ext, token = match.groups()
            if len(prefix) == 0:
                prefix = ' '
            tokenOrd = ord(token[0])
            if ext:  # An extended opcode, CASE/WHILE/SYS etc
                if ext == '\xc6':
                    return cfnTokens[tokenOrd-0x8e]
                if ext == '\xc7':
                    return comTokens[tokenOrd-0x8e]
                if ext == '\xc8':
                    return stmtTokens[tokenOrd-0x8e]
                raise Exception("Bad token")
            else:  # Normal token, plus any extra characters
                if token[0] == '\x8d':  # line number following token
                    # decode the 24 bit line number
                    return str(DecodeLineNo(token[1:]))
                else:
                    return prefix + tokens[tokenOrd - 127] + token[1:]
163
+
164
class BbcBasic8086(Decoder):
    """
    Decoder for tokenised BBC BASIC (80/86) / BB4W program files.

    File layout: {<len> <linelo> <linehi> <text> <cr>} <00> <ff> <ff>
    """

    lineEnd = '\x0d'
    fileTypeName = 'BBC BASIC (80/86)'

    # Size of the <len> <linelo> <linehi> record header
    _HEADER_SIZE = 3

    def __init__(self, data):
        super(BbcBasic8086, self).__init__(data)

    def decode(self):
        """
        Decode every line record into [line_number, detokenised_text].

        Decoding stops at the terminating record, whose length byte is zero.

        :returns: The accumulated list of [line_number, text] pairs
        :raises Exception: "Bad program" if a record header is truncated or
                           carries a length shorter than the header
        """
        # TODO this needs testing
        # i have read somewhere that bb4w uses different tokens
        # and also has diff line number formatting
        # (http://bb4w.wikispaces.com/Format)
        # {<len> <linelo> <linehi> <text> <cr>} <00> <ff> <ff>
        while True:
            if len(self.data) < self._HEADER_SIZE:
                raise Exception("Bad program")
            length = ord(self.data[0])
            if length == 0:
                # <00> <ff> <ff> terminator.  A zero length also could never
                # consume any data, so continuing would loop forever.
                break
            if length < self._HEADER_SIZE:
                raise Exception("Bad program")
            # TODO check if order of bytes is correct
            # line number bytes in different order to the Acorn format
            lineNumber = ord(self.data[1]) + ord(self.data[2]) * 256
            lineData = self.data[3:length]
            self.lines.append([lineNumber, self.detokenise(lineData)])
            self.data = self.data[length:]
            if len(self.data) <= len(self.lineEnd):
                # may need to check what data is in last chars
                # all tests have been ending tokens/CR/LF
                break
        return self.lines

    def detokenise(self, lineData):
        """Expand the tokens in one line of program text into keywords."""
        # This uses BB4W encoding
        # This regular expression is essentially:
        # Match a quoted string OR
        #
        # (REM token followed by the rest of the line)
        #     -- this ensures we don't detokenise the REM statement itself
        # OR
        # (Line number following token, with 3 characters in the range 64-127)
        # OR
        # (any token 127-255)
        # OR
        # (any token 0-15) TODO check if 16 is needed (EXIT) i think
        return re.sub(r'"(?:(?:[^"]+|"")*)"(?!")|( ?)(\xf4.*|\x8d[\x40-\x7f]{3}|[\x7f-\xff]|[\x00-\x0f])',
                      BbcBasic8086.replaceFunc, lineData)

    @staticmethod
    def replaceFunc(match):
        """Return the replacement text for one match of the detokenise regex."""
        if match.group().startswith('"'):
            # Quoted strings are passed through untouched
            return match.group()
        else:
            prefix, token = match.groups()
            if len(prefix) == 0:
                prefix = ' '
            tokenOrd = ord(token[0])
            if token[0] == '\x8d':  # line number following token
                # decode the 24 bit line number
                return str(DecodeLineNo(token[1:]))
            else:
                # Flipping the top bit maps tokens 128-255 onto table
                # indices 0-127 and tokens 0-15 onto indices 128-143
                return prefix + bb4wTokens[tokenOrd ^ 128] + token[1:]


# Alias following the "...Decoder" naming used by the other decoder classes.
BbcBasic8086Decoder = BbcBasic8086
224
+
225
def fileType(data):
    '''
    Factory to produce the correct decoder depending on the file contents.

    The format is recognised from the final bytes of the file. Binary input
    is first converted to str using latin-1 (one character per byte, same
    ordinal), because the signatures below and the decoders work on str.

    :param data: The whole file contents, as str or bytes/bytearray
    :returns: A decoder instance constructed with the data
    :raises Exception: "Bad Program" if there are fewer than four characters,
                       "Unrecognised program format" if no signature matches
    '''
    if isinstance(data, (bytes, bytearray)):
        data = bytes(data).decode('latin-1')

    if len(data) < 4:
        # TODO unsure how you want to return error
        raise Exception("Bad Program")

    fileExt = data[-4:]

    # Check final byte sequence (longest sequence first)
    if fileExt == '\x0d\x00\xff\xff':
        return BbcBasic8086(data)
    elif fileExt[2:4] == '\x0a\x0d':
        return PlainTextLfCrDecoder(data)
    elif fileExt[2:4] == '\x0d\x0a':
        return PlainTextCrLfDecoder(data)
    elif fileExt[2:4] == '\x0d\xff':
        return BbcBasicAcornDecoder(data)
    elif fileExt[3] == '\x0d':
        return PlainTextCrDecoder(data)
    elif fileExt[3] == '\x0a':
        return PlainTextLfDecoder(data)
    else:
        raise Exception("Unrecognised program format")
250
+
251
# Base tokens, indexed by (token byte - 0x7f).
tokens = [
    'OTHERWISE', # 7f
    'AND', 'DIV', 'EOR', 'MOD', 'OR', 'ERROR', 'LINE', 'OFF',
    'STEP', 'SPC', 'TAB(', 'ELSE', 'THEN', '<line>' , 'OPENIN', 'PTR',
    'PAGE', 'TIME', 'LOMEM', 'HIMEM', 'ABS', 'ACS', 'ADVAL', 'ASC',
    'ASN', 'ATN', 'BGET', 'COS', 'COUNT', 'DEG', 'ERL', 'ERR',
    'EVAL', 'EXP', 'EXT', 'FALSE', 'FN', 'GET', 'INKEY', 'INSTR(',
    'INT', 'LEN', 'LN', 'LOG', 'NOT', 'OPENUP', 'OPENOUT', 'PI',
    'POINT(', 'POS', 'RAD', 'RND', 'SGN', 'SIN', 'SQR', 'TAN',
    'TO', 'TRUE', 'USR', 'VAL', 'VPOS', 'CHR$', 'GET$', 'INKEY$',
    'LEFT$(', 'MID$(', 'RIGHT$(', 'STR$', 'STRING$(', 'EOF',
    '<ESCFN>', '<ESCCOM>', '<ESCSTMT>',
    'WHEN', 'OF', 'ENDCASE', 'ELSE', 'ENDIF', 'ENDWHILE', 'PTR',
    'PAGE', 'TIME', 'LOMEM', 'HIMEM', 'SOUND', 'BPUT', 'CALL', 'CHAIN',
    'CLEAR', 'CLOSE', 'CLG', 'CLS', 'DATA', 'DEF', 'DIM', 'DRAW',
    'END', 'ENDPROC', 'ENVELOPE', 'FOR', 'GOSUB', 'GOTO', 'GCOL', 'IF',
    'INPUT', 'LET', 'LOCAL', 'MODE', 'MOVE', 'NEXT', 'ON', 'VDU',
    'PLOT', 'PRINT', 'PROC', 'READ', 'REM', 'REPEAT', 'REPORT', 'RESTORE',
    'RETURN', 'RUN', 'STOP', 'COLOUR', 'TRACE', 'UNTIL', 'WIDTH', 'OSCLI']

# Referred to as "ESCFN" tokens in the source, starting at 0x8e.
cfnTokens = [
    'SUM', 'BEAT']
# Referred to as "ESCCOM" tokens in the source, starting at 0x8e.
# NOTE: every entry needs its own comma; adjacent string literals are
# silently concatenated, which would shift all following tokens by one.
comTokens = [
    'APPEND', 'AUTO', 'CRUNCH', 'DELETE', 'EDIT', 'HELP', 'LIST', 'LOAD',
    'LVAR', 'NEW', 'OLD', 'RENUMBER', 'SAVE', 'TEXTLOAD', 'TEXTSAVE', 'TWIN',
    'TWINO', 'INSTALL']
# Referred to as "ESCSTMT", starting at 0x8e.
stmtTokens= [
    'CASE', 'CIRCLE', 'FILL', 'ORIGIN', 'PSET', 'RECT', 'SWAP', 'WHILE',
    'WAIT', 'MOUSE', 'QUIT', 'SYS', 'INSTALL', 'LIBRARY', 'TINT', 'ELLIPSE',
    'BEATS', 'TEMPO', 'VOICES', 'VOICE', 'STEREO', 'OVERLAY']
# BB4W tokens....
# these tokens start at 128 and wrap around to 0-15
bb4wTokens=["AND","DIV","EOR","MOD","OR","ERROR","LINE","OFF",
    "STEP","SPC","TAB(","ELSE","THEN","","OPENIN","PTR",
    "PAGE","TIME","LOMEM","HIMEM","ABS","ACS","ADVAL","ASC",
    "ASN","ATN","BGET","COS","COUNT","DEG","ERL","ERR",
    "EVAL","EXP","EXT","FALSE","FN","GET","INKEY","INSTR(",
    "INT","LEN","LN","LOG","NOT","OPENUP","OPENOUT","PI",
    "POINT(","POS","RAD","RND","SGN","SIN","SQR","TAN",
    "TO","TRUE","USR","VAL","VPOS","CHR$","GET$","INKEY$",
    "LEFT$(","MID$(","RIGHT$(","STR$","STRING$(","EOF","SUM","WHILE",
    "CASE","WHEN","OF","ENDCASE","OTHERWISE","ENDIF","ENDWHILE","PTR",
    "PAGE","TIME","LOMEM","HIMEM","SOUND","BPUT","CALL","CHAIN",
    "CLEAR","CLOSE","CLG","CLS","DATA","DEF","DIM","DRAW",
    "END","ENDPROC","ENVELOPE","FOR","GOSUB","GOTO","GCOL","IF",
    "INPUT","LET","LOCAL","MODE","MOVE","NEXT","ON","VDU",
    "PLOT","PRINT","PROC","READ","REM","REPEAT","REPORT","RESTORE",
    "RETURN","RUN","STOP","COLOUR","TRACE","UNTIL","WIDTH","OSCLI",
    "","CIRCLE","ELLIPSE","FILL","MOUSE","ORIGIN","QUIT","RECTANGLE",
    "SWAP","SYS","TINT","WAIT","INSTALL","","PRIVATE","BY","EXIT"]
304
+
305
def DecodeLineNo(lineNo):
    """Returns a line number from a 24bit encoded line number

    :param lineNo: The three encoded characters that follow a line-number
                   token, as a str or as bytes (at least three elements)
    :returns: The decoded line number as an int
    """
    # Indexing bytes yields ints already; indexing str yields characters
    byte0, byte1, byte2 = [
        c if isinstance(c, int) else ord(c)
        for c in (lineNo[0], lineNo[1], lineNo[2])
    ]
    # needed to be ANDed with 255 after multiply because with this formula
    # on the 6502 it moved the high bits to carry with the Logical Shift
    msb = byte2 ^ ((byte0 * 16) & 255)
    lsb = byte1 ^ (((byte0 & 0x30) * 4) & 255)
    return lsb + (msb * 256)
315
+
316
def ReadLines(data):
    """Pick the decoder matching 'data' via fileType() and return the
    list of lines produced by its decode() method."""
    return fileType(data).decode()
322
+
323
def decode(data, output):
    """Decode the program in 'data' and write it to 'output', one
    '<line number> <text>' line per program line."""
    for number, text in ReadLines(data):
        output.write(str(number) + ' ')
        # Normalise line endings to \n
        output.write(text.strip() + '\n')
330
+
331
if __name__ == "__main__":
    # Command line use: decode INPUT and write the listing to OUTPUT
    optlist, args = getopt.getopt(sys.argv[1:], '')
    if len(args) != 2:
        print("Usage: %s INPUT OUTPUT" % sys.argv[0])
        sys.exit(1)
    # The decoders compare and index single characters, so convert the raw
    # bytes to str.  latin-1 maps each byte to the character with the same
    # ordinal, which keeps every token value intact.
    with open(args[0], 'rb') as source:
        entireFile = source.read().decode('latin-1')
    # Writing as latin-1 turns any untranslated characters back into the
    # bytes they came from.
    with open(args[1], 'w', encoding='latin-1') as output:
        decode(entireFile, output)
owl_basic/errors.py ADDED
@@ -0,0 +1,22 @@
1
+ import sys
2
+ import logging
3
+
4
# Messages already reported through warning() or error()
error_log = set()


def warning(message):
    """Log message at WARNING level, once per distinct message."""
    if message in error_log:
        return
    logging.warning(message)
    error_log.add(message)
10
+
11
def error(message):
    """Log message at ERROR level, once per distinct message."""
    if message in error_log:
        return
    logging.error(message)
    error_log.add(message)
15
+
16
def fatalError(message):
    """Log message at CRITICAL level and terminate with exit status 1."""
    logging.critical(message)
    raise SystemExit(1)
19
+
20
def internal(message):
    """Log an internal error at CRITICAL level and terminate with exit status 1."""
    logging.critical(message)
    raise SystemExit(1)
@@ -0,0 +1,17 @@
1
+ '''
2
+ Package for analysing and manipulating control flow.
3
+ '''
4
+
5
+ from .entry_point_locator import locateEntryPoints
6
+ from .flow_graph_creator import createForwardControlFlowGraph
7
+ from .longjump_converter import convertLongjumpsToExceptions
8
+ from .subroutine_converter import convertSubroutinesToProcedures
9
+ from .basic_block_identifier import identifyBasicBlocks
10
+ from .basic_block_orderer import orderBasicBlocks
11
+
12
# Every function imported above is part of the package's public API.
__all__ = ["locateEntryPoints",
           "createForwardControlFlowGraph",
           "convertLongjumpsToExceptions",
           "convertSubroutinesToProcedures",
           "identifyBasicBlocks",
           "orderBasicBlocks"]
17
+
@@ -0,0 +1,34 @@
1
+ '''
2
+ Created on 30 Jan 2010
3
+
4
+ @author: rjs
5
+ '''
6
+
7
+ from owl_basic.cfg_vertex import CfgVertex
8
+
9
class BasicBlock(CfgVertex):
    '''
    A sequence of statements with a single entry and exit point
    '''

    def __init__(self, statements=None, *args, **kwargs):
        '''
        :param statements: An optional iterable of statements comprising the
                           basic block. The items are copied into a new list,
                           so the caller's sequence is never shared.
        Any further arguments are passed on to CfgVertex.
        '''
        super(BasicBlock, self).__init__(*args, **kwargs)
        # The list of statements comprising the basic block
        self.statements = [] if statements is None else list(statements)
        self.topological_order = None  # Integer giving ordinal position in method
        self.label = None              # A label into which can be branched to, to enter this basic block
        self.is_label_marked = False   # A flag for whether the label has been marked

    @property
    def entryPoint(self):
        '''The first statement in the BasicBlock, or None'''
        return self.statements[0] if len(self.statements) > 0 else None

    @property
    def exitPoint(self):
        '''The last statement in the BasicBlock, or None'''
        return self.statements[-1] if len(self.statements) > 0 else None

    def __len__(self):
        '''The number of statements in the block (an empty block is falsy)'''
        return len(self.statements)
32
+
33
+
34
+
@@ -0,0 +1,66 @@
1
+ '''
2
+ Grouping of statements into basic blocks - resulting in a coarser grained control flow graph
3
+ '''
4
+ import logging
5
+ logger = logging.getLogger('flow.basic_block_identifier')
6
+ logger.setLevel(logging.WARNING)
7
+
8
+ from .connectors import connect
9
+ from .traversal import depthFirstSearch
10
+ from .basic_block import BasicBlock
11
+
12
def identifyBasicBlocks(entry_points, options):
    '''
    Trace the control flow graph from each entry point and collect consecutive statements
    into basic blocks, comprising a more coarse grained control flow graph. This function applies
    a transformation to the statement level control flow graph, coarsening it by grouping
    statements into a graph consisting only of BasicBlock instances. Each BasicBlock instance
    contains a list of non-branching, or non-branch target statements.

    A basic block is code that has one entry point (i.e., no code within it is the destination
    of a jump instruction), one exit point and no jump instructions contained within it. The
    start of a basic block may be jumped to from more than one location. The end of a basic block
    may be a jump instruction or the statement before the destination of a jump instruction. Basic
    blocks are usually the basic unit to which compiler optimizations are applied. Basic blocks
    form the vertices or nodes in a control flow graph.

    :param entry_points: A dictionary mapping entry point names to the program statements
                         which are the entry points to the program or procedures
    :param options: Program options (not used by this function)
    :returns: A dictionary of entry blocks - BasicBlock instances through which control
              flow enters the graph of each program, function or procedure. The keys are
              the entry point names
    '''
    logger.info("Identifying basic blocks")
    logger.debug("entry_points = %s", entry_points)
    return {name: coarsenControlFlowGraph(statement)
            for name, statement in entry_points.items()}
37
+
38
def coarsenControlFlowGraph(entry_point):
    '''
    Group the statements reachable from entry_point into BasicBlocks.

    :param entry_point: The program statement through which control enters the program,
                        procedure or function whose control flow graph is to be coarsened.
    :returns: The BasicBlock instance that entry_point was assigned to
    '''
    logger.debug("entry_point = %s", entry_point)
    return assignBlockAndContinue(entry_point)
48
+
49
+ # TODO: Decorate as a tail-call
50
def assignBlockAndContinue(vertex, block=None):
    '''
    Place vertex in a basic block, then recurse into its successors.

    :param vertex: The statement vertex to assign
    :param block: The predecessor's block, offered for reuse; it is only used
                  when vertex has an in-degree of exactly one
    :returns: The block that vertex belongs to
    '''
    # A vertex that already has a block was reached earlier; stop here
    if vertex.block:
        return vertex.block

    # Start a new block unless this vertex has exactly one predecessor and
    # a usable block was handed down from it
    if vertex.inDegree != 1 or not block:
        block = BasicBlock()
    block.statements.append(vertex)
    vertex.block = block
    logger.debug("%s with in-degree %s and out-degree %s at %s in %s", vertex, str(vertex.inDegree), str(vertex.outDegree), str(vertex.lineNum), vertex.block)

    for target in vertex.outEdges:
        # Only a sole successor may continue the current block
        offered = block if vertex.outDegree == 1 else None
        successor_block = assignBlockAndContinue(target, offered)
        if successor_block is not block:
            connect(block, successor_block)
    return vertex.block
64
+
65
+
66
+