owl-basic 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- owl_basic/__init__.py +3 -0
- owl_basic/algorithms.py +29 -0
- owl_basic/ast_utils.py +204 -0
- owl_basic/basic_visitor.py +55 -0
- owl_basic/cfg_vertex.py +65 -0
- owl_basic/codegen/__init__.py +0 -0
- owl_basic/codegen/clr/__init__.py +0 -0
- owl_basic/codegen/clr/cil_visitor.py +1296 -0
- owl_basic/codegen/clr/cts.py +56 -0
- owl_basic/codegen/clr/emitters.py +94 -0
- owl_basic/codegen/clr/generate.py +539 -0
- owl_basic/correlation_visitor.py +119 -0
- owl_basic/data_visitor.py +62 -0
- owl_basic/decoder.py +339 -0
- owl_basic/errors.py +22 -0
- owl_basic/flow/__init__.py +17 -0
- owl_basic/flow/basic_block.py +34 -0
- owl_basic/flow/basic_block_identifier.py +66 -0
- owl_basic/flow/basic_block_orderer.py +29 -0
- owl_basic/flow/connectors.py +19 -0
- owl_basic/flow/convert_sub_visitor.py +28 -0
- owl_basic/flow/entry_point_locator.py +55 -0
- owl_basic/flow/entry_point_visitor.py +48 -0
- owl_basic/flow/flow_analysis.py +56 -0
- owl_basic/flow/flow_graph_creator.py +14 -0
- owl_basic/flow/flowgraph_visitor.py +178 -0
- owl_basic/flow/longjump_converter.py +20 -0
- owl_basic/flow/longjump_visitor.py +53 -0
- owl_basic/flow/subroutine_converter.py +38 -0
- owl_basic/flow/traversal.py +110 -0
- owl_basic/gml_visitor.py +151 -0
- owl_basic/line_mapper.py +43 -0
- owl_basic/line_number_visitor.py +65 -0
- owl_basic/main.py +381 -0
- owl_basic/node.py +21 -0
- owl_basic/options.py +22 -0
- owl_basic/owltyping/__init__.py +1 -0
- owl_basic/owltyping/function_type_inferer.py +50 -0
- owl_basic/owltyping/hindley_milner.py +524 -0
- owl_basic/owltyping/set_function_type_visitor.py +25 -0
- owl_basic/owltyping/type_system.py +220 -0
- owl_basic/owltyping/typecheck.py +60 -0
- owl_basic/owltyping/typecheck_visitor.py +471 -0
- owl_basic/parent_visitor.py +37 -0
- owl_basic/process.py +36 -0
- owl_basic/separation_visitor.py +98 -0
- owl_basic/sigil.py +30 -0
- owl_basic/simplify_visitor.py +204 -0
- owl_basic/singleton.py +127 -0
- owl_basic/source_debugging.py +124 -0
- owl_basic/symbol_table_visitor.py +220 -0
- owl_basic/symbol_tables.py +195 -0
- owl_basic/syntax/__init__.py +0 -0
- owl_basic/syntax/ast.py +1081 -0
- owl_basic/syntax/ast_meta.py +228 -0
- owl_basic/syntax/grammar.py +1972 -0
- owl_basic/syntax/lexer.py +943 -0
- owl_basic/syntax/parser.py +77 -0
- owl_basic/utility.py +26 -0
- owl_basic/visitor.py +43 -0
- owl_basic/xml_blocks.py +137 -0
- owl_basic/xml_visitor.py +101 -0
- owl_basic-0.6.0.dist-info/METADATA +37 -0
- owl_basic-0.6.0.dist-info/RECORD +69 -0
- owl_basic-0.6.0.dist-info/WHEEL +5 -0
- owl_basic-0.6.0.dist-info/entry_points.txt +2 -0
- owl_basic-0.6.0.dist-info/licenses/LICENSE +21 -0
- owl_basic-0.6.0.dist-info/licenses/THIRD-PARTY-NOTICES.md +57 -0
- owl_basic-0.6.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from owl_basic import errors
|
|
3
|
+
from collections import deque
|
|
4
|
+
from owl_basic.syntax.ast import Repeat, While, ForToStep
|
|
5
|
+
from owl_basic.flow.connectors import connectLoop
|
|
6
|
+
from owl_basic.visitor import Visitor
|
|
7
|
+
|
|
8
|
+
class CorrelationVisitor(Visitor):
    """
    This visitor performs abstract execution of the control-flow-graph in order
    to correlate the opening and closing statements of FOR..NEXT, REPEAT..UNTIL
    and WHILE..ENDWHILE loops.

    CFG nodes where execution branches are annotated with the current stack of
    loop structures, and a depth first search is performed through the CFG.
    If the stack is non-empty when a terminal node (no out-edges) is encountered
    an error is reported. If loops are incorrectly nested an error is reported.
    If loops are correctly nested, back-edges are inserted into the CFG.
    """

    def __init__(self):
        self.to_visit = deque()  # Work stack of CFG vertices still to be processed
        self.visited = set()     # Vertices which have already been processed
        self.loops = []          # A stack for tracking the current abstract execution state

    def start(self, entry_point):
        """
        The entry-point from which loop correlation should start
        """
        self.depthFirstSearch(entry_point)
        # The loop_stack annotations are only needed during the traversal,
        # so remove them from every vertex that was reached
        for v in self.visited:
            if hasattr(v, "loop_stack"):
                del v.loop_stack

    def depthFirstSearch(self, entry_point):
        """
        Visit every vertex reachable from entry_point, tracking the stack of
        open loops along each path.
        """
        self.to_visit.append(entry_point)
        while len(self.to_visit):
            v = self.to_visit.pop()
            # Restore the loop stack
            if hasattr(v, "loop_stack"):
                self.loops = v.loop_stack[:]
            if v not in self.visited:
                self.visited.add(v)
                v.accept(self)
                if len(v.outEdges) == 0 and len(self.loops) != 0:
                    # TODO: Improve this error message by printing an
                    #       abstract stack trace
                    errors.fatalError("In loops at terminal statement at line %s" % v.lineNum)
                # If execution splits, take a copy of the current loop stack
                # and store a reference to it on each of the target nodes of
                # the out edges of the current node, so the state can be
                # restored later in the traversal
                if len(v.outEdges) > 1:
                    loop_stack = self.loops[:]
                    for target in v.outEdges:
                        target.loop_stack = loop_stack
                self.to_visit.extend(v.outEdges)

    def visitAstStatement(self, statement):
        """
        Do nothing for most AST statements
        """
        pass

    def visitRepeat(self, repeat_stmt):
        """Push the REPEAT statement onto the stack of open loops."""
        self.loops.append(repeat_stmt)

    def visitUntil(self, until_stmt):
        """
        Check that the innermost open loop is a REPEAT, pop it and link it to
        this UNTIL statement with connectLoop.
        """
        if len(self.loops) == 0:
            errors.fatalError("Not in a REPEAT loop at line %d." % until_stmt.lineNum)
        peek = self.loops[-1]
        if not isinstance(peek, Repeat):
            errors.fatalError("Not in a REPEAT loop at line %d; currently in %s loop opened at line %d" % (until_stmt.lineNum, peek.description, peek.lineNum))
        repeat_stmt = self.loops.pop()
        connectLoop(until_stmt, repeat_stmt)

    def visitWhile(self, while_stmt):
        """Push the WHILE statement onto the stack of open loops."""
        self.loops.append(while_stmt)

    def visitEndwhile(self, endwhile_stmt):
        """
        Check that the innermost open loop is a WHILE, pop it and link it to
        this ENDWHILE statement with connectLoop.
        """
        if len(self.loops) == 0:
            errors.fatalError("Not in a WHILE loop at line %d." % endwhile_stmt.lineNum)
        peek = self.loops[-1]
        if not isinstance(peek, While):
            errors.fatalError("Not in a WHILE loop at line %d; currently in %s loop opened at line %d" % (endwhile_stmt.lineNum, peek.description, peek.lineNum))
        while_stmt = self.loops.pop()
        connectLoop(endwhile_stmt, while_stmt)

    def visitForToStep(self, for_stmt):
        """Push the FOR statement onto the stack of open loops."""
        self.loops.append(for_stmt)

    def visitNext(self, next_stmt):
        """
        Pop FOR statements off the loop stack until one is found whose control
        variable name matches the first identifier of this NEXT statement, then
        link the two with connectLoop. A NEXT without identifiers adopts the
        identifier of the innermost FOR.
        """
        logging.debug("NEXT statement = %s", next_stmt)
        while True:
            if len(self.loops) == 0:
                errors.fatalError("Not in a FOR loop at line %d." % next_stmt.lineNum)
            peek = self.loops[-1]
            if not isinstance(peek, ForToStep):
                errors.fatalError("Not in a FOR loop at line %d; currently in %s loop opened at line %d" % (next_stmt.lineNum, peek.description, peek.lineNum))

            for_stmt = self.loops.pop()
            # If the next_stmt has no attached identifiers, it applies to the
            # top FOR statement on the stack
            if len(next_stmt.identifiers) == 0:
                next_stmt.identifiers.append(for_stmt.identifier)
            # TODO: Check that the symbols are equal, not just the names
            if for_stmt.identifier.identifier == next_stmt.identifiers[0].identifier:
                connectLoop(next_stmt, for_stmt)
                break
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
from owl_basic.visitor import Visitor
|
|
4
|
+
|
|
5
|
+
class DataVisitor(Visitor):
    '''
    Extract DATA from DATA statements and hidden DATA within REM statements.
    BBC BASIC allows any line to be RESTOREd to and will attempt to READ data
    from either the first DATA statement or the first COMMA. This means it
    is possible to do
    10 REM,"HELLO", "WORLD"
    20 RESTORE 10
    30 READ A$
    40 PRINT A$
    > RUN
    HELLO

    For this reason, we need to store anything following a COMMA in a REM
    statement. Any DATA keyword following a REM is irrelevant since it will
    not be tokenized, reading will start from the first COMMA.

    It is NOT possible to READ into a REMed data block from a previous DATA
    statement; the REMed line must be RESTOREd to directly
    '''
    def __init__(self):
        self.data = []   # All data items found so far, in visiting order
        self.index = {}  # physical 0-based line number -> data[index]

    def parse(self, data):
        "Parse the text following a DATA statement into items"
        # Break the data into fields: each match is a (quoted, unquoted) pair
        # of which at most one member is non-empty
        raw_items = re.findall(r'(?:\s*"((?:[^"]+|"")*)"(?!")\s*)|([^,]+)', data)
        items = []
        for i, (quoted, unquoted) in enumerate(raw_items):
            if quoted:
                # A doubled quote inside a quoted item stands for one quote
                item = quoted.replace('""', '"')
            else:
                item = unquoted.lstrip()
                # If its the last item on the line, strip trailing space
                if i == len(raw_items) - 1:
                    item = item.rstrip()
            items.append(item)
        return items

    def visitAstNode(self, node):
        "Visit the children of any node which has no more specific handler"
        node.forEachChild(self.visit)

    def visitData(self, statement):
        "Record where this statement's items start and append the items"
        logging.debug("DATA statement : %s", statement.data)
        self.index[statement.lineNum] = len(self.data)
        items = self.parse(statement.data)
        self.data.extend(items)

    def visitRem(self, statement):
        "Treat everything after the first comma of a REM as data items"
        logging.debug("REM statement : %s", statement.data)
        # Find the index of the first comma
        comma_index = statement.data.find(',')
        if comma_index != -1:
            # A comma was found, so it is possible to RESTORE to this line
            self.index[statement.lineNum] = len(self.data)
            items = self.parse(statement.data[comma_index+1:])
            self.data.extend(items)
|
owl_basic/decoder.py
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
#
|
|
3
|
+
# (c) 2007 Matt Godbolt.
|
|
4
|
+
#
|
|
5
|
+
# Updated 2008 Ian Smallshire.
|
|
6
|
+
#
|
|
7
|
+
# Get v0.01 @ http://xania.org/200711/bbc-basic-v-format
|
|
8
|
+
#
|
|
9
|
+
# Use however you like, as long as you put credit where credit's due.
|
|
10
|
+
# Some information obtained from source code from RISC OS Open.
|
|
11
|
+
# v0.01 - first release. Doesn't deal with GOTO line numbers. (c) 2007 Matt Godbolt
|
|
12
|
+
# v0.02 - edited to output line numbers where needed and fixed Ian Smallshire
|
|
13
|
+
# the GOTO/RESTORE/GOSUB line numbers.
|
|
14
|
+
# v0.03 - Added file type detection for input and provision Ian Smallshire
|
|
15
|
+
# for BB4W encoded tokens
|
|
16
|
+
# v0.04 - Now decodes BB4W tokens as well as Acorn. Ian Smallshire
|
|
17
|
+
# v0.05 - Corrected tokens inside strings. No longer detokenized Rob & Ian Smallshire
|
|
18
|
+
# v0.06 - Fixed line number decoding with line numbers over 32767 Ian Smallshire
|
|
19
|
+
|
|
20
|
+
#line numbers for bb4w still need testing properly.
|
|
21
|
+
#if input file is plain text it must be terminated by an EOL
|
|
22
|
+
import struct, re, getopt, sys
|
|
23
|
+
|
|
24
|
+
# The list of BBC BASIC V tokens:
|
|
25
|
+
# Base tokens, starting at 0x7f
|
|
26
|
+
|
|
27
|
+
class Decoder:
    """Common base of the decoders: keeps the raw input and the decoded lines."""

    def __init__(self, data):
        # Start with no decoded lines; the decode() methods of subclasses fill this
        self.lines = []
        self.data = data
|
|
32
|
+
|
|
33
|
+
#data = property(lambda self: self.__data)
|
|
34
|
+
|
|
35
|
+
class PlainTextDecoder(Decoder):
    """
    Decoder for untokenised program text. Subclasses supply the lineEnd
    string on which the text is split.
    """

    def __init__(self, data):
        super().__init__(data)

    def decode(self):
        """
        Split the text into (line_number, line_body) tuples.

        Either every line carries a leading line number or none does; a mixture
        raises an Exception. Lines without numbers are given 10, 20, 30, ...
        Returns self.lines.
        """
        raw_lines = self.data.split(self.lineEnd)

        # Remove any trailing empty line
        if not raw_lines[-1]:
            raw_lines.pop()

        numbered = None  # Tri-state None, True or False
        for position, text in enumerate(raw_lines):
            # TODO: Factor this regex out of here and decoder
            match = re.match(r'\s*(\d+)?\s?(.*)', text)
            number, body = match.group(1), match.group(2)
            this_line_numbered = number is not None

            if numbered is None:
                # The first line decides which convention the file uses
                numbered = this_line_numbered
            elif this_line_numbered != numbered:
                raise Exception("Inconsistent line numbering")

            # Fake line numbers if they are missing
            if not numbered:
                number = 10 * (position + 1)

            self.lines.append((number, body))

        return self.lines
|
|
70
|
+
|
|
71
|
+
class PlainTextCrDecoder(PlainTextDecoder):
    """Plain text decoder for lines terminated by a single carriage return."""
    fileTypeName = 'plain text CR'
    lineEnd = '\x0d'

    def __init__(self, data):
        super().__init__(data)
|
|
77
|
+
|
|
78
|
+
class PlainTextLfDecoder(PlainTextDecoder):
    """Plain text decoder for lines terminated by a single line feed."""
    fileTypeName = 'plain text LF'
    lineEnd = '\x0a'

    def __init__(self, data):
        super().__init__(data)
|
|
84
|
+
|
|
85
|
+
class PlainTextLfCrDecoder(PlainTextDecoder):
    """Plain text decoder for lines terminated by line feed then carriage return."""
    fileTypeName = 'plain text LFCR'
    lineEnd = '\x0a\x0d'

    def __init__(self, data):
        super().__init__(data)
|
|
91
|
+
|
|
92
|
+
class PlainTextCrLfDecoder(PlainTextDecoder):
    """Plain text decoder for lines terminated by carriage return then line feed."""
    fileTypeName = 'plain text CRLF'
    lineEnd = '\x0d\x0a'

    def __init__(self, data):
        super().__init__(data)
|
|
98
|
+
|
|
99
|
+
class BbcBasicAcornDecoder(Decoder):
    """Decoder for tokenised BBC BASIC programs in the Acorn file layout."""
    lineEnd = '\x0d'
    fileTypeName = 'BBC BASIC (Acorn)'

    def __init__(self, data):
        super(BbcBasicAcornDecoder, self).__init__(data)

    def decode(self):
        """
        Decode the program into a list of [line number, detokenised text].

        The data is consumed record by record, so self.data is shortened as
        decoding proceeds. Raises Exception("Bad program") if a record is
        truncated or declares a length shorter than its own 4 byte header.
        """
        while True:
            if len(self.data) < 2:
                raise Exception("Bad program")
            if self.data[1] == '\xff':
                break
            # {<cr> <linehi> <linelo> <len> <text>} <cr> <ff>
            if len(self.data) < 4:
                raise Exception("Bad program")
            lineNumber = ord(self.data[2]) + (ord(self.data[1]) * 256)
            length = ord(self.data[3])
            # The length counts the 4 header bytes; anything smaller would stop
            # the loop from advancing past this record
            if length < 4:
                raise Exception("Bad program")
            lineData = self.data[4:length]
            self.lines.append([lineNumber, self.detokenise(lineData)])
            self.data = self.data[length:]
            if len(self.data) <= len(self.lineEnd):
                # may need to check what data is in last chars
                # all tests have been ending tokens/CR/LF
                break
        return self.lines

    def detokenise(self, lineData):
        """Return lineData with its tokens expanded to keyword text."""
        # Acorn encoding
        # This regular expression is essentially:
        # Match a quoted string OR
        #
        # (Optional extension token) followed by
        # (REM token followed by the rest of the line)
        #     -- this ensures we don't detokenise the REM statement itself
        # OR
        # (Line number following token, with 3 characters in the range 64-127)
        # OR
        # (any token)
        return re.sub(r'"(?:(?:[^"]+|"")*)"(?!")|( ?)([\xc6-\xc8])?(\xf4.*|\x8d[\x40-\x7f]{3}|[\x7f-\xff])',
                      BbcBasicAcornDecoder.replaceFunc, lineData)

    @staticmethod
    def replaceFunc(match):
        """Return the replacement text for one match of the detokenise regex."""
        if match.group().startswith('"'):
            # Quoted strings are passed through untouched
            return match.group()
        else:
            prefix, ext, token = match.groups()
            if len(prefix) == 0:
                prefix = ' '
            tokenOrd = ord(token[0])
            if ext:  # An extended opcode, CASE/WHILE/SYS etc
                if ext == '\xc6':
                    return cfnTokens[tokenOrd-0x8e]
                if ext == '\xc7':
                    return comTokens[tokenOrd-0x8e]
                if ext == '\xc8':
                    return stmtTokens[tokenOrd-0x8e]
                raise Exception("Bad token")
            else:  # Normal token, plus any extra characters
                if token[0] == '\x8d':  # line number following token
                    # decode the 24 bit line number
                    return str(DecodeLineNo(token[1:]))
                else:
                    return prefix + tokens[tokenOrd - 127] + token[1:]
|
|
163
|
+
|
|
164
|
+
class BbcBasic8086(Decoder):
    """Decoder for tokenised BBC BASIC programs in the 80/86 (BB4W) file layout."""
    lineEnd = '\x0d'
    fileTypeName = 'BBC BASIC (80/86)'

    def __init__(self, data):
        super(BbcBasic8086, self).__init__(data)

    def decode(self):
        """
        Decode the program into a list of [line number, detokenised text].

        The data is consumed record by record, so self.data is shortened as
        decoding proceeds. Raises Exception("Bad program") if fewer than the
        3 header bytes of a record remain.
        """
        # TODO this needs testing
        # i have read somewhere that bb4w uses different tokens
        # and also has diff line number formatting
        # (http://bb4w.wikispaces.com/Format)
        # {<len> <linelo> <linehi> <text> <cr>} <00> <ff> <ff>
        while True:
            if len(self.data) < 3:
                raise Exception("Bad program")
            # TODO check if order of bytes is correct
            lineNumber = ord(self.data[1]) + (ord(self.data[2]) * 256)  # line number bytes in different order
            length = ord(self.data[0])
            # The <00> <ff> <ff> terminator reads as length 0 with line number
            # 0xffff. Stopping on a zero length also guarantees that every
            # pass through the loop consumes data.
            if length == 0 or lineNumber == 0xffff:
                break
            lineData = self.data[3:length]
            self.lines.append([lineNumber, self.detokenise(lineData)])
            self.data = self.data[length:]
            if len(self.data) <= len(self.lineEnd):
                # may need to check what data is in last chars
                # all tests have been ending tokens/CR/LF
                break
        return self.lines

    def detokenise(self, lineData):
        """Return lineData with its tokens expanded to keyword text."""
        # This uses BB4W encoding
        # This regular expression is essentially:
        # Match a quoted string OR
        #
        # (REM token followed by the rest of the line)
        #     -- this ensures we don't detokenise the REM statement itself
        # OR
        # (Line number following token, with 3 characters in the range 64-127)
        # OR
        # (any token 127-255)
        # OR
        # (any token 0-15) TODO check if 16 is needed (EXIT) i think
        return re.sub(r'"(?:(?:[^"]+|"")*)"(?!")|( ?)(\xf4.*|\x8d[\x40-\x7f]{3}|[\x7f-\xff]|[\x00-\x0f])',
                      BbcBasic8086.replaceFunc, lineData)

    @staticmethod
    def replaceFunc(match):
        """Return the replacement text for one match of the detokenise regex."""
        if match.group().startswith('"'):
            # Quoted strings are passed through untouched
            return match.group()
        else:
            prefix, token = match.groups()
            if len(prefix) == 0:
                prefix = ' '
            tokenOrd = ord(token[0])
            if token[0] == '\x8d':  # line number following token
                # decode the 24 bit line number
                return str(DecodeLineNo(token[1:]))
            else:
                # Flipping the top bit maps tokens 128-255 to table entries
                # 0-127 and tokens 0-15 to entries 128-143
                return prefix + bb4wTokens[tokenOrd ^ 128] + token[1:]


# The class is also referred to by this name, so keep both names bound
BbcBasic8086Decoder = BbcBasic8086
|
|
224
|
+
|
|
225
|
+
def fileType(data):
    '''
    Factory to produce the correct decoder depending on the file contents.

    The format is recognised from the final bytes of the file. Input given as
    bytes is first decoded as Latin-1, which maps every byte to the character
    with the same ordinal, so the decoders can work on str throughout.

    :param data: The complete file contents, as str or bytes
    :returns: A decoder instance constructed with the contents
    :raises Exception: If the data is shorter than 4 characters or ends in an
                       unrecognised sequence
    '''
    if isinstance(data, (bytes, bytearray)):
        data = bytes(data).decode('latin-1')

    if len(data) < 4:
        # TODO unsure how you want to return error
        raise Exception("Bad Program")

    fileExt = data[-4:]

    # Check final byte sequence (longest sequence first)
    if fileExt == '\x0d\x00\xff\xff':
        return BbcBasic8086(data)
    elif fileExt[2:4] == '\x0a\x0d':
        return PlainTextLfCrDecoder(data)
    elif fileExt[2:4] == '\x0d\x0a':
        return PlainTextCrLfDecoder(data)
    elif fileExt[2:4] == '\x0d\xff':
        return BbcBasicAcornDecoder(data)
    elif fileExt[3] == '\x0d':
        return PlainTextCrDecoder(data)
    elif fileExt[3] == '\x0a':
        return PlainTextLfDecoder(data)
    else:
        raise Exception("Unrecognised program format")
|
|
250
|
+
|
|
251
|
+
# Keyword for each base token value; the entry for token t is tokens[t - 0x7f]
tokens = [
    'OTHERWISE',                                                              # 0x7f
    'AND', 'DIV', 'EOR', 'MOD', 'OR', 'ERROR', 'LINE', 'OFF',                 # 0x80
    'STEP', 'SPC', 'TAB(', 'ELSE', 'THEN', '<line>', 'OPENIN', 'PTR',         # 0x88
    'PAGE', 'TIME', 'LOMEM', 'HIMEM', 'ABS', 'ACS', 'ADVAL', 'ASC',           # 0x90
    'ASN', 'ATN', 'BGET', 'COS', 'COUNT', 'DEG', 'ERL', 'ERR',                # 0x98
    'EVAL', 'EXP', 'EXT', 'FALSE', 'FN', 'GET', 'INKEY', 'INSTR(',            # 0xa0
    'INT', 'LEN', 'LN', 'LOG', 'NOT', 'OPENUP', 'OPENOUT', 'PI',              # 0xa8
    'POINT(', 'POS', 'RAD', 'RND', 'SGN', 'SIN', 'SQR', 'TAN',                # 0xb0
    'TO', 'TRUE', 'USR', 'VAL', 'VPOS', 'CHR$', 'GET$', 'INKEY$',             # 0xb8
    'LEFT$(', 'MID$(', 'RIGHT$(', 'STR$', 'STRING$(', 'EOF',                  # 0xc0
    '<ESCFN>', '<ESCCOM>', '<ESCSTMT>',                                       # 0xc6
    'WHEN', 'OF', 'ENDCASE', 'ELSE', 'ENDIF', 'ENDWHILE', 'PTR',              # 0xc9
    'PAGE', 'TIME', 'LOMEM', 'HIMEM', 'SOUND', 'BPUT', 'CALL', 'CHAIN',       # 0xd0
    'CLEAR', 'CLOSE', 'CLG', 'CLS', 'DATA', 'DEF', 'DIM', 'DRAW',             # 0xd8
    'END', 'ENDPROC', 'ENVELOPE', 'FOR', 'GOSUB', 'GOTO', 'GCOL', 'IF',       # 0xe0
    'INPUT', 'LET', 'LOCAL', 'MODE', 'MOVE', 'NEXT', 'ON', 'VDU',             # 0xe8
    'PLOT', 'PRINT', 'PROC', 'READ', 'REM', 'REPEAT', 'REPORT', 'RESTORE',    # 0xf0
    'RETURN', 'RUN', 'STOP', 'COLOUR', 'TRACE', 'UNTIL', 'WIDTH', 'OSCLI',    # 0xf8
]
|
|
270
|
+
|
|
271
|
+
# Referred to as "ESCFN" tokens in the source, starting at 0x8e.
|
|
272
|
+
# The entry for extended function token t is cfnTokens[t - 0x8e]
cfnTokens = [
    'SUM',   # 0x8e
    'BEAT',  # 0x8f
]
|
|
274
|
+
# Referred to as "ESCCOM" tokens in the source, starting at 0x8e.
|
|
275
|
+
# The entry for extended command token t is comTokens[t - 0x8e].
# Every entry needs its own comma: adjacent string literals are silently
# concatenated, which would merge two keywords and shift the entries after them.
comTokens = [
    'APPEND', 'AUTO', 'CRUNCH', 'DELETE', 'EDIT', 'HELP', 'LIST', 'LOAD',
    'LVAR', 'NEW', 'OLD', 'RENUMBER', 'SAVE', 'TEXTLOAD', 'TEXTSAVE', 'TWIN',
    'TWINO', 'INSTALL']
|
|
279
|
+
# Referred to as "ESCSTMT", starting at 0x8e.
|
|
280
|
+
# The entry for extended statement token t is stmtTokens[t - 0x8e]
stmtTokens = [
    'CASE', 'CIRCLE', 'FILL', 'ORIGIN',       # 0x8e
    'PSET', 'RECT', 'SWAP', 'WHILE',          # 0x92
    'WAIT', 'MOUSE', 'QUIT', 'SYS',           # 0x96
    'INSTALL', 'LIBRARY', 'TINT', 'ELLIPSE',  # 0x9a
    'BEATS', 'TEMPO', 'VOICES', 'VOICE',      # 0x9e
    'STEREO', 'OVERLAY',                      # 0xa2
]
|
|
284
|
+
# BB4W tokens....
|
|
285
|
+
# these tokens start at 128 and wrap around to 0-15
|
|
286
|
+
# Indexed by (token ^ 128): entries 0-127 are tokens 0x80-0xff and the
# entries from 128 onwards are the tokens which wrap around to 0x00 upwards
bb4wTokens = [
    'AND', 'DIV', 'EOR', 'MOD', 'OR', 'ERROR', 'LINE', 'OFF',                        # 0x80
    'STEP', 'SPC', 'TAB(', 'ELSE', 'THEN', '', 'OPENIN', 'PTR',                      # 0x88
    'PAGE', 'TIME', 'LOMEM', 'HIMEM', 'ABS', 'ACS', 'ADVAL', 'ASC',                  # 0x90
    'ASN', 'ATN', 'BGET', 'COS', 'COUNT', 'DEG', 'ERL', 'ERR',                       # 0x98
    'EVAL', 'EXP', 'EXT', 'FALSE', 'FN', 'GET', 'INKEY', 'INSTR(',                   # 0xa0
    'INT', 'LEN', 'LN', 'LOG', 'NOT', 'OPENUP', 'OPENOUT', 'PI',                     # 0xa8
    'POINT(', 'POS', 'RAD', 'RND', 'SGN', 'SIN', 'SQR', 'TAN',                       # 0xb0
    'TO', 'TRUE', 'USR', 'VAL', 'VPOS', 'CHR$', 'GET$', 'INKEY$',                    # 0xb8
    'LEFT$(', 'MID$(', 'RIGHT$(', 'STR$', 'STRING$(', 'EOF', 'SUM', 'WHILE',         # 0xc0
    'CASE', 'WHEN', 'OF', 'ENDCASE', 'OTHERWISE', 'ENDIF', 'ENDWHILE', 'PTR',        # 0xc8
    'PAGE', 'TIME', 'LOMEM', 'HIMEM', 'SOUND', 'BPUT', 'CALL', 'CHAIN',              # 0xd0
    'CLEAR', 'CLOSE', 'CLG', 'CLS', 'DATA', 'DEF', 'DIM', 'DRAW',                    # 0xd8
    'END', 'ENDPROC', 'ENVELOPE', 'FOR', 'GOSUB', 'GOTO', 'GCOL', 'IF',              # 0xe0
    'INPUT', 'LET', 'LOCAL', 'MODE', 'MOVE', 'NEXT', 'ON', 'VDU',                    # 0xe8
    'PLOT', 'PRINT', 'PROC', 'READ', 'REM', 'REPEAT', 'REPORT', 'RESTORE',           # 0xf0
    'RETURN', 'RUN', 'STOP', 'COLOUR', 'TRACE', 'UNTIL', 'WIDTH', 'OSCLI',           # 0xf8
    '', 'CIRCLE', 'ELLIPSE', 'FILL', 'MOUSE', 'ORIGIN', 'QUIT', 'RECTANGLE',         # 0x00
    'SWAP', 'SYS', 'TINT', 'WAIT', 'INSTALL', '', 'PRIVATE', 'BY', 'EXIT',           # 0x08
]
|
|
304
|
+
|
|
305
|
+
def DecodeLineNo(lineNo):
    """Returns a line number from a 24bit encoded line number"""
    first, second, third = ord(lineNo[0]), ord(lineNo[1]), ord(lineNo[2])
    # The shifted values are masked to 8 bits because on the 6502 the
    # Logical Shift moved the high bits out into carry
    high = third ^ ((first << 4) & 0xff)
    low = second ^ (((first & 0x30) << 2) & 0xff)
    return (high << 8) + low
|
|
315
|
+
|
|
316
|
+
def ReadLines(data):
    """Pick the decoder matching 'data' with fileType and return the
    lines produced by its decode() method."""
    return fileType(data).decode()
|
|
322
|
+
|
|
323
|
+
def decode(data, output):
    """Decode binary data 'data' and write the result to 'output'."""
    for number, text in ReadLines(data):
        output.write(str(number) + ' ')
        # Normalise line endings to \n
        output.write(text.strip() + '\n')
|
|
330
|
+
|
|
331
|
+
if __name__ == "__main__":
    # Command line use: decode the file INPUT and write the listing to OUTPUT
    optlist, args = getopt.getopt(sys.argv[1:], '')
    if len(args) != 2:
        print("Usage: %s INPUT OUTPUT" % sys.argv[0])
        sys.exit(1)
    # The decoders compare and index single characters, so turn the raw bytes
    # into a str in which every byte becomes the character of the same ordinal
    with open(args[0], 'rb') as source:
        entireFile = source.read().decode('latin-1')
    # The with block closes the output even if decoding fails part way through
    with open(args[1], 'w') as output:
        decode(entireFile, output)
|
owl_basic/errors.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
# Messages already reported through warning() or error(), so that each
# distinct message is logged only once
error_log = set()
|
|
5
|
+
|
|
6
|
+
def warning(message):
    """Log message at WARNING level, unless the same message was reported before."""
    if message in error_log:
        return
    logging.warning(message)
    error_log.add(message)
|
|
10
|
+
|
|
11
|
+
def error(message):
    """Log message at ERROR level, unless the same message was reported before."""
    if message in error_log:
        return
    logging.error(message)
    error_log.add(message)
|
|
15
|
+
|
|
16
|
+
def fatalError(message):
    """Log message at CRITICAL level, then exit the process with status 1."""
    logging.critical(message)
    raise SystemExit(1)
|
|
19
|
+
|
|
20
|
+
def internal(message):
    """Log message at CRITICAL level, then exit the process with status 1."""
    logging.critical(message)
    raise SystemExit(1)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Package for analysing and manipulating control flow.
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
from .entry_point_locator import locateEntryPoints
|
|
6
|
+
from .flow_graph_creator import createForwardControlFlowGraph
|
|
7
|
+
from .longjump_converter import convertLongjumpsToExceptions
|
|
8
|
+
from .subroutine_converter import convertSubroutinesToProcedures
|
|
9
|
+
from .basic_block_identifier import identifyBasicBlocks
|
|
10
|
+
from .basic_block_orderer import orderBasicBlocks
|
|
11
|
+
|
|
12
|
+
# Every function imported into the package namespace is part of its public API
__all__ = ["locateEntryPoints",
           "createForwardControlFlowGraph",
           "convertLongjumpsToExceptions",
           "convertSubroutinesToProcedures",
           "identifyBasicBlocks",
           "orderBasicBlocks"]
|
|
17
|
+
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Created on 30 Jan 2010
|
|
3
|
+
|
|
4
|
+
@author: rjs
|
|
5
|
+
'''
|
|
6
|
+
|
|
7
|
+
from owl_basic.cfg_vertex import CfgVertex
|
|
8
|
+
|
|
9
|
+
class BasicBlock(CfgVertex):
    '''
    A sequence of statements with a single entry and exit point
    '''

    def __init__(self, statements=None, *args, **kwargs):
        '''
        :param statements: A list of statements comprising the basic block.
                           The items are copied into a new list; if omitted
                           the block starts empty.

        Any further arguments are passed on to CfgVertex.
        '''
        super(BasicBlock, self).__init__(*args, **kwargs)
        # The list of statements comprising the basic block. A fresh list per
        # instance, so blocks never share or alias the caller's list
        self.statements = list(statements) if statements is not None else []
        self.topological_order = None  # Integer giving ordinal position in method
        self.label = None              # A label into which can be branched to, to enter this basic block
        self.is_label_marked = False   # A flag for whether the label has been marked

    @property
    def entryPoint(self):
        '''The first statement in the BasicBlock, or None'''
        return self.statements[0] if len(self.statements) > 0 else None

    @property
    def exitPoint(self):
        '''The last statement in the BasicBlock, or None'''
        return self.statements[-1] if len(self.statements) > 0 else None

    def __len__(self):
        '''The number of statements in the block'''
        return len(self.statements)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Grouping of statements into basic blocks - resulting in a coarser grained control flow graph
|
|
3
|
+
'''
|
|
4
|
+
import logging
|
|
5
|
+
logger = logging.getLogger('flow.basic_block_identifier')
|
|
6
|
+
logger.setLevel(logging.WARNING)
|
|
7
|
+
|
|
8
|
+
from .connectors import connect
|
|
9
|
+
from .traversal import depthFirstSearch
|
|
10
|
+
from .basic_block import BasicBlock
|
|
11
|
+
|
|
12
|
+
def identifyBasicBlocks(entry_points, options):
    '''
    Trace the control flow graph from each entry point and collect consecutive statements
    into basic blocks, comprising a more coarse grained control flow graph. This function applies
    a transformation to the statement level control flow graph, coarsening it by grouping statements
    into a graph consisting only of BasicBlock instances. Each BasicBlock instance contains a list of
    non-branching, or non-branch target statements.

    A basic block is code that has one entry point (i.e., no code within it is the destination
    of a jump instruction), one exit point and no jump instructions contained within it. The
    start of a basic block may be jumped to from more than one location. The end of a basic block
    may be a jump instruction or the statement before the destination of a jump instruction. Basic
    blocks are usually the basic unit to which compiler optimizations are applied. Basic blocks
    form the vertices or nodes in a control flow graph.

    :param entry_points: A dictionary mapping entry point names to the program statements
                         which are the entry points to the program or procedures
    :param options: Program options (not used by this function)
    :returns: A dictionary of entry blocks - BasicBlock instances through which control
              flow enters the graph of each program, function or procedure. The keys are
              the entry point names
    '''
    logger.info("Identifying basic blocks")
    # Diagnostics go through the logger instead of being printed to stdout
    logger.debug("entry_points = %s", entry_points)
    return {name: coarsenControlFlowGraph(statement)
            for name, statement in entry_points.items()}
|
|
37
|
+
|
|
38
|
+
def coarsenControlFlowGraph(entry_point):
    '''
    Group the statements reachable from entry_point into BasicBlocks.

    :param entry_point: The program statement through which the program, procedure or
                        function is entered.
    :returns: The BasicBlock to which entry_point was assigned
    '''
    logger.debug("entry_point = %s", entry_point)
    return assignBlockAndContinue(entry_point)
|
|
48
|
+
|
|
49
|
+
# TODO: Decorate as a tail-call
|
|
50
|
+
def assignBlockAndContinue(vertex, block=None):
    '''
    Assign vertex to block and continue with successor vertices

    :param vertex: The statement to be placed into a basic block
    :param block: The block of the predecessor, if vertex may join it, otherwise None
    :returns: The block containing vertex
    '''
    if not vertex.block:
        # The vertex can only join the predecessor's block if that is its sole
        # way in. The test is an explicit comparison with None so that it does
        # not depend on the truth value of the block object.
        if vertex.inDegree != 1 or block is None:
            block = BasicBlock()
        block.statements.append(vertex)
        vertex.block = block
        logger.debug("%s with in-degree %s and out-degree %s at %s in %s", vertex, str(vertex.inDegree), str(vertex.outDegree), str(vertex.lineNum), vertex.block)
        for target in vertex.outEdges:
            # Only a vertex with a single way out can share its block with its successor
            successor_block = assignBlockAndContinue(target, block if vertex.outDegree == 1 else None)
            if block is not successor_block:
                connect(block, successor_block)
    return vertex.block
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
|