python-cc 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pcc/__init__.py +0 -0
- pcc/__main__.py +3 -0
- pcc/ast/__init__.py +0 -0
- pcc/ast/ast.py +179 -0
- pcc/ast/ast_transforms.py +106 -0
- pcc/ast/c_ast.py +800 -0
- pcc/codegen/__init__.py +0 -0
- pcc/codegen/c_codegen.py +4177 -0
- pcc/evaluater/__init__.py +0 -0
- pcc/evaluater/c_evaluator.py +238 -0
- pcc/generator/__init__.py +0 -0
- pcc/generator/c_generator.py +399 -0
- pcc/lex/__init__.py +0 -0
- pcc/lex/c_lexer.py +495 -0
- pcc/lex/lexer.py +68 -0
- pcc/lex/token.py +24 -0
- pcc/parse/__init__.py +0 -0
- pcc/parse/c_parser.py +1700 -0
- pcc/parse/file_parser.py +82 -0
- pcc/parse/parser.py +300 -0
- pcc/parse/plyparser.py +56 -0
- pcc/pcc.py +38 -0
- pcc/ply/__init__.py +5 -0
- pcc/ply/cpp.py +908 -0
- pcc/ply/ctokens.py +133 -0
- pcc/ply/lex.py +1097 -0
- pcc/ply/yacc.py +3471 -0
- pcc/ply/ygen.py +74 -0
- pcc/preprocessor.py +509 -0
- pcc/project.py +78 -0
- pcc/util.py +121 -0
- python_cc-0.0.2.dist-info/METADATA +182 -0
- python_cc-0.0.2.dist-info/RECORD +36 -0
- python_cc-0.0.2.dist-info/WHEEL +4 -0
- python_cc-0.0.2.dist-info/entry_points.txt +2 -0
- python_cc-0.0.2.dist-info/licenses/LICENSE +25 -0
pcc/ply/cpp.py
ADDED
|
@@ -0,0 +1,908 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# cpp.py
|
|
3
|
+
#
|
|
4
|
+
# Author: David Beazley (http://www.dabeaz.com)
|
|
5
|
+
# Copyright (C) 2007
|
|
6
|
+
# All rights reserved
|
|
7
|
+
#
|
|
8
|
+
# This module implements an ANSI-C style lexical preprocessor for PLY.
|
|
9
|
+
# -----------------------------------------------------------------------------
|
|
10
|
+
from __future__ import generators
|
|
11
|
+
|
|
12
|
+
# -----------------------------------------------------------------------------
|
|
13
|
+
# Default preprocessor lexer definitions. These tokens are enough to get
|
|
14
|
+
# a basic preprocessor working. Other modules may import these if they want
|
|
15
|
+
# -----------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
# Token names exported for PLY.  Other modules may import these definitions
# to build a lexer compatible with this preprocessor.
tokens = (
   'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
)

# Punctuation/operator characters handled as single-character PLY literals.
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
|
|
22
|
+
|
|
23
|
+
# Whitespace
|
|
24
|
+
# Whitespace (spaces, tabs, and newlines all collapse into one token).
def t_CPP_WS(t):
    r'\s+'
    # Track physical line numbers so later tokens report correct positions.
    t.lexer.lineno += t.value.count("\n")
    return t
|
|
28
|
+
|
|
29
|
+
# '#' (stringize) and '##' (token paste) operators.  PLY orders string rules
# longest-first, so the two-character '##' rule wins when both could match.
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'
|
|
31
|
+
|
|
32
|
+
# Identifier (C-style: letter or underscore, then word characters).
t_CPP_ID = r'[A-Za-z_][\w_]*'
|
|
34
|
+
|
|
35
|
+
# Integer literal (decimal or hex, with optional u/U/l/L suffixes).
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    return t

# PLY picks up the rule through this alias; the function's docstring above
# supplies the regular expression.
t_CPP_INTEGER = CPP_INTEGER
|
|
41
|
+
|
|
42
|
+
# Floating literal: digits with a fraction and/or exponent, optional l/L/f/F suffix.
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
|
|
44
|
+
|
|
45
|
+
# String literal (non-greedy, allows escaped characters and escaped newlines).
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    # Embedded escaped newlines still advance the line counter.
    t.lexer.lineno += t.value.count("\n")
    return t
|
|
50
|
+
|
|
51
|
+
# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    # Escaped newlines inside the constant still advance the line counter.
    t.lexer.lineno += t.value.count("\n")
    return t
|
|
56
|
+
|
|
57
|
+
# Block comment (/* ... */), possibly spanning multiple lines.
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    ncr = t.value.count("\n")
    t.lexer.lineno += ncr
    # replace with one space or a number of '\n' -- the comment becomes
    # whitespace, but multi-line comments keep their newline count so the
    # logical-line grouping downstream stays correct.
    t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' '
    return t
|
|
65
|
+
|
|
66
|
+
# Line comment (// ... to end of line).  The comment is replaced by a single
# newline whitespace token so the line structure used by group_lines() is
# preserved.
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # replace with '/n'
    t.type = 'CPP_WS'; t.value = '\n'
    # Bug fix: the token must be returned.  Without this, PLY silently
    # discards it, the newline that terminated the commented line is lost,
    # and the following source line is merged into the current one.
    return t
|
|
71
|
+
|
|
72
|
+
def t_error(t):
    # Unknown character: pass it through as a single-character token whose
    # type is the character itself (mirrors PLY's 'literals' convention).
    t.type = t.value[0]
    t.value = t.value[0]
    t.lexer.skip(1)
    return t
|
|
77
|
+
|
|
78
|
+
import re
|
|
79
|
+
import copy
|
|
80
|
+
import time
|
|
81
|
+
import os.path
|
|
82
|
+
|
|
83
|
+
# -----------------------------------------------------------------------------
|
|
84
|
+
# trigraph()
|
|
85
|
+
#
|
|
86
|
+
# Given an input string, this function replaces all trigraph sequences.
|
|
87
|
+
# The following mapping is used:
|
|
88
|
+
#
|
|
89
|
+
# ??= #
|
|
90
|
+
# ??/ \
|
|
91
|
+
# ??' ^
|
|
92
|
+
# ??( [
|
|
93
|
+
# ??) ]
|
|
94
|
+
# ??! |
|
|
95
|
+
# ??< {
|
|
96
|
+
# ??> }
|
|
97
|
+
# ??- ~
|
|
98
|
+
# -----------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
# Pattern matching any ISO C trigraph sequence: '??' followed by one of the
# nine trigger characters.
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')

# Replacement for each trigraph, keyed by the final character of '??x'.
_trigraph_rep = {
    '=': '#',
    '/': '\\',
    "'": '^',
    '(': '[',
    ')': ']',
    '!': '|',
    '<': '{',
    '>': '}',
    '-': '~',
}


def trigraph(input):
    """Return *input* with every ISO C trigraph sequence replaced."""
    def _sub(match):
        return _trigraph_rep[match.group()[-1]]
    return _trigraph_pat.sub(_sub, input)
|
|
115
|
+
|
|
116
|
+
# ------------------------------------------------------------------
|
|
117
|
+
# Macro object
|
|
118
|
+
#
|
|
119
|
+
# This object holds information about preprocessor macros
|
|
120
|
+
#
|
|
121
|
+
# .name - Macro name (string)
|
|
122
|
+
# .value - Macro value (a list of tokens)
|
|
123
|
+
# .arglist - List of argument names
|
|
124
|
+
# .variadic - Boolean indicating whether or not variadic macro
|
|
125
|
+
# .vararg - Name of the variadic parameter
|
|
126
|
+
#
|
|
127
|
+
# When a macro is created, the macro replacement token sequence is
|
|
128
|
+
# pre-scanned and used to create patch lists that are later used
|
|
129
|
+
# during macro expansion
|
|
130
|
+
# ------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
class Macro(object):
    """Container describing one preprocessor macro.

    Attributes:
        name     -- macro name (string)
        value    -- replacement token list
        arglist  -- list of parameter names, or None for object-like macros
        variadic -- True when the macro takes a trailing '...' parameter
        vararg   -- name of the variadic parameter (set only when variadic)
        source   -- file the macro came from (filled in elsewhere)
    """

    def __init__(self, name, value, arglist=None, variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        self.source = None
        if variadic:
            # The variadic parameter is always declared last in arglist.
            self.vararg = arglist[-1]
|
|
141
|
+
|
|
142
|
+
# ------------------------------------------------------------------
|
|
143
|
+
# Preprocessor object
|
|
144
|
+
#
|
|
145
|
+
# Object representing a preprocessor. Contains macro definitions,
|
|
146
|
+
# include directories, and other information
|
|
147
|
+
# ------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
class Preprocessor(object):
|
|
150
|
+
def __init__(self,lexer=None):
    """Create a preprocessor.

    lexer -- a PLY lexer providing the CPP_* tokens defined above.  If
    omitted, a module-global ``lex.lexer`` is used as a fallback.
    """
    if lexer is None:
        # NOTE(review): 'lex' is only imported inside the __main__ block
        # below, so constructing Preprocessor() without a lexer raises
        # NameError here -- confirm callers always pass a lexer.
        lexer = lex.lexer
    self.lexer = lexer
    self.macros = { }       # macro name -> Macro object
    self.path = []          # permanent #include search path
    self.temp_path = []     # per-file search path (dir of the including file)

    # Probe the lexer for selected tokens
    self.lexprobe()

    # Predefine the standard date/time macros from the current local time.
    tm = time.localtime()
    self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
    self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
    self.parser = None      # active parsegen() generator (set by parse())
|
|
165
|
+
|
|
166
|
+
# -----------------------------------------------------------------------------
|
|
167
|
+
# tokenize()
|
|
168
|
+
#
|
|
169
|
+
# Utility function. Given a string of text, tokenize into a list of tokens
|
|
170
|
+
# -----------------------------------------------------------------------------
|
|
171
|
+
|
|
172
|
+
def tokenize(self, text):
    """Tokenize *text* with the attached lexer and return the token list."""
    self.lexer.input(text)
    result = []
    # Drain the lexer; token() yields a falsy value once input is exhausted.
    tok = self.lexer.token()
    while tok:
        result.append(tok)
        tok = self.lexer.token()
    return result
|
|
180
|
+
|
|
181
|
+
# ---------------------------------------------------------------------
|
|
182
|
+
# error()
|
|
183
|
+
#
|
|
184
|
+
# Report a preprocessor error/warning of some kind
|
|
185
|
+
# ----------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
def error(self, file, line, msg):
    """Report a preprocessor diagnostic as 'file:line msg' on stdout."""
    message = "%s:%d %s" % (file, line, msg)
    print(message)
|
|
189
|
+
|
|
190
|
+
# ----------------------------------------------------------------------
|
|
191
|
+
# lexprobe()
|
|
192
|
+
#
|
|
193
|
+
# This method probes the preprocessor lexer object to discover
|
|
194
|
+
# the token types of symbols that are important to the preprocessor.
|
|
195
|
+
# If this works right, the preprocessor will simply "work"
|
|
196
|
+
# with any suitable lexer regardless of how tokens have been named.
|
|
197
|
+
# ----------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
def lexprobe(self):
    """Probe the attached lexer to discover the token types it uses.

    Feeds small sample inputs through the lexer and records the token
    types that come back, so the preprocessor works with any suitably
    defined lexer regardless of token naming.  Sets self.t_ID,
    self.t_INTEGER, self.t_INTEGER_TYPE, self.t_STRING, self.t_SPACE,
    self.t_NEWLINE, and self.t_WS.
    """

    # Determine the token type for identifiers
    self.lexer.input("identifier")
    tok = self.lexer.token()
    if not tok or tok.value != "identifier":
        print("Couldn't determine identifier type")
    else:
        self.t_ID = tok.type

    # Determine the token type for integers
    self.lexer.input("12345")
    tok = self.lexer.token()
    if not tok or int(tok.value) != 12345:
        print("Couldn't determine integer type")
    else:
        self.t_INTEGER = tok.type
        # Remember the Python type of integer token values (str or int)
        # so literals can be synthesized later (e.g. for __LINE__).
        self.t_INTEGER_TYPE = type(tok.value)

    # Determine the token type for strings enclosed in double quotes
    self.lexer.input("\"filename\"")
    tok = self.lexer.token()
    if not tok or tok.value != "\"filename\"":
        print("Couldn't determine string type")
    else:
        self.t_STRING = tok.type

    # Determine the token type for whitespace--if any
    self.lexer.input("  ")
    tok = self.lexer.token()
    if not tok or tok.value != "  ":
        self.t_SPACE = None
    else:
        self.t_SPACE = tok.type

    # Determine the token type for newlines
    self.lexer.input("\n")
    tok = self.lexer.token()
    if not tok or tok.value != "\n":
        self.t_NEWLINE = None
        print("Couldn't determine token for newlines")
    else:
        self.t_NEWLINE = tok.type

    # Whitespace token types, used for membership tests throughout.
    self.t_WS = (self.t_SPACE, self.t_NEWLINE)

    # Check for other characters used by the preprocessor
    chars = [ '<','>','#','##','\\','(',')',',','.']
    for c in chars:
        self.lexer.input(c)
        tok = self.lexer.token()
        if not tok or tok.value != c:
            print("Unable to lex '%s' required for preprocessor" % c)
|
|
252
|
+
|
|
253
|
+
# ----------------------------------------------------------------------
|
|
254
|
+
# add_path()
|
|
255
|
+
#
|
|
256
|
+
# Adds a search path to the preprocessor.
|
|
257
|
+
# ----------------------------------------------------------------------
|
|
258
|
+
|
|
259
|
+
def add_path(self, path):
    """Append a directory to the permanent #include search path."""
    self.path.append(path)
|
|
261
|
+
|
|
262
|
+
# ----------------------------------------------------------------------
|
|
263
|
+
# group_lines()
|
|
264
|
+
#
|
|
265
|
+
# Given an input string, this function splits it into lines. Trailing whitespace
|
|
266
|
+
# is removed. Any line ending with \ is grouped with the next line. This
|
|
267
|
+
# function forms the lowest level of the preprocessor---grouping into text into
|
|
268
|
+
# a line-by-line format.
|
|
269
|
+
# ----------------------------------------------------------------------
|
|
270
|
+
|
|
271
|
+
def group_lines(self, input):
    """Generator: split *input* into logical lines and yield token lists.

    Trailing whitespace is stripped from every physical line and any line
    ending in a backslash is spliced together with the following line(s).
    Each yielded value is the list of tokens making up one logical line,
    including its terminating newline token.  This is the lowest level of
    the preprocessor: grouping text into a line-by-line format.
    """
    lex = self.lexer.clone()
    lines = [x.rstrip() for x in input.splitlines()]
    # Join continuation lines: fold every line ending in '\' into its
    # successor(s), leaving empty placeholders so line numbering holds.
    # Bug fix: xrange() does not exist in Python 3; use range().
    for i in range(len(lines)):
        j = i+1
        while lines[i].endswith('\\') and (j < len(lines)):
            lines[i] = lines[i][:-1]+lines[j]
            lines[j] = ""
            j += 1

    input = "\n".join(lines)
    lex.input(input)
    lex.lineno = 1

    current_line = []
    while True:
        tok = lex.token()
        if not tok:
            break
        current_line.append(tok)
        # A whitespace token containing '\n' terminates the logical line.
        if tok.type in self.t_WS and '\n' in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line
|
|
297
|
+
|
|
298
|
+
# ----------------------------------------------------------------------
|
|
299
|
+
# tokenstrip()
|
|
300
|
+
#
|
|
301
|
+
# Remove leading/trailing whitespace tokens from a token list
|
|
302
|
+
# ----------------------------------------------------------------------
|
|
303
|
+
|
|
304
|
+
def tokenstrip(self, tokens):
    """Remove leading and trailing whitespace tokens from *tokens* in
    place and return the (same) list."""
    lead = 0
    while lead < len(tokens) and tokens[lead].type in self.t_WS:
        lead += 1
    del tokens[:lead]
    tail = len(tokens)
    while tail > 0 and tokens[tail-1].type in self.t_WS:
        tail -= 1
    del tokens[tail:]
    return tokens
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# ----------------------------------------------------------------------
|
|
317
|
+
# collect_args()
|
|
318
|
+
#
|
|
319
|
+
# Collects comma separated arguments from a list of tokens. The arguments
|
|
320
|
+
# must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions)
|
|
321
|
+
# where tokencount is the number of tokens consumed, args is a list of arguments,
|
|
322
|
+
# and positions is a list of integers containing the starting index of each
|
|
323
|
+
# argument. Each argument is represented by a list of tokens.
|
|
324
|
+
#
|
|
325
|
+
# When collecting arguments, leading and trailing whitespace is removed
|
|
326
|
+
# from each argument.
|
|
327
|
+
#
|
|
328
|
+
# This function properly handles nested parenthesis and commas---these do not
|
|
329
|
+
# define new arguments.
|
|
330
|
+
# ----------------------------------------------------------------------
|
|
331
|
+
|
|
332
|
+
def collect_args(self,tokenlist):
    """Collect comma-separated macro-call arguments from *tokenlist*.

    The arguments must be enclosed in parenthesis.  Returns a tuple
    (tokencount, args, positions) where tokencount is the number of tokens
    consumed, args is a list of arguments (each a token list with
    leading/trailing whitespace stripped), and positions holds the
    starting index of each argument.  Nested parentheses and the commas
    inside them do not delimit arguments.  On malformed input an error is
    reported and (0, [], []) is returned.
    """
    args = []
    positions = []
    current_arg = []
    nesting = 1
    tokenlen = len(tokenlist)

    # Search for the opening '('.
    i = 0
    while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
        i += 1

    if (i < tokenlen) and (tokenlist[i].value == '('):
        positions.append(i+1)
    else:
        self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
        return 0, [], []

    i += 1

    while i < tokenlen:
        t = tokenlist[i]
        if t.value == '(':
            current_arg.append(t)
            nesting += 1
        elif t.value == ')':
            nesting -= 1
            if nesting == 0:
                # Closing paren of the call: flush the final argument.
                if current_arg:
                    args.append(self.tokenstrip(current_arg))
                positions.append(i)
                return i+1,args,positions
            current_arg.append(t)
        elif t.value == ',' and nesting == 1:
            # A top-level comma ends the current argument.
            args.append(self.tokenstrip(current_arg))
            positions.append(i+1)
            current_arg = []
        else:
            current_arg.append(t)
        i += 1

    # Missing end argument
    self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
    return 0, [],[]
|
|
376
|
+
|
|
377
|
+
# ----------------------------------------------------------------------
|
|
378
|
+
# macro_prescan()
|
|
379
|
+
#
|
|
380
|
+
# Examine the macro value (token sequence) and identify patch points
|
|
381
|
+
# This is used to speed up macro expansion later on---we'll know
|
|
382
|
+
# right away where to apply patches to the value to form the expansion
|
|
383
|
+
# ----------------------------------------------------------------------
|
|
384
|
+
|
|
385
|
+
def macro_prescan(self,macro):
    """Pre-scan a macro's replacement tokens and record patch points.

    Fills in three lists on *macro* that speed up later expansion:
      macro.patch           -- ('c'|'e', argnum, pos) argument patches;
                               'c' = concatenated (left unexpanded),
                               'e' = normal (macro-expanded first)
      macro.str_patch       -- (argnum, pos) stringizing (#arg) patches
      macro.var_comma_patch -- positions of ',' preceding __VA_ARGS__
                               that must vanish when the variadic part
                               is empty
    """
    macro.patch = [] # Standard macro arguments
    macro.str_patch = [] # String conversion expansion
    macro.var_comma_patch = [] # Variadic macro comma patch
    i = 0
    while i < len(macro.value):
        if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
            argnum = macro.arglist.index(macro.value[i].value)
            # Conversion of argument to a string
            if i > 0 and macro.value[i-1].value == '#':
                # Drop the '#'; the argument token itself becomes a string.
                macro.value[i] = copy.copy(macro.value[i])
                macro.value[i].type = self.t_STRING
                del macro.value[i-1]
                macro.str_patch.append((argnum,i-1))
                continue
            # Concatenation
            elif (i > 0 and macro.value[i-1].value == '##'):
                macro.patch.append(('c',argnum,i-1))
                del macro.value[i-1]
                continue
            elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                macro.patch.append(('c',argnum,i))
                i += 1
                continue
            # Standard expansion
            else:
                macro.patch.append(('e',argnum,i))
        elif macro.value[i].value == '##':
            # ', ## __VA_ARGS__' idiom: remember the comma position.
            if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                    ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                    (macro.value[i+1].value == macro.vararg):
                macro.var_comma_patch.append(i-1)
        i += 1
    # Sort patches by descending position so earlier splices during
    # expansion don't shift the indices of later ones.
    macro.patch.sort(key=lambda x: x[2],reverse=True)
|
|
419
|
+
|
|
420
|
+
# ----------------------------------------------------------------------
|
|
421
|
+
# macro_expand_args()
|
|
422
|
+
#
|
|
423
|
+
# Given a Macro and list of arguments (each a token list), this method
|
|
424
|
+
# returns an expanded version of a macro. The return value is a token sequence
|
|
425
|
+
# representing the replacement macro tokens
|
|
426
|
+
# ----------------------------------------------------------------------
|
|
427
|
+
|
|
428
|
+
def macro_expand_args(self,macro,args):
    """Expand *macro* with the given argument token lists.

    Returns a new token sequence representing the macro replacement with
    the stringizing, variadic-comma, concatenation, and normal-expansion
    patches recorded by macro_prescan() applied.
    """
    # Make a copy of the macro token sequence
    rep = [copy.copy(_x) for _x in macro.value]

    # Make string expansion patches.  These do not alter the length of the replacement sequence

    str_expansion = {}
    for argnum, i in macro.str_patch:
        if argnum not in str_expansion:
            # Stringize the raw argument text and escape backslashes.
            str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
        rep[i] = copy.copy(rep[i])
        rep[i].value = str_expansion[argnum]

    # Make the variadic macro comma patch.  If the variadic macro argument is empty, we get rid
    # of the preceding comma in the replacement sequence.
    comma_patch = False
    if macro.variadic and not args[-1]:
        for i in macro.var_comma_patch:
            rep[i] = None
            comma_patch = True

    # Make all other patches.  The order of these matters.  It is assumed that the patch list
    # has been sorted in reverse order of patch location since replacements will cause the
    # size of the replacement sequence to expand from the patch point.

    expanded = { }
    for ptype, argnum, i in macro.patch:
        # Concatenation.  Argument is left unexpanded
        if ptype == 'c':
            rep[i:i+1] = args[argnum]
        # Normal expansion.  Argument is macro expanded first
        elif ptype == 'e':
            if argnum not in expanded:
                expanded[argnum] = self.expand_macros(args[argnum])
            rep[i:i+1] = expanded[argnum]

    # Get rid of removed comma if necessary
    if comma_patch:
        rep = [_i for _i in rep if _i]

    return rep
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
# ----------------------------------------------------------------------
|
|
471
|
+
# expand_macros()
|
|
472
|
+
#
|
|
473
|
+
# Given a list of tokens, this function performs macro expansion.
|
|
474
|
+
# The expanded argument is a dictionary that contains macros already
|
|
475
|
+
# expanded. This is used to prevent infinite recursion.
|
|
476
|
+
# ----------------------------------------------------------------------
|
|
477
|
+
|
|
478
|
+
def expand_macros(self,tokens,expanded=None):
    """Perform macro expansion on *tokens* (in place) and return the list.

    *expanded* maps macro names currently being expanded in this call
    chain to True; it is used to prevent infinite recursion on
    self-referential macros.  __LINE__ is substituted here as well.
    """
    if expanded is None:
        expanded = {}
    i = 0
    while i < len(tokens):
        t = tokens[i]
        if t.type == self.t_ID:
            if t.value in self.macros and t.value not in expanded:
                # Yes, we found a macro match
                expanded[t.value] = True

                m = self.macros[t.value]
                if not m.arglist:
                    # A simple macro
                    ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
                    for e in ex:
                        e.lineno = t.lineno
                    tokens[i:i+1] = ex
                    i += len(ex)
                else:
                    # A macro with arguments
                    j = i + 1
                    while j < len(tokens) and tokens[j].type in self.t_WS:
                        j += 1
                    # NOTE(review): if the macro name is the last token, or
                    # is not followed by '(', this line raises IndexError or
                    # skips without advancing i (later PLY versions guard
                    # this with 'j < len(tokens)' and an else branch) --
                    # confirm inputs never hit that case.
                    if tokens[j].value == '(':
                        tokcount,args,positions = self.collect_args(tokens[j:])
                        if not m.variadic and len(args) != len(m.arglist):
                            self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
                            i = j + tokcount
                        elif m.variadic and len(args) < len(m.arglist)-1:
                            if len(m.arglist) > 2:
                                self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                            else:
                                self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                            i = j + tokcount
                        else:
                            if m.variadic:
                                # Pad or re-slice so the variadic parameter
                                # collects all remaining call arguments.
                                if len(args) == len(m.arglist)-1:
                                    args.append([])
                                else:
                                    args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                    del args[len(m.arglist):]

                            # Get macro replacement text
                            rep = self.macro_expand_args(m,args)
                            rep = self.expand_macros(rep,expanded)
                            for r in rep:
                                r.lineno = t.lineno
                            tokens[i:j+tokcount] = rep
                            i += len(rep)
                del expanded[t.value]
                continue
            elif t.value == '__LINE__':
                # Substitute the current line number as an integer token.
                t.type = self.t_INTEGER
                t.value = self.t_INTEGER_TYPE(t.lineno)

        i += 1
    return tokens
|
|
536
|
+
|
|
537
|
+
# ----------------------------------------------------------------------
|
|
538
|
+
# evalexpr()
|
|
539
|
+
#
|
|
540
|
+
# Evaluate an expression token sequence for the purposes of evaluating
|
|
541
|
+
# integral expressions.
|
|
542
|
+
# ----------------------------------------------------------------------
|
|
543
|
+
|
|
544
|
+
def evalexpr(self,tokens):
    """Evaluate a preprocessor conditional expression (a token list).

    Rewrites 'defined X' / 'defined(X)' into integer constants, expands
    remaining macros, treats any surviving identifier as 0, strips
    integer suffixes, maps C logical operators onto Python's, and
    evaluates the result.  Returns the value (0 on evaluation failure).
    """
    # Search for defined macros
    i = 0
    while i < len(tokens):
        if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
            j = i + 1
            needparen = False
            # Bug fix: "0L"/"1L" are not valid Python 3 int literals and
            # made t_INTEGER_TYPE(result) raise ValueError when the lexer
            # yields int-valued tokens; plain "0"/"1" work on 2 and 3.
            result = "0"
            while j < len(tokens):
                if tokens[j].type in self.t_WS:
                    j += 1
                    continue
                elif tokens[j].type == self.t_ID:
                    if tokens[j].value in self.macros:
                        result = "1"
                    else:
                        result = "0"
                    if not needparen: break
                elif tokens[j].value == '(':
                    needparen = True
                elif tokens[j].value == ')':
                    break
                else:
                    self.error(self.source,tokens[i].lineno,"Malformed defined()")
                j += 1
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE(result)
            del tokens[i+1:j+1]
        i += 1
    tokens = self.expand_macros(tokens)
    for i,t in enumerate(tokens):
        if t.type == self.t_ID:
            # Any identifier surviving macro expansion evaluates to 0.
            tokens[i] = copy.copy(t)
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE("0")
        elif t.type == self.t_INTEGER:
            tokens[i] = copy.copy(t)
            # Strip off any trailing suffixes (u/U/l/L)
            tokens[i].value = str(tokens[i].value)
            while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                tokens[i].value = tokens[i].value[:-1]

    expr = "".join([str(x.value) for x in tokens])
    expr = expr.replace("&&"," and ")
    expr = expr.replace("||"," or ")
    # Bug fix: protect '!=' before translating unary '!', otherwise
    # 'a != b' became the invalid expression 'a  not = b'.
    expr = expr.replace("!=","__CPP_NE__")
    expr = expr.replace("!"," not ")
    expr = expr.replace("__CPP_NE__"," != ")
    try:
        # NOTE: eval() of preprocessor expressions assumes the source
        # text being preprocessed is trusted.
        result = eval(expr)
    except Exception:
        # Bug fix: StandardError does not exist in Python 3; catching it
        # raised NameError instead of reporting the bad expression.
        self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
        result = 0
    return result
|
|
597
|
+
|
|
598
|
+
# ----------------------------------------------------------------------
|
|
599
|
+
# parsegen()
|
|
600
|
+
#
|
|
601
|
+
# Parse an input string/
|
|
602
|
+
# ----------------------------------------------------------------------
|
|
603
|
+
def parsegen(self,input,source=None):
    """Generator: preprocess *input* text and yield the resulting tokens.

    source -- filename used for __FILE__ and diagnostics.  Dispatches the
    directives #define/#include/#undef/#ifdef/#ifndef/#if/#elif/#else/
    #endif and macro-expands all ordinary text.
    """

    # Replace trigraph sequences
    t = trigraph(input)
    lines = self.group_lines(t)

    if not source:
        source = ""

    self.define("__FILE__ \"%s\"" % source)

    self.source = source
    chunk = []           # ordinary tokens buffered since the last directive
    enable = True        # False while inside a failed conditional branch
    iftrigger = False    # True once some branch of the current #if has run
    ifstack = []         # saved (enable, iftrigger) per nested conditional

    for x in lines:
        # Locate the first non-whitespace token of the logical line.
        for i,tok in enumerate(x):
            if tok.type not in self.t_WS: break
        if tok.value == '#':
            # Preprocessor directive

            # insert necessary whitespace instead of eaten tokens
            for tok in x:
                if tok.type in self.t_WS and '\n' in tok.value:
                    chunk.append(tok)

            dirtokens = self.tokenstrip(x[i+1:])
            if dirtokens:
                name = dirtokens[0].value
                args = self.tokenstrip(dirtokens[1:])
            else:
                name = ""
                args = []

            if name == 'define':
                if enable:
                    # Flush buffered text before mutating the macro table.
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    self.define(args)
            elif name == 'include':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    # Save/restore __FILE__ around the nested file.
                    oldfile = self.macros['__FILE__']
                    for tok in self.include(args):
                        yield tok
                    self.macros['__FILE__'] = oldfile
                    self.source = source
            elif name == 'undef':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    self.undef(args)
            elif name == 'ifdef':
                ifstack.append((enable,iftrigger))
                if enable:
                    if not args[0].value in self.macros:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'ifndef':
                ifstack.append((enable,iftrigger))
                if enable:
                    if args[0].value in self.macros:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'if':
                ifstack.append((enable,iftrigger))
                if enable:
                    result = self.evalexpr(args)
                    if not result:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'elif':
                if ifstack:
                    if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
                        if enable:         # If already true, we flip enable False
                            enable = False
                        elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                            result = self.evalexpr(args)
                            if result:
                                enable = True
                                iftrigger = True
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")

            elif name == 'else':
                if ifstack:
                    if ifstack[-1][0]:
                        if enable:
                            enable = False
                        elif not iftrigger:
                            enable = True
                            iftrigger = True
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #else")

            elif name == 'endif':
                if ifstack:
                    enable,iftrigger = ifstack.pop()
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
            else:
                # Unknown preprocessor directive
                pass

        else:
            # Normal text
            if enable:
                chunk.extend(x)

    # Flush whatever remains buffered at end of input.
    for tok in self.expand_macros(chunk):
        yield tok
    chunk = []
|
|
727
|
+
|
|
728
|
+
# ----------------------------------------------------------------------
|
|
729
|
+
# include()
|
|
730
|
+
#
|
|
731
|
+
# Implementation of file-inclusion
|
|
732
|
+
# ----------------------------------------------------------------------
|
|
733
|
+
|
|
734
|
+
def include(self,tokens):
    """Generator implementing #include: locate the file named by *tokens*
    and yield the preprocessed tokens of its contents.

    '<file>' forms search self.path before the temporary path; '"file"'
    forms search the temporary (including-file-relative) path first.
    """
    # Try to extract the filename and then process an include file
    if not tokens:
        return
    if tokens:
        if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
            tokens = self.expand_macros(tokens)

        if tokens[0].value == '<':
            # Include <...>
            i = 1
            while i < len(tokens):
                if tokens[i].value == '>':
                    break
                i += 1
            else:
                print("Malformed #include <...>")
                return
            filename = "".join([x.value for x in tokens[1:i]])
            path = self.path + [""] + self.temp_path
        elif tokens[0].type == self.t_STRING:
            filename = tokens[0].value[1:-1]
            path = self.temp_path + [""] + self.path
        else:
            print("Malformed #include statement")
            return
        for p in path:
            iname = os.path.join(p,filename)
            try:
                # Bug fix: close the file promptly with a context manager
                # instead of leaking the handle from open(...).read().
                with open(iname,"r") as ifile:
                    data = ifile.read()
                dname = os.path.dirname(iname)
                if dname:
                    # Make nested '"..."' includes resolve relative to
                    # this file's directory.
                    self.temp_path.insert(0,dname)
                for tok in self.parsegen(data,filename):
                    yield tok
                if dname:
                    del self.temp_path[0]
                break
            except IOError:
                pass
        else:
            print("Couldn't find '%s'" % filename)
|
|
776
|
+
|
|
777
|
+
# ----------------------------------------------------------------------
|
|
778
|
+
# define()
|
|
779
|
+
#
|
|
780
|
+
# Define a new macro
|
|
781
|
+
# ----------------------------------------------------------------------
|
|
782
|
+
|
|
783
|
+
def define(self,tokens):
    """Define a new macro.

    tokens may be a raw definition string (e.g. 'NAME value') or an
    already-tokenized definition line.  Supports object-like macros,
    function-like macros, and variadic macros (both 'NAME(...)' exposing
    __VA_ARGS__ and the named 'NAME(args...)' form).
    """
    # Bug fix: 'unicode' does not exist in Python 3 -- the old
    # isinstance(tokens,(str,unicode)) raised NameError on every
    # string-based definition (including __DATE__/__TIME__ in __init__).
    if isinstance(tokens,str):
        tokens = self.tokenize(tokens)

    linetok = tokens
    try:
        name = linetok[0]
        if len(linetok) > 1:
            mtype = linetok[1]
        else:
            mtype = None
        if not mtype:
            # Bare '#define NAME' -- empty object-like macro.
            m = Macro(name.value,[])
            self.macros[name.value] = m
        elif mtype.type in self.t_WS:
            # A normal macro
            m = Macro(name.value,self.tokenstrip(linetok[2:]))
            self.macros[name.value] = m
        elif mtype.value == '(':
            # A macro with arguments
            tokcount, args, positions = self.collect_args(linetok[1:])
            variadic = False
            for a in args:
                if variadic:
                    print("No more arguments may follow a variadic argument")
                    break
                astr = "".join([str(_i.value) for _i in a])
                if astr == "...":
                    # Anonymous variadic parameter: expose as __VA_ARGS__.
                    variadic = True
                    a[0].type = self.t_ID
                    a[0].value = '__VA_ARGS__'
                    del a[1:]
                    continue
                elif astr[-3:] == "..." and a[0].type == self.t_ID:
                    variadic = True
                    del a[1:]
                    # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                    # of macro expansion
                    if a[0].value[-3:] == '...':
                        a[0].value = a[0].value[:-3]
                    continue
                if len(a) > 1 or a[0].type != self.t_ID:
                    print("Invalid macro argument")
                    break
            else:
                mvalue = self.tokenstrip(linetok[1+tokcount:])
                # Remove whitespace around '##' so concatenation patches
                # line up with the adjacent argument tokens.
                i = 0
                while i < len(mvalue):
                    if i+1 < len(mvalue):
                        if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                            del mvalue[i]
                            continue
                        elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                            del mvalue[i+1]
                    i += 1
                m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
                self.macro_prescan(m)
                self.macros[name.value] = m
        else:
            print("Bad macro definition")
    except LookupError:
        print("Bad macro definition")
|
|
846
|
+
|
|
847
|
+
# ----------------------------------------------------------------------
|
|
848
|
+
# undef()
|
|
849
|
+
#
|
|
850
|
+
# Undefine a macro
|
|
851
|
+
# ----------------------------------------------------------------------
|
|
852
|
+
|
|
853
|
+
def undef(self, tokens):
    """Remove the macro named by the first token; unknown names are ignored."""
    name = tokens[0].value
    if name in self.macros:
        del self.macros[name]
|
|
859
|
+
|
|
860
|
+
# ----------------------------------------------------------------------
|
|
861
|
+
# parse()
|
|
862
|
+
#
|
|
863
|
+
# Parse input text.
|
|
864
|
+
# ----------------------------------------------------------------------
|
|
865
|
+
def parse(self,input,source=None,ignore=None):
    """Begin preprocessing *input*; tokens are then pulled via token().

    source -- filename for __FILE__/diagnostics.
    ignore -- collection of token types to suppress in token() output.
    """
    # Bug fix: the mutable default argument ignore={} was shared between
    # every call; use the None sentinel instead.  Callers that passed
    # their own mapping (or nothing) see identical behavior.
    self.ignore = {} if ignore is None else ignore
    self.parser = self.parsegen(input,source)
|
|
868
|
+
|
|
869
|
+
# ----------------------------------------------------------------------
|
|
870
|
+
# token()
|
|
871
|
+
#
|
|
872
|
+
# Method to return individual tokens
|
|
873
|
+
# ----------------------------------------------------------------------
|
|
874
|
+
def token(self):
    """Return the next non-ignored token, or None when input is exhausted."""
    try:
        # Skip over any token whose type appears in self.ignore.
        tok = next(self.parser)
        while tok.type in self.ignore:
            tok = next(self.parser)
        return tok
    except StopIteration:
        self.parser = None
        return None
|
|
882
|
+
|
|
883
|
+
# Demo driver: preprocess the file named on the command line and dump the
# resulting tokens, one per line, prefixed with the current source file.
if __name__ == '__main__':
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    # NOTE(review): the file handle is never closed; harmless for a
    # short-lived demo script but worth a 'with' block if reused.
    f = open(sys.argv[1])
    input = f.read()

    p = Preprocessor(lexer)
    p.parse(input,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print(p.source, tok)
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
|