python-cc 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pcc/lex/c_lexer.py ADDED
@@ -0,0 +1,495 @@
1
+ #------------------------------------------------------------------------------
2
+ # pycparser: c_lexer.py
3
+ #
4
+ # CLexer class: lexer for the C language
5
+ #
6
+ # Copyright (C) 2008-2015, Eli Bendersky
7
+ # License: BSD
8
+ #------------------------------------------------------------------------------
9
+ import re
10
+ import sys
11
+
12
+ from ..ply import lex
13
+ from ..ply.lex import TOKEN
14
+
15
+
16
class CLexer(object):
    """ A lexer for the C language. After building it, set the
        input text with input(), and call token() to get new
        tokens.

        The public attribute filename can be set to an initial
        filename, but the lexer will update it upon #line
        directives.
    """
    def __init__(self, error_func, on_lbrace_func, on_rbrace_func,
                 type_lookup_func):
        """ Create a new Lexer.

            error_func:
                An error function. Will be called with an error
                message, line and column as arguments, in case of
                an error during lexing.

            on_lbrace_func, on_rbrace_func:
                Called when an LBRACE or RBRACE is encountered
                (likely to push/pop type_lookup_func's scope)

            type_lookup_func:
                A type lookup function. Given a string, it must
                return True IFF this string is a name of a type
                that was defined with a typedef earlier.
        """
        self.error_func = error_func
        self.on_lbrace_func = on_lbrace_func
        self.on_rbrace_func = on_rbrace_func
        self.type_lookup_func = type_lookup_func
        self.filename = ''

        # Keeps track of the last token returned from self.token()
        self.last_token = None

        # Allow either "# line" or "# <num>" to support GCC's
        # cpp output.
        #
        # NOTE: these must be raw strings. '\W' and '\d' are invalid
        # escape sequences in ordinary string literals (a warning on
        # modern Python, slated to become a syntax error).
        self.line_pattern = re.compile(r'([ \t]*line\W)|([ \t]*\d+)')
        self.pragma_pattern = re.compile(r'[ \t]*pragma\W')

    def build(self, **kwargs):
        """ Builds the lexer from the specification. Must be
            called after the lexer object is created.

            This method exists separately, because the PLY
            manual warns against calling lex.lex inside
            __init__
        """
        self.lexer = lex.lex(object=self, **kwargs)

    def reset_lineno(self):
        """ Resets the internal line number counter of the lexer.
        """
        self.lexer.lineno = 1

    def input(self, text):
        """ Set the input text to tokenize. """
        self.lexer.input(text)

    def token(self):
        """ Return the next token (or None at end of input),
            remembering it in self.last_token.
        """
        self.last_token = self.lexer.token()
        return self.last_token

    def find_tok_column(self, token):
        """ Find the column of the token in its line.
        """
        # rfind returns -1 when there is no preceding newline, which
        # conveniently makes the first line's columns start at 1.
        last_cr = self.lexer.lexdata.rfind('\n', 0, token.lexpos)
        return token.lexpos - last_cr

    ######################--   PRIVATE   --######################

    ##
    ## Internal auxiliary methods
    ##
    def _error(self, msg, token):
        # Report through the user-supplied callback, then skip the
        # offending character so lexing can continue.
        location = self._make_tok_location(token)
        self.error_func(msg, location[0], location[1])
        self.lexer.skip(1)

    def _make_tok_location(self, token):
        return (token.lineno, self.find_tok_column(token))

    ##
    ## Reserved keywords
    ##
    keywords = (
        '_BOOL', '_COMPLEX', 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST',
        'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
        'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG',
        'REGISTER', 'OFFSETOF',
        'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
        'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
        'VOLATILE', 'WHILE',
    )

    # Map the source-text spelling of each keyword to its token name.
    # '_Bool' and '_Complex' keep their mixed-case C99 spelling; all
    # other keywords are simply the lowercase of the token name.
    keyword_map = {}
    for keyword in keywords:
        if keyword == '_BOOL':
            keyword_map['_Bool'] = keyword
        elif keyword == '_COMPLEX':
            keyword_map['_Complex'] = keyword
        else:
            keyword_map[keyword.lower()] = keyword

    ##
    ## All the tokens recognized by the lexer
    ##
    tokens = keywords + (
        # Identifiers
        'ID',

        # Type identifiers (identifiers previously defined as
        # types with typedef)
        'TYPEID',

        # constants
        'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN',
        'FLOAT_CONST', 'HEX_FLOAT_CONST',
        'CHAR_CONST',
        'WCHAR_CONST',

        # String literals
        'STRING_LITERAL',
        'WSTRING_LITERAL',

        # Operators
        'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
        'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
        'LOR', 'LAND', 'LNOT',
        'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

        # Assignment
        'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
        'PLUSEQUAL', 'MINUSEQUAL',
        'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
        'OREQUAL',

        # Increment/decrement
        'PLUSPLUS', 'MINUSMINUS',

        # Structure dereference (->)
        'ARROW',

        # Conditional operator (?)
        'CONDOP',

        # Delimiters
        'LPAREN', 'RPAREN',         # ( )
        'LBRACKET', 'RBRACKET',     # [ ]
        'LBRACE', 'RBRACE',         # { }
        'COMMA', 'PERIOD',          # , .
        'SEMI', 'COLON',            # ; :

        # Ellipsis (...)
        'ELLIPSIS',

        # pre-processor
        'PPHASH',       # '#'
    )

    ##
    ## Regexes for use in tokens
    ##

    # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
    identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

    hex_prefix = '0[xX]'
    hex_digits = '[0-9a-fA-F]+'
    bin_prefix = '0[bB]'
    bin_digits = '[01]+'

    # integer constants (K&R2: A.2.5.1)
    integer_suffix_opt = r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
    decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
    octal_constant = '0[0-7]*'+integer_suffix_opt
    hex_constant = hex_prefix+hex_digits+integer_suffix_opt
    bin_constant = bin_prefix+bin_digits+integer_suffix_opt

    bad_octal_constant = '0[0-7]*[89]'

    # character constants (K&R2: A.2.5.2)
    # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
    # directives with Windows paths as filenames (..\..\dir\file)
    # For the same reason, decimal_escape allows all digit sequences. We want to
    # parse all correct code, even if it means to sometimes parse incorrect
    # code.
    #
    simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
    decimal_escape = r"""(\d+)"""
    hex_escape = r"""(x[0-9a-fA-F]+)"""
    bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

    escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
    cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
    char_const = "'"+cconst_char+"'"
    wchar_const = 'L'+char_const
    unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
    bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""

    # string literals (K&R2: A.2.6)
    string_char = r"""([^"\\\n]|"""+escape_sequence+')'
    string_literal = '"'+string_char+'*"'
    wstring_literal = 'L'+string_literal
    bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

    # floating constants (K&R2: A.2.5.3)
    exponent_part = r"""([eE][-+]?[0-9]+)"""
    fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
    floating_constant = '(((('+fractional_constant+')'+exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
    binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
    hex_fractional_constant = '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
    hex_floating_constant = '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+binary_exponent_part+'[FfLl]?)'

    ##
    ## Lexer states: used for preprocessor \n-terminated directives
    ##
    states = (
        # ppline: preprocessor line directives
        #
        ('ppline', 'exclusive'),

        # pppragma: pragma
        #
        ('pppragma', 'exclusive'),
    )

    def t_PPHASH(self, t):
        r'[ \t]*\#'
        # Dispatch on what follows the '#': a #line directive, a
        # #pragma, or a plain preprocessor hash token.
        if self.line_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
            t.lexer.begin('ppline')
            self.pp_line = self.pp_filename = None
        elif self.pragma_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
            t.lexer.begin('pppragma')
        else:
            t.type = 'PPHASH'
            return t

    ##
    ## Rules for the ppline state
    ##
    @TOKEN(string_literal)
    def t_ppline_FILENAME(self, t):
        if self.pp_line is None:
            self._error('filename before line number in #line', t)
        else:
            self.pp_filename = t.value.lstrip('"').rstrip('"')

    @TOKEN(decimal_constant)
    def t_ppline_LINE_NUMBER(self, t):
        if self.pp_line is None:
            self.pp_line = t.value
        else:
            # Ignore: GCC's cpp sometimes inserts a numeric flag
            # after the file name
            pass

    def t_ppline_NEWLINE(self, t):
        r'\n'
        # End of the #line directive: commit the collected line
        # number/filename and return to normal lexing.
        if self.pp_line is None:
            self._error('line number missing in #line', t)
        else:
            self.lexer.lineno = int(self.pp_line)

            if self.pp_filename is not None:
                self.filename = self.pp_filename

        t.lexer.begin('INITIAL')

    def t_ppline_PPLINE(self, t):
        r'line'
        pass

    t_ppline_ignore = ' \t'

    def t_ppline_error(self, t):
        self._error('invalid #line directive', t)

    ##
    ## Rules for the pppragma state
    ##
    def t_pppragma_NEWLINE(self, t):
        r'\n'
        t.lexer.lineno += 1
        t.lexer.begin('INITIAL')

    def t_pppragma_PPPRAGMA(self, t):
        r'pragma'
        pass

    t_pppragma_ignore = ' \t<>.-{}();=+-*/$%@&^~!?:,0123456789'

    @TOKEN(string_literal)
    def t_pppragma_STR(self, t):
        # Pragma contents are discarded.
        pass

    @TOKEN(identifier)
    def t_pppragma_ID(self, t):
        # Pragma contents are discarded.
        pass

    def t_pppragma_error(self, t):
        self._error('invalid #pragma directive', t)

    ##
    ## Rules for the normal state
    ##
    t_ignore = ' \t'

    # C-style block comments /* ... */
    def t_BLOCK_COMMENT(self, t):
        r'/\*[\s\S]*?\*/'
        # Comment may span lines; keep lineno in sync.
        t.lexer.lineno += t.value.count('\n')

    # C++ style line comments // ...
    def t_LINE_COMMENT(self, t):
        r'//[^\n]*'

    # Newlines
    def t_NEWLINE(self, t):
        r'\n+'
        t.lexer.lineno += t.value.count("\n")

    # Operators
    t_PLUS              = r'\+'
    t_MINUS             = r'-'
    t_TIMES             = r'\*'
    t_DIVIDE            = r'/'
    t_MOD               = r'%'
    t_OR                = r'\|'
    t_AND               = r'&'
    t_NOT               = r'~'
    t_XOR               = r'\^'
    t_LSHIFT            = r'<<'
    t_RSHIFT            = r'>>'
    t_LOR               = r'\|\|'
    t_LAND              = r'&&'
    t_LNOT              = r'!'
    t_LT                = r'<'
    t_GT                = r'>'
    t_LE                = r'<='
    t_GE                = r'>='
    t_EQ                = r'=='
    t_NE                = r'!='

    # Assignment operators
    t_EQUALS            = r'='
    t_TIMESEQUAL        = r'\*='
    t_DIVEQUAL          = r'/='
    t_MODEQUAL          = r'%='
    t_PLUSEQUAL         = r'\+='
    t_MINUSEQUAL        = r'-='
    t_LSHIFTEQUAL       = r'<<='
    t_RSHIFTEQUAL       = r'>>='
    t_ANDEQUAL          = r'&='
    t_OREQUAL           = r'\|='
    t_XOREQUAL          = r'\^='

    # Increment/decrement
    t_PLUSPLUS          = r'\+\+'
    t_MINUSMINUS        = r'--'

    # ->
    t_ARROW             = r'->'

    # ?
    t_CONDOP            = r'\?'

    # Delimiters
    t_LPAREN            = r'\('
    t_RPAREN            = r'\)'
    t_LBRACKET          = r'\['
    t_RBRACKET          = r'\]'
    t_COMMA             = r','
    t_PERIOD            = r'\.'
    t_SEMI              = r';'
    t_COLON             = r':'
    t_ELLIPSIS          = r'\.\.\.'

    # Scope delimiters
    # To see why on_lbrace_func is needed, consider:
    #   typedef char TT;
    #   void foo(int TT) { TT = 10; }
    #   TT x = 5;
    # Outside the function, TT is a typedef, but inside (starting and ending
    # with the braces) it's a parameter.  The trouble begins with yacc's
    # lookahead token.  If we open a new scope in brace_open, then TT has
    # already been read and incorrectly interpreted as TYPEID.  So, we need
    # to open and close scopes from within the lexer.
    # Similar for the TT immediately outside the end of the function.
    #
    @TOKEN(r'\{')
    def t_LBRACE(self, t):
        self.on_lbrace_func()
        return t

    @TOKEN(r'\}')
    def t_RBRACE(self, t):
        self.on_rbrace_func()
        return t

    t_STRING_LITERAL = string_literal

    # The following floating and integer constants are defined as
    # functions to impose a strict order (otherwise, decimal
    # is placed before the others because its regex is longer,
    # and this is bad)
    #
    @TOKEN(floating_constant)
    def t_FLOAT_CONST(self, t):
        return t

    @TOKEN(hex_floating_constant)
    def t_HEX_FLOAT_CONST(self, t):
        return t

    @TOKEN(hex_constant)
    def t_INT_CONST_HEX(self, t):
        return t

    @TOKEN(bin_constant)
    def t_INT_CONST_BIN(self, t):
        return t

    @TOKEN(bad_octal_constant)
    def t_BAD_CONST_OCT(self, t):
        msg = "Invalid octal constant"
        self._error(msg, t)

    @TOKEN(octal_constant)
    def t_INT_CONST_OCT(self, t):
        return t

    @TOKEN(decimal_constant)
    def t_INT_CONST_DEC(self, t):
        return t

    # Must come before bad_char_const, to prevent it from
    # catching valid char constants as invalid
    #
    @TOKEN(char_const)
    def t_CHAR_CONST(self, t):
        return t

    @TOKEN(wchar_const)
    def t_WCHAR_CONST(self, t):
        return t

    @TOKEN(unmatched_quote)
    def t_UNMATCHED_QUOTE(self, t):
        msg = "Unmatched '"
        self._error(msg, t)

    @TOKEN(bad_char_const)
    def t_BAD_CHAR_CONST(self, t):
        msg = "Invalid char constant %s" % t.value
        self._error(msg, t)

    @TOKEN(wstring_literal)
    def t_WSTRING_LITERAL(self, t):
        return t

    # unmatched string literals are caught by the preprocessor

    @TOKEN(bad_string_literal)
    def t_BAD_STRING_LITERAL(self, t):
        msg = "String contains invalid escape code"
        self._error(msg, t)

    @TOKEN(identifier)
    def t_ID(self, t):
        # An identifier is a keyword, a previously typedef'd type
        # name, or a plain ID -- checked in that order.
        t.type = self.keyword_map.get(t.value, "ID")
        if t.type == 'ID' and self.type_lookup_func(t.value):
            t.type = "TYPEID"
        return t

    def t_error(self, t):
        msg = 'Illegal character %s' % repr(t.value[0])
        self._error(msg, t)
pcc/lex/lexer.py ADDED
@@ -0,0 +1,68 @@
1
+
2
+ from pcc.lex.token import TokenKind,Token
3
+
4
class Lexer(object):
    """Lexer for Kaleidoscope.

    Initialize the lexer with a string buffer. tokens() returns a generator
    that can be queried for tokens. The generator will emit an EOF token
    before stopping.
    """
    def __init__(self, buf):
        """Create a lexer over *buf* (any string, including empty).

        Previously an empty buffer was rejected with ``assert``, which is
        stripped under ``python -O`` and would then crash on ``buf[0]``.
        An empty buffer now simply produces a single EOF token.
        """
        self.buf = buf
        self.pos = 0
        # lastchar == '' is the end-of-input sentinel used throughout.
        self.lastchar = buf[0] if buf else ''

        self._keyword_map = {
            'def': TokenKind.DEF,
            'extern': TokenKind.EXTERN,
            'if': TokenKind.IF,
            'then': TokenKind.THEN,
            'else': TokenKind.ELSE,
            'for': TokenKind.FOR,
            'in': TokenKind.IN,
            'binary': TokenKind.BINARY,
            'unary': TokenKind.UNARY,
            'var': TokenKind.VAR,
        }

    def tokens(self):
        """Yield Token objects for the buffer, ending with an EOF token."""
        while self.lastchar:
            # Skip whitespace ('' is not whitespace, so this stops at EOF).
            while self.lastchar.isspace():
                self._advance()
            # Identifier or keyword
            if self.lastchar.isalpha():
                id_str = ''
                while self.lastchar.isalnum():
                    id_str += self.lastchar
                    self._advance()
                if id_str in self._keyword_map:
                    yield Token(kind=self._keyword_map[id_str], value=id_str)
                else:
                    yield Token(kind=TokenKind.IDENTIFIER, value=id_str)
            # Number
            elif self.lastchar.isdigit() or self.lastchar == '.':
                num_str = ''
                while self.lastchar.isdigit() or self.lastchar == '.':
                    num_str += self.lastchar
                    self._advance()
                yield Token(kind=TokenKind.NUMBER, value=num_str)
            # Comment: '#' to end of line, emits nothing.
            elif self.lastchar == '#':
                self._advance()
                while self.lastchar and self.lastchar not in '\r\n':
                    self._advance()
            elif self.lastchar:
                # Some other char -- a single-character operator token.
                yield Token(kind=TokenKind.OPERATOR, value=self.lastchar)
                self._advance()
        yield Token(kind=TokenKind.EOF, value='')

    def _advance(self):
        """Move to the next character, setting lastchar='' at end of input."""
        try:
            self.pos += 1
            self.lastchar = self.buf[self.pos]
        except IndexError:
            self.lastchar = ''
pcc/lex/token.py ADDED
@@ -0,0 +1,24 @@
1
+ from collections import namedtuple
2
+ from enum import Enum
3
+
4
+
5
+ # Each token is a tuple of kind and value. kind is one of the enumeration values
6
+ # in TokenKind. value is the textual value of the token in the input.
7
# Each token is a (kind, value) pair. `kind` is one of the TokenKind
# members below; `value` is the textual value of the token in the input.
# The enum is built with the functional API; member names and values are
# identical to an equivalent class-style definition.
_TOKEN_KINDS = [
    ('EOF', -1),
    ('DEF', -2),
    ('EXTERN', -3),
    ('IDENTIFIER', -4),
    ('NUMBER', -5),
    ('OPERATOR', -6),
    ('IF', -7),
    ('THEN', -8),
    ('ELSE', -9),
    ('FOR', -10),
    ('IN', -11),
    ('BINARY', -12),
    ('UNARY', -13),
    ('VAR', -14),
]

TokenKind = Enum('TokenKind', _TOKEN_KINDS)


Token = namedtuple('Token', ['kind', 'value'])
pcc/parse/__init__.py ADDED
File without changes