PyPI - lizard - Versions diffs - 1.17.30__py2.py3-none-any.whl → 1.18.0__py2.py3-none-any.whl - Mend

lizard 1.17.30__py2.py3-none-any.whl → 1.18.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

{lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/METADATA +6 -1
{lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/RECORD +25 -24
{lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/entry_points.txt +1 -0
lizard.py +43 -22
lizard_ext/__init__.py +14 -0
lizard_ext/checkstyleoutput.py +33 -0
lizard_ext/version.py +1 -1
lizard_languages/__init__.py +4 -2
lizard_languages/clike.py +60 -1
lizard_languages/code_reader.py +19 -0
lizard_languages/go.py +1 -1
lizard_languages/java.py +6 -3
lizard_languages/javascript.py +1 -1
lizard_languages/perl.py +20 -21
lizard_languages/php.py +14 -14
lizard_languages/python.py +37 -1
lizard_languages/r.py +290 -0
lizard_languages/rust.py +7 -2
lizard_languages/script_language.py +2 -2
lizard_languages/st.py +139 -0
lizard_languages/tsx.py +445 -11
lizard_languages/typescript.py +215 -15
lizard_languages/js_style_language_states.py +0 -184
lizard_languages/jsx.py +0 -337
{lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/LICENSE.txt +0 -0
{lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/WHEEL +0 -0
{lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/top_level.txt +0 -0

lizard_languages/php.py CHANGED Viewed

@@ -12,7 +12,7 @@ class PHPLanguageStates(CodeStateMachine):
     PHP-specific state machine that properly handles modern PHP syntax
     including classes, visibility modifiers and return types.
     """
     def __init__(self, context):
         super(PHPLanguageStates, self).__init__(context)
         self.function_name = ''
@@ -29,7 +29,7 @@ class PHPLanguageStates(CodeStateMachine):
         self.assignments = []
         self.in_match = False
         self.match_case_count = 0
     def _state_global(self, token):
         if token == 'class':
             self._state = self._class_declaration
@@ -76,7 +76,7 @@ class PHPLanguageStates(CodeStateMachine):
                 if self.in_trait:
                     self.in_trait = False
                     self.trait_name = None
         # Update tokens
         self.last_token = token
         if token not in [' ', '\t', '\n']:
@@ -87,7 +87,7 @@ class PHPLanguageStates(CodeStateMachine):
                 pass
             else:
                 self.last_tokens = ''
     def _trait_declaration(self, token):
         if token and not token.isspace() and token not in ['{', '(']:
             self.trait_name = token
@@ -96,7 +96,7 @@ class PHPLanguageStates(CodeStateMachine):
         elif token == '{':
             self.brace_level += 1
             self._state = self._state_global
     def _class_declaration(self, token):
         if token and not token.isspace() and token not in ['{', '(', 'extends', 'implements']:
             self.class_name = token
@@ -105,7 +105,7 @@ class PHPLanguageStates(CodeStateMachine):
         elif token == '{':
             self.brace_level += 1
             self._state = self._state_global
     def _function_name(self, token):
         if token and not token.isspace() and token != '(':
             method_name = token
@@ -140,7 +140,7 @@ class PHPLanguageStates(CodeStateMachine):
             self._state = self._function_args_continue
             self.context.push_new_function(self.function_name)
             self.started_function = True
     def _function_args(self, token):
         if token == '(':
             self.bracket_level = 1
@@ -151,7 +151,7 @@ class PHPLanguageStates(CodeStateMachine):
                 self.context.push_new_function(self.function_name)
             self.started_function = True
             self._state = self._function_args_continue
     def _function_args_continue(self, token):
         if token == '(':
             self.bracket_level += 1
@@ -165,7 +165,7 @@ class PHPLanguageStates(CodeStateMachine):
                 # Make sure we count each parameter uniquely
                 self.context.add_to_long_function_name(" " + token)
                 self.context.parameter(token)
     def _function_return_type_or_body(self, token):
         if token == ':':
             # Skip return type declaration
@@ -180,13 +180,13 @@ class PHPLanguageStates(CodeStateMachine):
                 self.context.end_of_function()
                 self.started_function = False
             self._state = self._state_global
     def _function_body_or_return_type(self, token):
         if token == '{':
             # Found the function body opening after return type
             self.brace_level += 1
             self._state = self._function_body
     def _function_body(self, token):
         if token == '{':
             self.brace_level += 1
@@ -198,12 +198,12 @@ class PHPLanguageStates(CodeStateMachine):
                     self.context.end_of_function()
                     self.started_function = False
                 self._state = self._state_global
     def _condition_expected(self, token):
         if token == '(':
             self.bracket_level = 1
             self._state = self._condition_continue
     def _condition_continue(self, token):
         if token == '(':
             self.bracket_level += 1
@@ -216,7 +216,7 @@ class PHPLanguageStates(CodeStateMachine):
         if token == '(':
             self.bracket_level = 1
             self._state = self._match_expression_continue
     def _match_expression_continue(self, token):
         if token == '(':
             self.bracket_level += 1

lizard_languages/python.py CHANGED Viewed

@@ -37,15 +37,51 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
     def __init__(self, context):
         super(PythonReader, self).__init__(context)
         self.parallel_states = [PythonStates(context, self)]
+        self._last_meaningful_token = None  # Track the last meaningful token
     @staticmethod
     def generate_tokens(source_code, addition='', token_class=None):
         return ScriptLanguageMixIn.generate_common_tokens(
             source_code,
-            r"|(?:\"\"\"(?:\\.|[^\"]|\"(?!\"\")|\"\"(?!\"))*\"\"\")" +
+            r"|(?:\"\"\"(?:\\.|[^\"]|\"(?!\"\")|\"\"(?!\"))*\"\"\")" +
             r"|(?:\'\'\'(?:\\.|[^\']|\'(?!\'\')|\'\'(?!\'))*\'\'\')",
             token_class)
+    def process_token(self, token):
+        """Process triple-quoted strings used as comments.
+        Triple-quoted strings that are not docstrings (i.e., not immediately
+        after function definitions) should be treated like comments and not
+        counted in NLOC, but only if they appear to be standalone statements
+        rather than part of assignments or other expressions.
+        Returns:
+            bool: True if the token was handled specially, False otherwise
+        """
+        if (token.startswith('"""') or token.startswith("'''")) and len(token) >= 6:
+            # Check if this is likely a standalone comment (not a docstring)
+            # Docstrings are handled separately in _state_first_line
+            current_state = self.parallel_states[0]._state
+            # If we're not in the first line state, check if this is a standalone string
+            if current_state != current_state.__self__._state_first_line:
+                # Check if the immediate previous meaningful token suggests this is part of an expression
+                assignment_tokens = ['=', '+=', '-=', '*=', '/=', '%=', '//=', '**=', '&=', '|=', '^=',
+                                     '<<=', '>>=', '(', 'return', ',', '[', '+', '-', '*', '/', '%']
+                is_part_of_expression = self._last_meaningful_token in assignment_tokens
+                # Only treat as comment if it's NOT part of an expression
+                if not is_part_of_expression:
+                    # Subtract the NLOC contribution of this triple-quoted string
+                    self.context.add_nloc(-(token.count('\n') + 1))
+        # Update last meaningful token (ignore whitespace and newlines)
+        if token not in ['\n', ' ', '\t'] and not token.isspace():
+            self._last_meaningful_token = token
+        return False  # Continue with normal processing
     def preprocess(self, tokens):
         indents = PythonIndents(self.context)
         current_leading_spaces = 0

lizard_languages/r.py ADDED Viewed

@@ -0,0 +1,290 @@
+'''
+Language parser for R
+'''
+from .code_reader import CodeReader, CodeStateMachine
+from .script_language import ScriptLanguageMixIn
+class RReader(CodeReader, ScriptLanguageMixIn):
+    """R language reader for parsing R code and calculating complexity metrics."""
+    ext = ['r', 'R']
+    language_names = ['r', 'R']
+    # R-specific conditions that increase cyclomatic complexity
+    _conditions = {
+        'if', 'else if', 'for', 'while', 'repeat', 'switch',
+        '&&', '||', '&', '|', 'ifelse',
+        'tryCatch', 'try'
+    }
+    def __init__(self, context):
+        super(RReader, self).__init__(context)
+        self.parallel_states = [RStates(context)]
+    def preprocess(self, tokens):
+        """Preprocess tokens - for now just pass them through."""
+        for token in tokens:
+            yield token
+    @staticmethod
+    def generate_tokens(source_code, addition='', token_class=None):
+        """Generate tokens for R code with R-specific patterns."""
+        # R-specific token patterns
+        r_patterns = (
+            r"|<-"          # Assignment operator <-
+            r"|->"          # Assignment operator ->
+            r"|%[a-zA-Z_*/>]+%"  # Special operators like %in%, %*%, %>%, %/%, etc.
+            r"|\.\.\."      # Ellipsis for variable arguments
+            r"|:::"         # Internal namespace operator (must come before ::)
+            r"|::"          # Namespace operator
+        )
+        return ScriptLanguageMixIn.generate_common_tokens(
+            source_code,
+            r_patterns + addition,
+            token_class
+        )
+class RStates(CodeStateMachine):
+    """State machine for parsing R function definitions and complexity."""
+    def __init__(self, context):
+        super(RStates, self).__init__(context)
+        self.recent_tokens = []  # Track recent tokens to find function names
+        self.brace_count = 0  # Track brace nesting for function bodies
+        self.in_braced_function = False  # Track if current function uses braces
+        self.additional_function_names = []  # Store additional names for multiple assignment
+    def _state_global(self, token):
+        """Global state - looking for function definitions."""
+        # Track recent non-whitespace tokens
+        if not token.isspace() and token != '\n':
+            self.recent_tokens.append(token)
+            if len(self.recent_tokens) > 10:  # Keep only last 10 tokens
+                self.recent_tokens.pop(0)
+        # Look for function keyword after assignment operators
+        if token == 'function':
+            # Check if we have recent tokens: [name, assignment_op, 'function']
+            if len(self.recent_tokens) >= 2:
+                # recent_tokens now contains [..., assignment_op, 'function']
+                assignment_op = self.recent_tokens[-2]  # The token before 'function'
+                if assignment_op in ['<-', '=']:
+                    # Handle multiple assignments by creating separate functions
+                    func_names = self._extract_function_names()
+                    # Create the first function (this will be the main one with the function body)
+                    self._start_function(func_names[0])
+                    self._state = self._function_params
+                    # Store additional names for later processing
+                    self.additional_function_names = func_names[1:] if len(func_names) > 1 else []
+                    return
+            # If we get here, it's an anonymous function or not a proper assignment
+            self._start_function("(anonymous)")
+            self._state = self._function_params
+    def _extract_function_names(self):
+        """Extract all function names from recent tokens, handling multiple assignments."""
+        if len(self.recent_tokens) < 3:
+            return ["(anonymous)"]
+        # Look backwards from the assignment operator to find all function names
+        # For multiple assignment like: a <- b <- c <- function(...)
+        # recent_tokens ends with [..., 'a', '<-', 'b', '<-', 'c', '<-', 'function']
+        assignment_index = len(self.recent_tokens) - 2  # Position of assignment operator
+        function_names = []
+        i = assignment_index - 1  # Start from token before assignment operator
+        current_name_tokens = []
+        while i >= 0:
+            token = self.recent_tokens[i]
+            # If we hit an assignment operator, we've found a complete variable name
+            if token in ['<-', '=']:
+                if current_name_tokens:
+                    function_names.append(''.join(reversed(current_name_tokens)))
+                    current_name_tokens = []
+                i -= 1
+                continue
+            # Stop if we hit keywords or operators that shouldn't be part of function names
+            if token in ['function', '(', ')', '{', '}', '\n']:
+                break
+            # Valid R identifier characters and dots
+            if token.replace('_', 'a').replace('.', 'a').isalnum() or token == '.':
+                current_name_tokens.append(token)
+                i -= 1
+            else:
+                break
+        # Add the last name if we have one
+        if current_name_tokens:
+            function_names.append(''.join(reversed(current_name_tokens)))
+        # Return names in the correct order (left to right as they appear in code)
+        return list(reversed(function_names)) if function_names else ["(anonymous)"]
+    def _extract_function_name(self):
+        """Extract the first function name (for backward compatibility)."""
+        names = self._extract_function_names()
+        return names[0] if names else "(anonymous)"
+    def _start_function(self, name):
+        """Start tracking a new function."""
+        self.context.restart_new_function(name)
+    def _function_params(self, token):
+        """Expecting function parameters."""
+        if token == '(':
+            self.context.add_to_long_function_name("(")
+            self._state = self._read_params
+        else:
+            # Single expression function without parentheses - rare in R
+            self._state = self._function_body
+            self._function_body(token)
+    def _read_params(self, token):
+        """Read function parameters until closing parenthesis."""
+        if token == ')':
+            self.context.add_to_long_function_name(")")
+            self._state = self._function_body
+        elif token not in ['\n'] and not token.isspace():
+            self.context.parameter(token)
+            if token != '(':
+                self.context.add_to_long_function_name(" " + token)
+    def _function_body(self, token):
+        """In function body - track complexity and nested functions."""
+        # Note: Complexity conditions are automatically counted by the framework
+        # based on reader.conditions, so we don't need to manually count them here
+        # Continue tracking tokens even in function body for nested function detection
+        if not token.isspace() and token != '\n':
+            self.recent_tokens.append(token)
+            if len(self.recent_tokens) > 10:  # Keep only last 10 tokens
+                self.recent_tokens.pop(0)
+        # Track braces
+        if token == '{':
+            if self.brace_count == 0:
+                self.in_braced_function = True
+            self.brace_count += 1
+        elif token == '}':
+            self.brace_count -= 1
+            if self.brace_count == 0 and self.in_braced_function:
+                # End of braced function
+                self._end_current_function()
+                return
+        # Handle nested functions - treat them as separate functions
+        if token == 'function':
+            # Check if this is a nested function assignment
+            if len(self.recent_tokens) >= 2:
+                assignment_op = self.recent_tokens[-2]  # The token before 'function'
+                if assignment_op in ['<-', '=']:
+                    # End current function first
+                    self.context.end_of_function()
+                    # Handle multiple assignments for nested functions too
+                    func_names = self._extract_function_names()
+                    # Start a new function for the nested function
+                    self._start_function(func_names[0])
+                    self._state = self._function_params
+                    # Reset brace counting for the new function
+                    self.brace_count = 0
+                    self.in_braced_function = False
+                    # Store additional names for later processing
+                    self.additional_function_names = func_names[1:] if len(func_names) > 1 else []
+                    return
+        # For single-line functions without braces, end at newline
+        elif token == '\n' and not self.in_braced_function:
+            self._end_current_function()
+    def _end_current_function(self):
+        """End the current function and reset state."""
+        # Check if this might be a right assignment case
+        # We need to temporarily not end the function to see if there's a right assignment
+        self._state = self._check_right_assignment
+        self.brace_count = 0
+        self.in_braced_function = False
+    def _check_right_assignment(self, token):
+        """Check if there's a right assignment after function end."""
+        # Skip whitespace and comments
+        if token.isspace() or token == '\n' or token.startswith('#'):
+            return
+        # Look for right assignment operator
+        if token == '->':
+            self._state = self._read_right_assignment_name
+            return
+        # If we encounter anything else, this is not a right assignment
+        # End the function and create additional functions for multiple assignments
+        self._finalize_function_with_multiple_assignments()
+        self._state = self._state_global
+        self._state_global(token)
+    def _finalize_function_with_multiple_assignments(self):
+        """End the current function and create additional functions for multiple assignments."""
+        # Get the current function's information before ending it
+        current_func = self.context.current_function
+        # End the current function
+        self.context.end_of_function()
+        # Create additional function entries for multiple assignments
+        if self.additional_function_names and current_func:
+            for func_name in self.additional_function_names:
+                # Create a new function with the same complexity and line info
+                self.context.restart_new_function(func_name)
+                # Copy the complexity from the original function
+                if hasattr(current_func, 'cyclomatic_complexity'):
+                    self.context.current_function.cyclomatic_complexity = current_func.cyclomatic_complexity
+                # Set the same line range
+                self.context.current_function.start_line = current_func.start_line
+                self.context.current_function.end_line = current_func.end_line
+                # End this function immediately
+                self.context.end_of_function()
+        # Clear the additional names
+        self.additional_function_names = []
+    def _read_right_assignment_name(self, token):
+        """Read the function name after right assignment operator."""
+        # Skip whitespace
+        if token.isspace() or token == '\n':
+            return
+        # This should be the function name
+        if token.replace('_', 'a').replace('.', 'a').isalnum() or token == '.':
+            # Update the current function's name
+            if self.context.current_function:
+                self.context.current_function.name = token
+            # End the function and create additional functions for multiple assignments
+            self._finalize_function_with_multiple_assignments()
+            self._state = self._state_global
+            return
+        # If we get something unexpected, treat as anonymous function
+        self._finalize_function_with_multiple_assignments()
+        self._state = self._state_global
+        self._state_global(token)
+    def statemachine_before_return(self):
+        """Called when processing is complete - end any open functions."""
+        if self._state in [self._function_body, self._check_right_assignment, self._read_right_assignment_name]:
+            # End any open function and process multiple assignments
+            if hasattr(self.context, 'current_function') and self.context.current_function:
+                self._finalize_function_with_multiple_assignments()

lizard_languages/rust.py CHANGED Viewed

@@ -1,5 +1,5 @@
 '''
-Language parser for Go lang
+Language parser for Rust lang
 '''
 from .code_reader import CodeReader
@@ -16,9 +16,14 @@ class RustReader(CodeReader, CCppCommentsMixin):
                       'case', 'match', 'where'])
     def __init__(self, context):
-        super(RustReader, self).__init__(context)
+        super().__init__(context)
         self.parallel_states = [RustStates(context)]
+    @staticmethod
+    def generate_tokens(source_code, addition='', token_class=None):
+        addition = r"|(?:'\w+\b)"  # lifetimes, labels
+        return CodeReader.generate_tokens(source_code, addition, token_class)
 class RustStates(GoLikeStates):  # pylint: disable=R0903
     FUNC_KEYWORD = 'fn'

lizard_languages/script_language.py CHANGED Viewed

@@ -12,13 +12,13 @@ class ScriptLanguageMixIn:
         if token.startswith("#"):
             # For forgiveness comments, return the entire comment with directive intact
             stripped = token.lstrip('#').strip()
             # Handle forgiveness directives with proper formatting
             if stripped.startswith('lizard forgive global') or stripped.startswith('#lizard forgive global'):
                 return '#lizard forgive global'  # Preserve global directive
             elif stripped.startswith('lizard forgive') or stripped.startswith('#lizard forgive'):
                 return '#lizard forgive'  # Return standardized forgiveness comment
             return stripped  # Return the stripped comment for other cases
         return None

lizard_languages/st.py ADDED Viewed

@@ -0,0 +1,139 @@
+'''
+Language parser for Structured Text.
+'''
+import re
+# import itertools
+from .code_reader import CodeStateMachine, CodeReader
+class StCommentsMixin(object):  # pylint: disable=R0903
+    @staticmethod
+    def get_comment_from_token(token):
+        if token.startswith("(*") or token.startswith("//"):
+            return token[2:]
+class StReader(CodeReader, StCommentsMixin):
+    ''' This is the reader for Structured Text. '''
+    ext = ["st"]
+    language_names = ['st']
+    macro_pattern = re.compile(r"#\s*(\w+)\s*(.*)", re.M | re.S)
+    # track block starters
+    _conditions = set([
+        'if', 'elsif', 'case', 'for', 'while', 'repeat',
+        'IF', 'ELSIF', 'CASE', 'FOR', 'WHILE', 'REPEAT'
+    ])
+    _functions = set([
+        'FUNCTION_BLOCK', 'FUNCTION', 'ACTION'
+    ])
+    _blocks = set([
+        'IF', 'FOR', 'WHILE', 'CASE', 'REPEAT',
+    ])
+    _ends = set([
+        'END',
+    ])
+    # Nesting Depth
+    loops = [
+        'if', 'case', 'for', 'while', 'repeat',
+        'IF', 'CASE', 'FOR', 'WHILE', 'REPEAT'
+    ]
+    bracket = 'END'
+    def __init__(self, context):
+        super(StReader, self).__init__(context)
+        self.parallel_states = (
+            StStates(context, self),
+        )
+    @staticmethod
+    def generate_tokens(source_code, addition='', token_class=None):
+        # Capture everything until end of logical line, where lines may be continued with \ at the end.”
+        _until_end = r'(?:\\\n|[^\n])*'
+        block_endings = '|'.join(f'END_{_}' for _ in StReader._blocks)
+        addition = (
+            r'(?i)'  # case-insensitive
+            r'//' + _until_end + r'|'    # line comment
+            r'\(\*' + _until_end + r'|'  # block comment  (* ... *)
+            r'OR|'
+            r'AND|'
+            r'XOR|'
+            r'NOT|'
+            r'ELSE\s+IF|'
+            + block_endings + addition
+        )
+        return CodeReader.generate_tokens(source_code, addition, token_class)
+    def preprocess(self, tokens):
+        """Handle compiler pragmas like #IF, #INCLUDE, etc."""
+        for token in tokens:
+            macro = self.macro_pattern.match(token)
+            if macro:
+                directive = macro.group(1).lower()
+                if directive in ("if", "ifdef", "ifndef", "elif"):
+                    self.context.add_condition()
+                elif directive == "include":
+                    yield "#include"
+                    yield macro.group(2) or "\"\""
+                for _ in macro.group(2).splitlines()[1:]:
+                    yield "\n"
+            else:
+                # ST normalization: collapse END_* into END
+                upper_tok = token.upper()
+                if upper_tok.startswith("END_"):
+                    yield "END"
+                    continue
+                # Eliminate whitespace, keep line breaks
+                if not token.isspace() or token == '\n':
+                    yield token
+class StStates(CodeStateMachine):
+    """Track Structured Text State."""
+    def __init__(self, context, reader):
+        super().__init__(context)
+        self.reader = reader
+        self.last_token = None
+    def __call__(self, token, reader=None):
+        if self._state(token):
+            self.next(self.saved_state)
+            if self.callback:
+                self.callback()
+        self.last_token = token
+        if self.to_exit:
+            return True
+    def _state_global(self, token):
+        token_upper = token.upper()
+        if token_upper in StReader._functions and self.context.current_function.top_nesting_level < 0:
+            self._state = self._function_name
+        elif token_upper in StReader._blocks:
+            self.context.add_bare_nesting()
+        elif token in StReader._ends:
+            self.context.pop_nesting()
+    def reset_state(self, token=None):
+        self._state = self._state_global
+        if token is not None:
+            self._state_global(token)
+    def _function_name(self, token):
+        self.context.restart_new_function(token)
+        self._state = self._function
+    def _function(self, token):
+        self.context.add_bare_nesting()
+        self.reset_state(token)

lizard 1.17.30__py2.py3-none-any.whl → 1.18.0__py2.py3-none-any.whl

lizard 1.17.30__py2.py3-none-any.whl → 1.18.0__py2.py3-none-any.whl