PyPI - just-bash - Versions diffs - 0.1.5__py3-none-any.whl → 0.1.10__py3-none-any.whl - Mend

just-bash 0.1.5__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

just_bash/ast/factory.py +3 -1
just_bash/bash.py +28 -6
just_bash/commands/awk/awk.py +362 -17
just_bash/commands/cat/cat.py +5 -1
just_bash/commands/echo/echo.py +33 -1
just_bash/commands/grep/grep.py +141 -3
just_bash/commands/od/od.py +144 -30
just_bash/commands/printf/printf.py +289 -87
just_bash/commands/pwd/pwd.py +32 -2
just_bash/commands/read/read.py +243 -64
just_bash/commands/readlink/readlink.py +3 -9
just_bash/commands/registry.py +32 -0
just_bash/commands/rmdir/__init__.py +5 -0
just_bash/commands/rmdir/rmdir.py +160 -0
just_bash/commands/sed/sed.py +142 -31
just_bash/commands/shuf/__init__.py +5 -0
just_bash/commands/shuf/shuf.py +242 -0
just_bash/commands/stat/stat.py +9 -0
just_bash/commands/time/__init__.py +5 -0
just_bash/commands/time/time.py +74 -0
just_bash/commands/touch/touch.py +118 -8
just_bash/commands/whoami/__init__.py +5 -0
just_bash/commands/whoami/whoami.py +18 -0
just_bash/fs/in_memory_fs.py +22 -0
just_bash/fs/overlay_fs.py +22 -1
just_bash/interpreter/__init__.py +1 -1
just_bash/interpreter/builtins/__init__.py +2 -0
just_bash/interpreter/builtins/control.py +4 -8
just_bash/interpreter/builtins/declare.py +321 -24
just_bash/interpreter/builtins/getopts.py +163 -0
just_bash/interpreter/builtins/let.py +2 -2
just_bash/interpreter/builtins/local.py +71 -5
just_bash/interpreter/builtins/misc.py +22 -6
just_bash/interpreter/builtins/readonly.py +38 -10
just_bash/interpreter/builtins/set.py +58 -8
just_bash/interpreter/builtins/test.py +136 -19
just_bash/interpreter/builtins/unset.py +62 -10
just_bash/interpreter/conditionals.py +29 -4
just_bash/interpreter/control_flow.py +61 -17
just_bash/interpreter/expansion.py +1647 -104
just_bash/interpreter/interpreter.py +436 -69
just_bash/interpreter/types.py +263 -2
just_bash/parser/__init__.py +2 -0
just_bash/parser/lexer.py +295 -26
just_bash/parser/parser.py +523 -64
just_bash/types.py +11 -0
{just_bash-0.1.5.dist-info → just_bash-0.1.10.dist-info}/METADATA +40 -1
{just_bash-0.1.5.dist-info → just_bash-0.1.10.dist-info}/RECORD +49 -40
{just_bash-0.1.5.dist-info → just_bash-0.1.10.dist-info}/WHEEL +0 -0

just_bash/parser/lexer.py CHANGED Viewed

@@ -116,7 +116,7 @@ RESERVED_WORDS: dict[str, TokenType] = {
     "in": TokenType.IN,
     "function": TokenType.FUNCTION,
     "select": TokenType.SELECT,
-    "time": TokenType.TIME,
+    # "time" is handled as a regular command, not a reserved keyword
     "coproc": TokenType.COPROC,
 }
@@ -133,6 +133,7 @@ class Token:
     column: int
     quoted: bool = False
     single_quoted: bool = False
+    segments: list | None = None  # list of (text, mode) tuples for mixed quoting
 @dataclass
@@ -256,11 +257,7 @@ class Lexer:
                 break
             # Check for pending here-documents after newline
-            if (
-                self.pending_heredocs
-                and self.tokens
-                and self.tokens[-1].type == TokenType.NEWLINE
-            ):
+            if self.pending_heredocs and self.tokens and self.tokens[-1].type == TokenType.NEWLINE:
                 self._read_heredoc_content()
                 continue
@@ -292,11 +289,7 @@ class Lexer:
             if char == " " or char == "\t":
                 self.pos += 1
                 self.column += 1
-            elif (
-                char == "\\"
-                and self.pos + 1 < input_len
-                and input_text[self.pos + 1] == "\n"
-            ):
+            elif char == "\\" and self.pos + 1 < input_len and input_text[self.pos + 1] == "\n":
                 # Line continuation
                 self.pos += 2
                 self.line += 1
@@ -342,9 +335,7 @@ class Lexer:
             self.pos = pos + 3
             self.column = start_column + 3
             self._register_heredoc_from_lookahead(strip_tabs=True)
-            return self._make_token(
-                TokenType.DLESSDASH, "<<-", pos, start_line, start_column
-            )
+            return self._make_token(TokenType.DLESSDASH, "<<-", pos, start_line, start_column)
         # Check other three-char operators
         three_chars = c0 + c1 + c2
@@ -374,9 +365,7 @@ class Lexer:
         if c0 in SINGLE_CHAR_OPS:
             self.pos = pos + 1
             self.column = start_column + 1
-            return self._make_token(
-                SINGLE_CHAR_OPS[c0], c0, pos, start_line, start_column
-            )
+            return self._make_token(SINGLE_CHAR_OPS[c0], c0, pos, start_line, start_column)
         # Special handling for { and }
         if c0 == "{":
@@ -470,9 +459,17 @@ class Lexer:
             pos += 1
         # If we consumed characters and hit a simple delimiter
+        _use_fast_path = False
         if pos > fast_start:
             c = input_text[pos] if pos < input_len else ""
             if c == "" or c in WORD_BREAK_CHARS:
+                # Don't use fast path if we're at an extglob pattern: @( ?( *( +( !(
+                if c == "(" and pos > fast_start and input_text[pos - 1] in "@?*+!":
+                    _use_fast_path = False  # Fall through to slow path
+                else:
+                    _use_fast_path = True
+        if _use_fast_path:
                 value = input_text[fast_start:pos]
                 self.pos = pos
                 self.column = column + (pos - fast_start)
@@ -547,12 +544,34 @@ class Lexer:
         in_double_quote = False
         starts_with_quote = input_text[pos] in "\"'" if pos < input_len else False
+        # Segment boundary tracking for mixed quoting (e.g., "pre"{a,b}"suf")
+        # Records (value_offset, mode) at each quoting transition
+        seg_boundaries: list[tuple[int, str]] = []
+        seg_mode = "unquoted"
         while pos < input_len:
             char = input_text[pos]
             # Check for word boundaries
             if not in_single_quote and not in_double_quote:
                 if char in WORD_BREAK_CHARS:
+                    # Handle extglob patterns: @( ?( *( +( !(
+                    if char == "(" and value and value[-1] in "@?*+!":
+                        # Read balanced paren group as part of word
+                        value += char
+                        pos += 1
+                        col += 1
+                        depth = 1
+                        while pos < input_len and depth > 0:
+                            ec = input_text[pos]
+                            if ec == "(":
+                                depth += 1
+                            elif ec == ")":
+                                depth -= 1
+                            value += ec
+                            pos += 1
+                            col += 1
+                        continue
                     break
             # Handle $'' ANSI-C quoting
@@ -604,13 +623,20 @@ class Lexer:
             if char == "'" and not in_double_quote:
                 if in_single_quote:
                     in_single_quote = False
-                    if not starts_with_quote:
+                    if starts_with_quote:
+                        # Record transition: single → unquoted
+                        seg_boundaries.append((len(value), seg_mode))
+                        seg_mode = "unquoted"
+                    else:
                         value += char
                 else:
                     in_single_quote = True
                     if starts_with_quote:
                         single_quoted = True
                         quoted = True
+                        # Record transition: current → single
+                        seg_boundaries.append((len(value), seg_mode))
+                        seg_mode = "single"
                     else:
                         value += char
                 pos += 1
@@ -620,12 +646,19 @@ class Lexer:
             if char == '"' and not in_single_quote:
                 if in_double_quote:
                     in_double_quote = False
-                    if not starts_with_quote:
+                    if starts_with_quote:
+                        # Record transition: double → unquoted
+                        seg_boundaries.append((len(value), seg_mode))
+                        seg_mode = "unquoted"
+                    else:
                         value += char
                 else:
                     in_double_quote = True
                     if starts_with_quote:
                         quoted = True
+                        # Record transition: current → double
+                        seg_boundaries.append((len(value), seg_mode))
+                        seg_mode = "double"
                     else:
                         value += char
                 pos += 1
@@ -643,7 +676,7 @@ class Lexer:
                     continue
                 if in_double_quote:
                     # In double quotes, only certain escapes are special
-                    if next_char in "\"\\$`\n":
+                    if next_char in '"\\$`\n':
                         if next_char in "$`":
                             value += char + next_char
                         else:
@@ -653,7 +686,9 @@ class Lexer:
                         continue
                 else:
                     # Outside quotes, backslash escapes next character
-                    if next_char in "\"'":
+                    if next_char in "\"'{}":
+                        # Preserve backslash for quotes and braces so parser
+                        # can create EscapedPart (prevents brace expansion)
                         value += char + next_char
                     else:
                         value += next_char
@@ -707,6 +742,30 @@ class Lexer:
                     col += 1
                 continue
+            # Handle $[...] legacy arithmetic expansion
+            if char == "$" and pos + 1 < input_len and input_text[pos + 1] == "[":
+                value += char
+                pos += 1
+                col += 1
+                value += input_text[pos]  # Add the [
+                pos += 1
+                col += 1
+                # Track bracket depth
+                depth = 1
+                while depth > 0 and pos < input_len:
+                    c = input_text[pos]
+                    value += c
+                    if c == "[":
+                        depth += 1
+                    elif c == "]":
+                        depth -= 1
+                    pos += 1
+                    col += 1
+                continue
             # Handle ${...} parameter expansion
             if char == "$" and pos + 1 < input_len and input_text[pos + 1] == "{":
                 value += char
@@ -737,9 +796,7 @@ class Lexer:
                 pos += 1
                 col += 1
                 # Read variable name
-                while pos < input_len and (
-                    input_text[pos].isalnum() or input_text[pos] == "_"
-                ):
+                while pos < input_len and (input_text[pos].isalnum() or input_text[pos] == "_"):
                     value += input_text[pos]
                     pos += 1
                     col += 1
@@ -811,6 +868,24 @@ class Lexer:
                     column=column,
                 )
+        # Build segments from boundaries if we had quoting transitions
+        final_segments = None
+        if seg_boundaries:
+            final_segments = []
+            prev_offset = 0
+            for offset, mode in seg_boundaries:
+                text = value[prev_offset:offset]
+                if text:
+                    final_segments.append((text, mode))
+                prev_offset = offset
+            # Add final segment
+            final_text = value[prev_offset:]
+            if final_text:
+                final_segments.append((final_text, seg_mode))
+            # Only use segments if there are multiple (mixed quoting)
+            if len(final_segments) <= 1:
+                final_segments = None
         return Token(
             type=TokenType.WORD,
             value=value,
@@ -820,6 +895,7 @@ class Lexer:
             column=column,
             quoted=quoted,
             single_quoted=single_quoted,
+            segments=final_segments,
         )
     def _register_heredoc_from_lookahead(self, strip_tabs: bool) -> None:
@@ -856,13 +932,15 @@ class Lexer:
             if in_single_quote:
                 if c == "'":
+                    in_single_quote = False
                     pos += 1
-                    break
+                    continue
                 delimiter += c
             elif in_double_quote:
                 if c == '"':
+                    in_double_quote = False
                     pos += 1
-                    break
+                    continue
                 delimiter += c
             else:
                 if c in " \t\n;|&<>()":
@@ -873,6 +951,17 @@ class Lexer:
                     pos += 2
                     quoted = True  # Backslash makes it quoted
                     continue
+                # Handle embedded quotes (e.g., E'O'F)
+                if c == "'":
+                    in_single_quote = True
+                    quoted = True
+                    pos += 1
+                    continue
+                if c == '"':
+                    in_double_quote = True
+                    quoted = True
+                    pos += 1
+                    continue
                 delimiter += c
             pos += 1
@@ -946,3 +1035,183 @@ def tokenize(input_text: str) -> list[Token]:
     """Convenience function to tokenize input."""
     lexer = Lexer(input_text)
     return lexer.tokenize()
# HTML entity mappings
HTML_ENTITIES: dict[str, str] = {
    "&lt;": "<",
    "&gt;": ">",
    "&amp;": "&",
    "&quot;": '"',
    "&apos;": "'",
}

# Characters that end an unquoted here-document delimiter word.
_HEREDOC_DELIMITER_BREAKS = " \t\n;|&<>()"


def _read_heredoc_delimiter(text: str, start: int) -> tuple[str, int]:
    """Parse the here-document delimiter word that begins at ``start``.

    Quote removal is applied the way the shell does it for delimiters:
    single- or double-quoted runs contribute their contents, a backslash
    contributes the character after it, and the word ends at the first
    unquoted break character. Quotes may appear anywhere in the word
    (``'EOF'``, ``"EOF"``, ``E'O'F``, ``\\EOF``).

    Args:
        text: The full script text.
        start: Index of the first character of the delimiter word.

    Returns:
        ``(delimiter, end)`` where ``delimiter`` is the unquoted delimiter
        and ``end`` is the index just past the raw word in ``text``.
    """
    pieces: list[str] = []
    pos = start
    n = len(text)
    while pos < n:
        ch = text[pos]
        if ch == "'" or ch == '"':
            close = text.find(ch, pos + 1)
            if close == -1:
                # Unterminated quote: everything left belongs to the word.
                pieces.append(text[pos + 1 :])
                pos = n
                break
            pieces.append(text[pos + 1 : close])
            pos = close + 1
        elif ch == "\\" and pos + 1 < n:
            pieces.append(text[pos + 1])
            pos += 2
        elif ch in _HEREDOC_DELIMITER_BREAKS:
            break
        else:
            pieces.append(ch)
            pos += 1
    return "".join(pieces), pos


def _skip_heredoc_body(text: str, start: int, delimiter: str, strip_tabs: bool) -> int:
    """Return the index just past a here-document body starting at ``start``.

    The body runs up to and including the first line equal to ``delimiter``
    (or to the end of ``text`` if no such line exists). Leading tabs on the
    terminator line are ignored only when ``strip_tabs`` is true, i.e. for
    the ``<<-`` operator.
    """
    pos = start
    n = len(text)
    while pos < n:
        newline = text.find("\n", pos)
        line_end = n if newline == -1 else newline
        line = text[pos:line_end]
        pos = n if newline == -1 else newline + 1
        if line == delimiter or (strip_tabs and line.lstrip("\t") == delimiter):
            break
    return pos


def unescape_html_entities(input_text: str) -> str:
    """Unescape HTML entities in operator positions (outside quotes and heredocs).

    This handles LLM-generated bash commands that contain HTML-escaped
    operators like ``&lt;`` instead of ``<``.

    Only unescapes entities outside of:
    - Single quotes
    - Double quotes
    - Heredoc content

    A backslash outside single quotes keeps the following character
    verbatim. Here-document bodies start after the next unquoted newline
    following the ``<<`` / ``<<-`` operator, so the remainder of the command
    line is still processed normally and several heredocs on one line are
    consumed in order. The here-string operator ``<<<`` is passed through
    and does not start a heredoc.

    Known limitations: comments and ``$(( ))`` arithmetic are not tracked,
    so an apostrophe in a comment or a ``<<`` shift operator is interpreted
    as a quote / heredoc operator.

    Args:
        input_text: The bash script that may contain HTML entities.

    Returns:
        The script with HTML entities unescaped in operator positions.
    """
    result: list[str] = []
    i = 0
    n = len(input_text)
    in_single_quote = False
    in_double_quote = False
    # Heredocs announced on the current command line whose bodies have not
    # been reached yet: (delimiter, strip_tabs) in order of appearance.
    pending_heredocs: list[tuple[str, bool]] = []

    while i < n:
        char = input_text[i]

        # Handle quote state tracking
        if char == "'" and not in_double_quote:
            in_single_quote = not in_single_quote
            result.append(char)
            i += 1
            continue
        if char == '"' and not in_single_quote:
            in_double_quote = not in_double_quote
            result.append(char)
            i += 1
            continue

        # Backslash escapes (not special inside single quotes): keep the
        # backslash and the escaped character exactly as written.
        if char == "\\" and not in_single_quote and i + 1 < n:
            result.append(input_text[i : i + 2])
            i += 2
            continue

        # Everything else inside quotes is literal.
        if in_single_quote or in_double_quote:
            result.append(char)
            i += 1
            continue

        # An unquoted newline ends the command line; any announced heredoc
        # bodies follow and are copied through untouched.
        if char == "\n" and pending_heredocs:
            result.append(char)
            i += 1
            for delimiter, strip_tabs in pending_heredocs:
                body_end = _skip_heredoc_body(input_text, i, delimiter, strip_tabs)
                result.append(input_text[i:body_end])
                i = body_end
            pending_heredocs.clear()
            continue

        if char == "<" and input_text.startswith("<<", i):
            # `<<<` is a here-string, not a here-document.
            if input_text.startswith("<<<", i):
                result.append("<<<")
                i += 3
                continue

            strip_tabs = input_text.startswith("<<-", i)
            op_len = 3 if strip_tabs else 2
            result.append(input_text[i : i + op_len])
            i += op_len

            # Whitespace between the operator and the delimiter word
            while i < n and input_text[i] in " \t":
                result.append(input_text[i])
                i += 1

            if i < n:
                delimiter, word_end = _read_heredoc_delimiter(input_text, i)
                # Only register a heredoc when a delimiter word is present.
                if word_end > i:
                    result.append(input_text[i:word_end])
                    i = word_end
                    pending_heredocs.append((delimiter, strip_tabs))
            continue

        # Only attempt HTML entity replacement outside quotes
        if char == "&":
            matched = False
            for entity, replacement in HTML_ENTITIES.items():
                # Compare in place; slicing the tail would copy it each time.
                if input_text.startswith(entity, i):
                    result.append(replacement)
                    i += len(entity)
                    matched = True
                    break
            if matched:
                continue

        # Regular character
        result.append(char)
        i += 1

    return "".join(result)

just-bash 0.1.5__py3-none-any.whl → 0.1.10__py3-none-any.whl

just-bash 0.1.5__py3-none-any.whl → 0.1.10__py3-none-any.whl