PyPI - dataframe-textual - Versions diffs - 1.5.0__py3-none-any.whl → 2.2.2__py3-none-any.whl - Mend

dataframe-textual 1.5.0__py3-none-any.whl → 2.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

dataframe_textual/__init__.py +27 -1
dataframe_textual/__main__.py +14 -3
dataframe_textual/common.py +154 -59
dataframe_textual/data_frame_help_panel.py +0 -3
dataframe_textual/data_frame_table.py +1910 -1238
dataframe_textual/data_frame_viewer.py +354 -100
dataframe_textual/sql_screen.py +56 -20
dataframe_textual/table_screen.py +164 -144
dataframe_textual/yes_no_screen.py +90 -34
{dataframe_textual-1.5.0.dist-info → dataframe_textual-2.2.2.dist-info}/METADATA +275 -416
dataframe_textual-2.2.2.dist-info/RECORD +14 -0
{dataframe_textual-1.5.0.dist-info → dataframe_textual-2.2.2.dist-info}/WHEEL +1 -1
dataframe_textual-1.5.0.dist-info/RECORD +0 -14
{dataframe_textual-1.5.0.dist-info → dataframe_textual-2.2.2.dist-info}/entry_points.txt +0 -0
{dataframe_textual-1.5.0.dist-info → dataframe_textual-2.2.2.dist-info}/licenses/LICENSE +0 -0

dataframe_textual/__init__.py CHANGED Viewed

@@ -1,15 +1,32 @@
 """DataFrame Viewer - Interactive CSV/Excel viewer for the terminal."""
+from importlib.metadata import version
+__version__ = version("dataframe-textual")
 from .data_frame_help_panel import DataFrameHelpPanel
 from .data_frame_table import DataFrameTable, History
 from .data_frame_viewer import DataFrameViewer
-from .table_screen import FrequencyScreen, RowDetailScreen, TableScreen
+from .table_screen import (
+    FrequencyScreen,
+    MetaColumnScreen,
+    MetaShape,
+    RowDetailScreen,
+    StatisticsScreen,
+    TableScreen,
+)
 from .yes_no_screen import (
+    AddColumnScreen,
+    AddLinkScreen,
     ConfirmScreen,
     EditCellScreen,
+    EditColumnScreen,
     FilterScreen,
+    FindReplaceScreen,
     FreezeScreen,
     OpenFileScreen,
+    RenameColumnScreen,
+    RenameTabScreen,
     SaveFileScreen,
     SearchScreen,
     YesNoScreen,
@@ -23,6 +40,9 @@ __all__ = [
     "TableScreen",
     "RowDetailScreen",
     "FrequencyScreen",
+    "StatisticsScreen",
+    "MetaShape",
+    "MetaColumnScreen",
     "YesNoScreen",
     "SaveFileScreen",
     "ConfirmScreen",
@@ -31,4 +51,10 @@ __all__ = [
     "FilterScreen",
     "FreezeScreen",
     "OpenFileScreen",
+    "RenameColumnScreen",
+    "EditColumnScreen",
+    "AddColumnScreen",
+    "AddLinkScreen",
+    "FindReplaceScreen",
+    "RenameTabScreen",
 ]

dataframe_textual/__main__.py CHANGED Viewed

@@ -4,6 +4,7 @@ import argparse
 import sys
 from pathlib import Path
+from . import __version__
 from .common import SUPPORTED_FORMATS, load_dataframe
 from .data_frame_viewer import DataFrameViewer
@@ -24,6 +25,12 @@ def cli() -> argparse.Namespace:
         "  cat data.csv | %(prog)s --format csv\n",
     )
     parser.add_argument("files", nargs="*", help="Files to view (or read from stdin)")
+    parser.add_argument(
+        "-V",
+        "--version",
+        action="version",
+        version=f"%(prog)s {__version__}",
+    )
     parser.add_argument(
         "-f",
         "--format",
@@ -37,7 +44,10 @@ def cli() -> argparse.Namespace:
         help="Specify that input files have no header row when reading CSV/TSV",
     )
     parser.add_argument(
-        "-I", "--no-inferrence", action="store_true", help="Do not infer data types when reading CSV/TSV"
+        "-I", "--no-inference", action="store_true", help="Do not infer data types when reading CSV/TSV"
+    )
+    parser.add_argument(
+        "-t", "--truncate-ragged-lines", action="store_true", help="Truncate ragged lines when reading CSV/TSV"
     )
     parser.add_argument("-E", "--ignore-errors", action="store_true", help="Ignore errors when reading CSV/TSV")
     parser.add_argument(
@@ -50,7 +60,7 @@ def cli() -> argparse.Namespace:
     parser.add_argument(
         "-a", "--skip-rows-after-header", type=int, default=0, help="Skip rows after header when reading CSV/TSV"
     )
-    parser.add_argument("-u", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
+    parser.add_argument("-n", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
     args = parser.parse_args()
     if args.files is None:
@@ -80,13 +90,14 @@ def main() -> None:
         args.files,
         file_format=args.format,
         has_header=not args.no_header,
-        infer_schema=not args.no_inferrence,
+        infer_schema=not args.no_inference,
         comment_prefix=args.comment_prefix,
         quote_char=args.quote_char,
         skip_lines=args.skip_lines,
         skip_rows_after_header=args.skip_rows_after_header,
         null_values=args.null,
         ignore_errors=args.ignore_errors,
+        truncate_ragged_lines=args.truncate_ragged_lines,
     )
     app = DataFrameViewer(*sources)
     app.run()

dataframe_textual/common.py CHANGED Viewed

@@ -12,7 +12,7 @@ import polars as pl
 from rich.text import Text
 # Supported file formats
-SUPPORTED_FORMATS = {"tsv", "csv", "excel", "xlsx", "xls", "parquet", "json", "ndjson"}
+SUPPORTED_FORMATS = {"tsv", "tab", "csv", "excel", "xlsx", "xls", "parquet", "json", "ndjson"}
 # Boolean string mappings
@@ -34,6 +34,29 @@ NULL = "NULL"
 NULL_DISPLAY = "-"
+def format_float(value: float, thousand_separator: bool = False, precision: int = 2) -> str:
+    """Format a float value, keeping integers without decimal point.
+    Args:
+        val: The float value to format.
+        thousand_separator: Whether to include thousand separators. Defaults to False.
+    Returns:
+        The formatted float as a string.
+    """
+    if (val := int(value)) == value:
+        if precision > 0:
+            return f"{val:,}" if thousand_separator else str(val)
+        else:
+            return f"{val:,.{-precision}f}" if thousand_separator else f"{val:.{-precision}f}"
+    else:
+        if precision > 0:
+            return f"{value:,.{precision}f}" if thousand_separator else f"{value:.{precision}f}"
+        else:
+            return f"{value:,f}" if thousand_separator else str(value)
 @dataclass
 class DtypeClass:
     """Data type class configuration.
@@ -52,6 +75,35 @@ class DtypeClass:
     itype: str
     convert: Any
+    def format(
+        self, val: Any, style: str | None = None, justify: str | None = None, thousand_separator: bool = False
+    ) -> str:
+        """Format the value according to its data type.
+        Args:
+            val: The value to format.
+        Returns:
+            The formatted value as a Text.
+        """
+        # Format the value
+        if val is None:
+            text_val = NULL_DISPLAY
+        elif self.gtype == "integer" and thousand_separator:
+            text_val = f"{val:,}"
+        elif self.gtype == "float":
+            text_val = format_float(val, thousand_separator)
+        else:
+            text_val = str(val)
+        return Text(
+            text_val,
+            style="" if style == "" else (style or self.style),
+            justify="" if justify == "" else (justify or self.justify),
+            overflow="ellipsis",
+            no_wrap=True,
+        )
 # itype is used by Input widget for input validation
 # fmt: off
@@ -100,8 +152,8 @@ SUBSCRIPT_DIGITS = {
 # Cursor types ("none" removed)
 CURSOR_TYPES = ["row", "column", "cell"]
-# For row index column
-RIDX = "^_ridx_^"
+# Row index mapping between filtered and original dataframe
+RID = "^_RID_^"
 @dataclass
@@ -143,27 +195,7 @@ def DtypeConfig(dtype: pl.DataType) -> DtypeClass:
         return STYLES[pl.Unknown]
-def format_float(value: float, thousand_separator: bool = False, precision: int = 2) -> str:
-    """Format a float value, keeping integers without decimal point.
-    Args:
-        val: The float value to format.
-        thousand_separator: Whether to include thousand separators. Defaults to False.
-    Returns:
-        The formatted float as a string.
-    """
-    if (val := int(value)) == value:
-        return f"{val:,}" if thousand_separator else str(val)
-    else:
-        if precision > 0:
-            return f"{value:,.{precision}f}" if thousand_separator else f"{value:.{precision}f}"
-        else:
-            return f"{value:,f}" if thousand_separator else str(value)
-def format_row(vals, dtypes, styles=None, apply_justify=True, thousand_separator=False) -> list[Text]:
+def format_row(vals, dtypes, styles: list[str | None] | None = None, thousand_separator=False) -> list[Text]:
     """Format a single row with proper styling and justification.
     Converts raw row values to formatted Rich Text objects with appropriate
@@ -172,7 +204,7 @@ def format_row(vals, dtypes, styles=None, apply_justify=True, thousand_separator
     Args:
         vals: The list of values in the row.
         dtypes: The list of data types corresponding to each value.
-        apply_justify: Whether to apply justification styling. Defaults to True.
+        styles: Optional list of style overrides for each value. Defaults to None.
     Returns:
         A list of Rich Text objects with proper formatting applied.
@@ -181,31 +213,18 @@ def format_row(vals, dtypes, styles=None, apply_justify=True, thousand_separator
     for idx, (val, dtype) in enumerate(zip(vals, dtypes, strict=True)):
         dc = DtypeConfig(dtype)
-        # Format the value
-        if val is None:
-            text_val = NULL_DISPLAY
-        elif dc.gtype == "integer" and thousand_separator:
-            text_val = f"{val:,}"
-        elif dc.gtype == "float":
-            text_val = format_float(val, thousand_separator)
-        else:
-            text_val = str(val)
         formatted_row.append(
-            Text(
-                text_val,
-                style=styles[idx] if styles and styles[idx] else dc.style,
-                justify=dc.justify if apply_justify else "",
-                overflow="ellipsis",
-                no_wrap=True,
+            dc.format(
+                val,
+                style=styles[idx] if styles and styles[idx] else None,
+                thousand_separator=thousand_separator,
             )
         )
     return formatted_row
-def rindex(lst: list, value) -> int:
+def rindex(lst: list, value, pos: int | None = None) -> int:
     """Return the last index of value in a list. Return -1 if not found.
     Searches through the list in reverse order to find the last occurrence
@@ -218,9 +237,12 @@ def rindex(lst: list, value) -> int:
     Returns:
         The index (0-based) of the last occurrence, or -1 if not found.
     """
+    n = len(lst)
     for i, item in enumerate(reversed(lst)):
+        if pos is not None and (n - 1 - i) > pos:
+            continue
         if item == value:
-            return len(lst) - 1 - i
+            return n - 1 - i
     return -1
@@ -253,9 +275,10 @@ def parse_placeholders(template: str, columns: list[str], current_cidx: int) ->
     Supports multiple placeholder types:
     - `$_` - Current column (based on current_cidx parameter)
-    - `$#` - Row index (1-based, requires '^__ridx__^' column to be present)
+    - `$#` - Row index (1-based)
     - `$1`, `$2`, etc. - Column index (1-based)
     - `$name` - Column name (e.g., `$product_id`)
+    - `` $`col name` `` - Column name with spaces (e.g., `` $`product id` ``)
     Args:
         template: The template string containing placeholders and literal text
@@ -271,8 +294,15 @@ def parse_placeholders(template: str, columns: list[str], current_cidx: int) ->
     if "$" not in template or template.endswith("$"):
         return [template]
-    # Regex matches: $_ or $\d+ or $\w+ (column names)
-    placeholder_pattern = r"\$(_|#|\d+|[a-zA-Z_]\w*)"
+    # Regex matches: $_ or $# or $\d+ or $`...` (backtick-quoted names with spaces) or $\w+ (column names)
+    # Pattern explanation:
+    # \$(_|#|\d+|`[^`]+`|[a-zA-Z_]\w*)
+    # - $_ : current column
+    # - $# : row index
+    # - $\d+ : column by index (1-based)
+    # - $`[^`]+` : column by name with spaces (backtick quoted)
+    # - $[a-zA-Z_]\w* : column by name without spaces
+    placeholder_pattern = r"\$(_|#|\d+|`[^`]+`|[a-zA-Z_]\w*)"
     placeholders = re.finditer(placeholder_pattern, template)
     parts = []
@@ -296,7 +326,7 @@ def parse_placeholders(template: str, columns: list[str], current_cidx: int) ->
             parts.append(pl.col(col_name))
         elif placeholder == "#":
             # $# refers to row index (1-based)
-            parts.append((pl.col(RIDX)))
+            parts.append(pl.col(RID))
         elif placeholder.isdigit():
             # $1, $2, etc. refer to columns by 1-based position index
             col_idx = int(placeholder) - 1  # Convert to 0-based
@@ -305,6 +335,13 @@ def parse_placeholders(template: str, columns: list[str], current_cidx: int) ->
                 parts.append(pl.col(col_ref))
             except IndexError:
                 raise ValueError(f"Invalid column index: ${placeholder} (valid range: $1 to ${len(columns)})")
+        elif placeholder.startswith("`") and placeholder.endswith("`"):
+            # $`col name` refers to column by name with spaces
+            col_ref = placeholder[1:-1]  # Remove backticks
+            if col_ref in columns:
+                parts.append(pl.col(col_ref))
+            else:
+                raise ValueError(f"Column not found: ${placeholder} (available columns: {', '.join(columns)})")
         else:
             # $name refers to column by name
             if placeholder in columns:
@@ -330,16 +367,18 @@ def parse_polars_expression(expression: str, columns: list[str], current_cidx: i
     Replaces column references with Polars col() expressions:
     - $_ - Current selected column
-    - $# - Row index (1-based, requires '^__ridx__^' column to be present)
+    - $# - Row index (1-based)
     - $1, $2, etc. - Column index (1-based)
     - $col_name - Column name (valid identifier starting with _ or letter)
+    - $`col name` - Column name with spaces (backtick quoted)
     Examples:
     - "$_ > 50" -> "pl.col('current_col') > 50"
-    - "$# > 10" -> "pl.col('^__ridx__^') > 10"
+    - "$# > 10" -> "pl.col('^_RID_^') > 10"
     - "$1 > 50" -> "pl.col('col0') > 50"
     - "$name == 'Alex'" -> "pl.col('name') == 'Alex'"
     - "$age < $salary" -> "pl.col('age') < pl.col('salary')"
+    - "$`product id` > 100" -> "pl.col('product id') > 100"
     Args:
         expression: The input expression as a string.
@@ -368,7 +407,10 @@ def parse_polars_expression(expression: str, columns: list[str], current_cidx: i
         if isinstance(part, pl.Expr):
             col = part.meta.output_name()
-            result.append(f"pl.col('{col}')")
+            if col == RID:  # Convert to 1-based
+                result.append(f"(pl.col('{col}') + 1)")
+            else:
+                result.append(f"pl.col('{col}')")
         else:
             result.append(part)
@@ -442,6 +484,7 @@ def load_dataframe(
     skip_rows_after_header: int = 0,
     null_values: list[str] | None = None,
     ignore_errors: bool = False,
+    truncate_ragged_lines: bool = False,
 ) -> list[Source]:
     """Load DataFrames from file specifications.
@@ -480,23 +523,24 @@ def load_dataframe(
         else:
             source = filename
-        # Load from file
-        # Determine file format if not specified
-        if not file_format:
+        # If not specified, determine file format (may be different for each file)
+        fmt = file_format
+        if not fmt:
             ext = Path(filename).suffix.lower()
-            if ext == ".gz" or ext == ".bz2" or ext == ".xz":
+            if ext == ".gz":
                 ext = Path(filename).with_suffix("").suffix.lower()
             fmt = ext.removeprefix(".")
             # Default to TSV
-            file_format = fmt if fmt in SUPPORTED_FORMATS else "tsv"
+            if not fmt or fmt not in SUPPORTED_FORMATS:
+                fmt = "tsv"
         # Load the file
         data.extend(
             load_file(
                 source,
                 prefix_sheet=prefix_sheet,
-                file_format=file_format,
+                file_format=fmt,
                 has_header=has_header,
                 infer_schema=infer_schema,
                 comment_prefix=comment_prefix,
@@ -505,6 +549,7 @@ def load_dataframe(
                 skip_rows_after_header=skip_rows_after_header,
                 null_values=null_values,
                 ignore_errors=ignore_errors,
+                truncate_ragged_lines=truncate_ragged_lines,
             )
         )
@@ -551,7 +596,14 @@ def handle_compute_error(
     # Schema mismatch error
     if "found more fields than defined in 'Schema'" in err_msg:
-        print(f"Input might be malformed:\n{err_msg}.\nTry again with `-E` to ignore errors", file=sys.stderr)
+        print(f"{err_msg}.\n\nInput might be malformed. Try again with `-t` to truncate ragged lines", file=sys.stderr)
+        sys.exit(1)
+    # Field ... is not properly escaped
+    if "is not properly escaped" in err_msg:
+        print(
+            f"{err_msg}\n\nQuoting might be causing the issue. Try again with `-q` to disable quoting", file=sys.stderr
+        )
         sys.exit(1)
     # ComputeError: could not parse `n.a. as of 04.01.022` as `dtype` i64 at column 'PubChemCID' (column number 16)
@@ -581,6 +633,7 @@ def load_file(
     schema_overrides: dict[str, pl.DataType] | None = None,
     null_values: list[str] | None = None,
     ignore_errors: bool = False,
+    truncate_ragged_lines: bool = False,
 ) -> list[Source]:
     """Load a single file.
@@ -611,11 +664,18 @@ def load_file(
         List of `Source` objects.
     """
     data: list[Source] = []
     filename = f"stdin.{file_format}" if isinstance(source, StringIO) else source
     filepath = Path(filename)
+    if not file_format:
+        ext = filepath.suffix.lower()
+        if ext == ".gz":
+            ext = Path(filename).with_suffix("").suffix.lower()
+        file_format = ext.removeprefix(".")
     # Load based on file format
-    if file_format in ("tsv", "csv"):
+    if file_format in ("csv", "tsv"):
         lf = pl.scan_csv(
             source,
             separator="\t" if file_format == "tsv" else ",",
@@ -628,6 +688,7 @@ def load_file(
             schema_overrides=schema_overrides,
             null_values=null_values,
             ignore_errors=ignore_errors,
+            truncate_ragged_lines=truncate_ragged_lines,
         )
         data.append(Source(lf, filename, filepath.stem))
     elif file_format in ("xlsx", "xls", "excel"):
@@ -656,6 +717,14 @@ def load_file(
     # Attempt to collect, handling ComputeError for schema inference issues
     try:
         data = [Source(src.frame.collect(), src.filename, src.tabname) for src in data]
+    except pl.exceptions.NoDataError:
+        print(
+            "Warning: No data from stdin."
+            if isinstance(source, StringIO)
+            else f"Warning: No data found in file `{filename}`.",
+            file=sys.stderr,
+        )
+        sys.exit()
     except pl.exceptions.ComputeError as ce:
         # Handle the error and determine retry strategy
         infer_schema, schema_overrides = handle_compute_error(str(ce), file_format, infer_schema, schema_overrides)
@@ -697,3 +766,29 @@ async def sleep_async(seconds: float) -> None:
     import asyncio
     await asyncio.sleep(seconds)
+def round_to_nearest_hundreds(num: int, N: int = 100) -> tuple[int, int]:
+    """Round a number to the nearest hundred boundaries.
+    Given a number, return a tuple of the two closest hundreds that bracket it.
+    Args:
+        num: The number to round.
+    Returns:
+        A tuple (lower_hundred, upper_hundred) where:
+        - lower_hundred is the largest multiple of 100 <= num
+        - upper_hundred is the smallest multiple of 100 > num
+    Examples:
+        >>> round_to_nearest_hundreds(0)
+        (0, 100)
+        >>> round_to_nearest_hundreds(150)
+        (100, 200)
+        >>> round_to_nearest_hundreds(200)
+        (200, 300)
+    """
+    lower = (num // N) * N
+    upper = lower + N
+    return (lower, upper)

dataframe_textual/data_frame_help_panel.py CHANGED Viewed

@@ -74,9 +74,6 @@ class DataFrameHelpPanel(Widget):
         Initializes the help panel by setting up a watcher for focused widget changes
         to dynamically update help text based on which widget has focus.
-        Returns:
-            None
         """
         # def update_help(focused_widget: Widget | None):

dataframe-textual 1.5.0__py3-none-any.whl → 2.2.2__py3-none-any.whl

dataframe-textual 1.5.0__py3-none-any.whl → 2.2.2__py3-none-any.whl